Compare commits

570 commits: tmp-eevee- ... universal-
Author | SHA1 | Date
---|---|---
(Commit table: 570 rows of abbreviated SHA1 hashes; the author, date, and message cells were empty.)
@@ -1,8 +0,0 @@
-{
-  "project_id" : "Blender",
-  "conduit_uri" : "https://developer.blender.org/",
-  "phabricator.uri" : "https://developer.blender.org/",
-  "git.default-relative-commit" : "origin/master",
-  "arc.land.update.default" : "rebase",
-  "arc.land.onto.default" : "master"
-}
@@ -236,6 +236,8 @@ ForEachMacros:
   - LOOP_UNSELECTED_POINTS
   - LOOP_VISIBLE_KEYS
   - LOOP_VISIBLE_POINTS
+  - LIGHT_FOREACH_BEGIN_DIRECTIONAL
+  - LIGHT_FOREACH_BEGIN_LOCAL
   - LISTBASE_CIRCULAR_BACKWARD_BEGIN
   - LISTBASE_CIRCULAR_FORWARD_BEGIN
   - LISTBASE_FOREACH
@@ -1,9 +1,9 @@
 name: Bug Report
 about: File a bug report
 labels:
-  - "type::Report"
-  - "status::Needs Triage"
-  - "priority::Normal"
+  - "Type/Report"
+  - "Status/Needs Triage"
+  - "Priority/Normal"
 body:
   - type: markdown
     attributes:
@@ -1,7 +1,7 @@
 name: Design
 about: Create a design task (for developers only)
 labels:
-  - "type::Design"
+  - "Type/Design"
 body:
   - type: textarea
     id: body
@@ -1,7 +1,7 @@
 name: To Do
 about: Create a to do task (for developers only)
 labels:
-  - "type::To Do"
+  - "Type/To Do"
 body:
   - type: textarea
     id: body
.github/pull_request_template.md (3 changed lines, vendored)

@@ -1,5 +1,4 @@
-This repository is only used as a mirror of git.blender.org. Blender development happens on
-https://developer.blender.org.
+This repository is only used as a mirror. Blender development happens on projects.blender.org.
 
 To get started with contributing code, please see:
 https://wiki.blender.org/wiki/Process/Contributing_Code
.github/stale.yml (3 changed lines, vendored)

@@ -15,8 +15,7 @@ staleLabel: stale
 # Comment to post when closing a stale Issue or Pull Request.
 closeComment: >
   This issue has been automatically closed, because this repository is only
-  used as a mirror of git.blender.org. Blender development happens on
-  developer.blender.org.
+  used as a mirror. Blender development happens on projects.blender.org.
 
   To get started contributing code, please read:
   https://wiki.blender.org/wiki/Process/Contributing_Code
.gitignore (20 changed lines, vendored)

@@ -39,7 +39,7 @@ Desktop.ini
 /doc/python_api/rst/bmesh.ops.rst
 
 # in-source lib downloads
-/build_files/build_environment/downloads
+/build_files/build_environment/downloads/
 
 # in-source buildbot signing configuration
 /build_files/buildbot/codesign/config_server.py
@@ -48,4 +48,20 @@ Desktop.ini
 waveletNoiseTile.bin
 
 # testing environment
-/Testing
+/Testing/
+
+# Translations.
+/locale/user-config.py
+
+# External repositories.
+/scripts/addons/
+/scripts/addons_contrib/
+
+# Ignore old submodules directories.
+# Eventually need to get rid of those, but for the first time of transition
+# avoid indidents when the folders exists after bisect and developers staging
+# them by accident.
+/release/scripts/addons/
+/release/datafiles/locale/
+/release/scripts/addons_contrib/
+/source/tools/
.gitmodules (20 changed lines, vendored)

@@ -1,20 +0,0 @@
-[submodule "release/scripts/addons"]
-  path = release/scripts/addons
-  url = ../blender-addons.git
-  branch = master
-  ignore = all
-[submodule "release/scripts/addons_contrib"]
-  path = release/scripts/addons_contrib
-  url = ../blender-addons-contrib.git
-  branch = master
-  ignore = all
-[submodule "release/datafiles/locale"]
-  path = release/datafiles/locale
-  url = ../blender-translations.git
-  branch = master
-  ignore = all
-[submodule "source/tools"]
-  path = source/tools
-  url = ../blender-dev-tools.git
-  branch = master
-  ignore = all
@@ -524,7 +524,7 @@ endif()
 if(NOT APPLE)
   option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
   option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
-  set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
+  set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
   mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
   mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
 endif()
@@ -625,8 +625,10 @@ mark_as_advanced(
 
 # Vulkan
 option(WITH_VULKAN_BACKEND "Enable Vulkan as graphics backend (only for development)" OFF)
+option(WITH_VULKAN_GUARDEDALLOC "Use guardedalloc for host allocations done inside Vulkan (development option)" OFF)
 mark_as_advanced(
   WITH_VULKAN_BACKEND
+  WITH_VULKAN_GUARDEDALLOC
 )
 
 # Metal
@@ -952,21 +954,6 @@ endif()
 # -----------------------------------------------------------------------------
 # Check if Sub-modules are Cloned
 
-if(WITH_INTERNATIONAL)
-  file(GLOB RESULT "${CMAKE_SOURCE_DIR}/release/datafiles/locale")
-  list(LENGTH RESULT DIR_LEN)
-  if(DIR_LEN EQUAL 0)
-    message(
-      WARNING
-      "Translation path '${CMAKE_SOURCE_DIR}/release/datafiles/locale' is missing, "
-      "This is a 'git submodule', which are known not to work with bridges to other version "
-      "control systems."
-    )
-    set(TRANSLATIONS_FOUND OFF)
-    set_and_warn_library_found("Translations" TRANSLATIONS_FOUND WITH_INTERNATIONAL)
-  endif()
-endif()
-
 if(WITH_PYTHON)
   # While we have this as an '#error' in 'bpy_capi_utils.h',
   # upgrading Python tends to cause confusion for users who build.
@@ -982,14 +969,14 @@ if(WITH_PYTHON)
   )
   endif()
 
-  file(GLOB RESULT "${CMAKE_SOURCE_DIR}/release/scripts/addons")
+  file(GLOB RESULT "${CMAKE_SOURCE_DIR}/scripts/addons")
   list(LENGTH RESULT DIR_LEN)
   if(DIR_LEN EQUAL 0)
     message(
       WARNING
-      "Addons path '${CMAKE_SOURCE_DIR}/release/scripts/addons' is missing, "
-      "This is a 'git submodule', which are known not to work with bridges to other version "
-      "control systems: * CONTINUING WITHOUT ADDONS *"
+      "Addons path '${CMAKE_SOURCE_DIR}/scripts/addons' is missing. "
+      "This is an external repository which needs to be checked out. Use `make update` to do so. "
+      "* CONTINUING WITHOUT ADDONS *"
     )
   endif()
 endif()
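The warning's replacement workflow: the add-ons tree is no longer a git submodule and is fetched by the update script instead. A sketch of the checkout step (`make update` drives `build_files/utils/make_update.py`, which also appears in the GNUmakefile changes below):

```sh
# Fetch the external scripts/addons repositories after the submodule removal.
cd ~/blender-git/blender
make update
```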
GNUmakefile (36 changed lines)

@@ -69,7 +69,7 @@ Static Source Code Checking
 * check_cmake: Runs our own cmake file checker which detects errors in the cmake file list definitions.
 * check_pep8: Checks all Python script are pep8 which are tagged to use the stricter formatting.
 * check_mypy: Checks all Python scripts using mypy,
-  see: source/tools/check_source/check_mypy_config.py scripts which are included.
+  see: tools/check_source/check_mypy_config.py scripts which are included.
 
 Documentation Checking
 
@@ -85,7 +85,7 @@ Spell Checkers
 * check_spelling_osl: Check for spelling errors (OSL only).
 * check_spelling_py: Check for spelling errors (Python only).
 
-Note: an additional word-list is maintained at: 'source/tools/check_source/check_spelling_c_config.py'
+Note: an additional word-list is maintained at: 'tools/check_source/check_spelling_c_config.py'
 
 Note: that spell checkers can take a 'CHECK_SPELLING_CACHE' filepath argument,
 so re-running does not need to re-check unchanged files.
@@ -299,7 +299,11 @@ else
 ifneq ("$(wildcard $(DEPS_BUILD_DIR)/build.ninja)","")
 DEPS_BUILD_COMMAND:=ninja
 else
-DEPS_BUILD_COMMAND:=make -s
+ifeq ($(OS), Darwin)
+DEPS_BUILD_COMMAND:=make -s
+else
+DEPS_BUILD_COMMAND:="$(BLENDER_DIR)/build_files/build_environment/linux/make_deps_wrapper.sh" -s
+endif
 endif
 endif
 
@@ -398,7 +402,7 @@ endif
 
 deps: .FORCE
   @echo
-  @echo Configuring dependencies in \"$(DEPS_BUILD_DIR)\"
+  @echo Configuring dependencies in \"$(DEPS_BUILD_DIR)\", install to \"$(DEPS_INSTALL_DIR)\"
 
   @cmake -H"$(DEPS_SOURCE_DIR)" \
   -B"$(DEPS_BUILD_DIR)" \
@@ -486,22 +490,22 @@ check_smatch: .FORCE
   $(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_smatch.py"
 
 check_mypy: .FORCE
-  @$(PYTHON) "$(BLENDER_DIR)/source/tools/check_source/check_mypy.py"
+  @$(PYTHON) "$(BLENDER_DIR)/tools/check_source/check_mypy.py"
 
 check_wiki_file_structure: .FORCE
   @PYTHONIOENCODING=utf_8 $(PYTHON) \
-  "$(BLENDER_DIR)/source/tools/check_wiki/check_wiki_file_structure.py"
+  "$(BLENDER_DIR)/tools/check_wiki/check_wiki_file_structure.py"
 
 check_spelling_py: .FORCE
   @cd "$(BUILD_DIR)" ; \
   PYTHONIOENCODING=utf_8 $(PYTHON) \
-  "$(BLENDER_DIR)/source/tools/check_source/check_spelling.py" \
-  "$(BLENDER_DIR)/release/scripts"
+  "$(BLENDER_DIR)/tools/check_source/check_spelling.py" \
+  "$(BLENDER_DIR)/scripts"
 
 check_spelling_c: .FORCE
   @cd "$(BUILD_DIR)" ; \
   PYTHONIOENCODING=utf_8 $(PYTHON) \
-  "$(BLENDER_DIR)/source/tools/check_source/check_spelling.py" \
+  "$(BLENDER_DIR)/tools/check_source/check_spelling.py" \
   --cache-file=$(CHECK_SPELLING_CACHE) \
   "$(BLENDER_DIR)/source" \
   "$(BLENDER_DIR)/intern/cycles" \
@@ -511,21 +515,21 @@ check_spelling_c: .FORCE
 check_spelling_osl: .FORCE
   @cd "$(BUILD_DIR)" ; \
   PYTHONIOENCODING=utf_8 $(PYTHON) \
-  "$(BLENDER_DIR)/source/tools/check_source/check_spelling.py" \
+  "$(BLENDER_DIR)/tools/check_source/check_spelling.py" \
   --cache-file=$(CHECK_SPELLING_CACHE) \
   "$(BLENDER_DIR)/intern/cycles/kernel/shaders"
 
 check_descriptions: .FORCE
   @$(BLENDER_BIN) --background -noaudio --factory-startup --python \
-  "$(BLENDER_DIR)/source/tools/check_source/check_descriptions.py"
+  "$(BLENDER_DIR)/tools/check_source/check_descriptions.py"
 
 check_deprecated: .FORCE
   @PYTHONIOENCODING=utf_8 $(PYTHON) \
-  source/tools/check_source/check_deprecated.py
+  tools/check_source/check_deprecated.py
 
 check_licenses: .FORCE
   @PYTHONIOENCODING=utf_8 $(PYTHON) \
-  "$(BLENDER_DIR)/source/tools/check_source/check_licenses.py" \
+  "$(BLENDER_DIR)/tools/check_source/check_licenses.py" \
   "--show-headers=$(SHOW_HEADERS)"
 
 check_pep8: .FORCE
@@ -534,7 +538,7 @@ check_pep8: .FORCE
 
 check_cmake: .FORCE
   @PYTHONIOENCODING=utf_8 $(PYTHON) \
-  source/tools/check_source/check_cmake_consistency.py
+  tools/check_source/check_cmake_consistency.py
 
 
 # -----------------------------------------------------------------------------
@@ -572,8 +576,8 @@ update_code: .FORCE
   @$(PYTHON) ./build_files/utils/make_update.py --no-libraries
 
 format: .FORCE
-  @PATH="${LIBDIR}/llvm/bin/:$(PATH)" $(PYTHON) source/tools/utils_maintenance/clang_format_paths.py $(PATHS)
-  @$(PYTHON) source/tools/utils_maintenance/autopep8_format_paths.py --autopep8-command="$(AUTOPEP8)" $(PATHS)
+  @PATH="${LIBDIR}/llvm/bin/:$(PATH)" $(PYTHON) tools/utils_maintenance/clang_format_paths.py $(PATHS)
+  @$(PYTHON) tools/utils_maintenance/autopep8_format_paths.py --autopep8-command="$(AUTOPEP8)" $(PATHS)
 
 
 # -----------------------------------------------------------------------------
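None of these path updates change how the checkers are run; the targets are invoked as before from the source root, now resolving against the relocated `tools/` tree. Illustrative invocations (a sketch, not output from this change set):

```sh
# Run the relocated checkers from the Blender source root.
make check_cmake
make check_mypy
# The format target also accepts an optional PATHS variable.
make format PATHS="source/blender/blenkernel"
```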
@@ -24,7 +24,7 @@ Development
 -----------
 
 - [Build Instructions](https://wiki.blender.org/wiki/Building_Blender)
-- [Code Review & Bug Tracker](https://developer.blender.org)
+- [Code Review & Bug Tracker](https://projects.blender.org)
 - [Developer Forum](https://devtalk.blender.org)
 - [Developer Documentation](https://wiki.blender.org)
 
@@ -10,7 +10,7 @@ ExternalProject_Add(external_epoxy
   URL_HASH ${EPOXY_HASH_TYPE}=${EPOXY_HASH}
   PREFIX ${BUILD_DIR}/epoxy
   PATCH_COMMAND ${PATCH_CMD} -p 1 -N -d ${BUILD_DIR}/epoxy/src/external_epoxy/ < ${PATCH_DIR}/epoxy.diff
-  CONFIGURE_COMMAND ${CONFIGURE_ENV} && ${MESON} setup --prefix ${LIBDIR}/epoxy --default-library ${EPOXY_LIB_TYPE} --libdir lib ${BUILD_DIR}/epoxy/src/external_epoxy-build ${BUILD_DIR}/epoxy/src/external_epoxy -Dtests=false
+  CONFIGURE_COMMAND ${CONFIGURE_ENV} && ${MESON} setup --prefix ${LIBDIR}/epoxy --default-library ${EPOXY_LIB_TYPE} --libdir lib ${BUILD_DIR}/epoxy/src/external_epoxy-build ${BUILD_DIR}/epoxy/src/external_epoxy -Dtests=false ${MESON_BUILD_TYPE}
   BUILD_COMMAND ninja
   INSTALL_COMMAND ninja install
 )
@@ -9,7 +9,7 @@ ExternalProject_Add(external_fribidi
   URL_HASH ${FRIBIDI_HASH_TYPE}=${FRIBIDI_HASH}
   DOWNLOAD_DIR ${DOWNLOAD_DIR}
   PREFIX ${BUILD_DIR}/fribidi
-  CONFIGURE_COMMAND ${MESON} setup --prefix ${LIBDIR}/fribidi -Ddocs=false --default-library static --libdir lib ${BUILD_DIR}/fribidi/src/external_fribidi-build ${BUILD_DIR}/fribidi/src/external_fribidi
+  CONFIGURE_COMMAND ${MESON} setup --prefix ${LIBDIR}/fribidi ${MESON_BUILD_TYPE} -Ddocs=false --default-library static --libdir lib ${BUILD_DIR}/fribidi/src/external_fribidi-build ${BUILD_DIR}/fribidi/src/external_fribidi
   BUILD_COMMAND ninja
   INSTALL_COMMAND ninja install
   INSTALL_DIR ${LIBDIR}/fribidi
@@ -22,7 +22,7 @@ elseif(UNIX AND NOT APPLE)
   )
 endif()
 
-# Boolean crashes with Arm assembly, see T103423.
+# Boolean crashes with Arm assembly, see #103423.
 if(BLENDER_PLATFORM_ARM)
   set(GMP_OPTIONS
     ${GMP_OPTIONS}
@@ -21,6 +21,7 @@ set(HARFBUZZ_EXTRA_OPTIONS
   # Only used for command line utilities,
   # disable as this would add an addition & unnecessary build-dependency.
   -Dcairo=disabled
+  ${MESON_BUILD_TYPE}
 )
 
 ExternalProject_Add(external_harfbuzz
@@ -59,3 +60,10 @@ if(BUILD_MODE STREQUAL Release AND WIN32)
     DEPENDEES install
   )
 endif()
+
+if(BUILD_MODE STREQUAL Debug AND WIN32)
+  ExternalProject_Add_Step(external_harfbuzz after_install
+    COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/harfbuzz/lib/libharfbuzz.a ${HARVEST_TARGET}/harfbuzz/lib/libharfbuzz_d.lib
+    DEPENDEES install
+  )
+endif()
@@ -40,7 +40,8 @@ ExternalProject_Add(external_igc_llvm
     ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0004-OpenCL-support-cl_ext_float_atomics.patch &&
     ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0005-OpenCL-Add-cl_khr_integer_dot_product.patch &&
     ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch &&
-    ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0002-Remove-repo-name-in-LLVM-IR.patch
+    ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0002-Remove-repo-name-in-LLVM-IR.patch &&
+    ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0003-Add-missing-include-limit-in-benchmark.patch
 )
 add_dependencies(
   external_igc_llvm
@@ -55,9 +56,6 @@ ExternalProject_Add(external_igc_spirv_translator
   CONFIGURE_COMMAND echo .
   BUILD_COMMAND echo .
   INSTALL_COMMAND echo .
-  PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0001-update-SPIR-V-headers-for-SPV_INTEL_split_barrier.patch &&
-    ${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0002-Add-support-for-split-barriers-extension-SPV_INTEL_s.patch &&
-    ${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0003-Support-cl_bf16_conversions.patch
 )
 add_dependencies(
   external_igc_spirv_translator
@@ -15,7 +15,7 @@ llvm-config = '${LIBDIR}/llvm/bin/llvm-config'"
 )
 
 set(MESA_EXTRA_FLAGS
-  -Dbuildtype=release
+  ${MESON_BUILD_TYPE}
   -Dc_args=${MESA_CFLAGS}
   -Dcpp_args=${MESA_CXXFLAGS}
   -Dc_link_args=${MESA_LDFLAGS}
@@ -44,13 +44,21 @@ set(OPENVDB_EXTRA_ARGS
   # -DLLVM_DIR=${LIBDIR}/llvm/lib/cmake/llvm
 )
 
+set(OPENVDB_PATCH ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb.diff)
+if(APPLE)
+  set(OPENVDB_PATCH
+    ${OPENVDB_PATCH} &&
+    ${PATCH_CMD} -p 0 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb_metal.diff
+  )
+endif()
+
 ExternalProject_Add(openvdb
   URL file://${PACKAGE_DIR}/${OPENVDB_FILE}
   DOWNLOAD_DIR ${DOWNLOAD_DIR}
   URL_HASH ${OPENVDB_HASH_TYPE}=${OPENVDB_HASH}
   CMAKE_GENERATOR ${PLATFORM_ALT_GENERATOR}
   PREFIX ${BUILD_DIR}/openvdb
-  PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb.diff
+  PATCH_COMMAND ${OPENVDB_PATCH}
   CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/openvdb ${DEFAULT_CMAKE_FLAGS} ${OPENVDB_EXTRA_ARGS}
   INSTALL_DIR ${LIBDIR}/openvdb
 )
@@ -16,8 +16,10 @@ message("BuildMode = ${BUILD_MODE}")
 
 if(BUILD_MODE STREQUAL "Debug")
   set(LIBDIR ${CMAKE_CURRENT_BINARY_DIR}/Debug)
+  set(MESON_BUILD_TYPE -Dbuildtype=debug)
 else()
   set(LIBDIR ${CMAKE_CURRENT_BINARY_DIR}/Release)
+  set(MESON_BUILD_TYPE -Dbuildtype=release)
 endif()
 
 set(DOWNLOAD_DIR "${CMAKE_CURRENT_BINARY_DIR}/downloads" CACHE STRING "Path for downloaded files")
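With `MESON_BUILD_TYPE` defined once per build mode, the Meson-based dependencies in this change set (epoxy, fribidi, harfbuzz, mesa, wayland, wayland-protocols) all receive a consistent `-Dbuildtype` flag. A rough sketch of what the epoxy configure step then expands to in a Release build (paths are illustrative placeholders, not the real `${LIBDIR}`/`${BUILD_DIR}` values):

```sh
# Illustrative expansion of the epoxy CONFIGURE_COMMAND in a Release build;
# the trailing -Dbuildtype=release comes from ${MESON_BUILD_TYPE}.
meson setup --prefix /deps/Release/epoxy \
  --default-library static --libdir lib \
  /deps/build/epoxy/src/external_epoxy-build \
  /deps/build/epoxy/src/external_epoxy \
  -Dtests=false -Dbuildtype=release
```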
@@ -88,6 +88,19 @@ else()
     export LDFLAGS=${PYTHON_LDFLAGS} &&
     export PKG_CONFIG_PATH=${LIBDIR}/ffi/lib/pkgconfig)
 
+  # NOTE: untested on APPLE so far.
+  if(NOT APPLE)
+    set(PYTHON_CONFIGURE_EXTRA_ARGS
+      ${PYTHON_CONFIGURE_EXTRA_ARGS}
+      # Used on most release Linux builds (Fedora for e.g.),
+      # increases build times noticeably with the benefit of a modest speedup at runtime.
+      --enable-optimizations
+      # While LTO is OK when building on the same system, it's incompatible across GCC versions,
+      # making it impractical for developers to build against, so keep it disabled.
+      # `--with-lto`
+    )
+  endif()
+
 ExternalProject_Add(external_python
   URL file://${PACKAGE_DIR}/${PYTHON_FILE}
   DOWNLOAD_DIR ${DOWNLOAD_DIR}
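The new block only takes effect on non-Apple platforms: the CPython configure step gains profile-guided optimization while LTO stays deliberately off. In effect it amounts to something like the following (a sketch of the resulting configure invocation, not the literal `ExternalProject` command; the prefix path is illustrative):

```sh
# Sketch: effective extra flag for external_python on Linux.
# --enable-optimizations enables PGO; --with-lto is intentionally omitted
# because LTO objects are incompatible across GCC versions.
./configure --prefix="$LIBDIR/python" --enable-optimizations
```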
@@ -668,9 +668,9 @@ set(SPIRV_HEADERS_FILE SPIR-V-Headers-${SPIRV_HEADERS_VERSION}.tar.gz)
 # compiler, the versions used are taken from the following location
 # https://github.com/intel/intel-graphics-compiler/releases
 
-set(IGC_VERSION 1.0.12149.1)
+set(IGC_VERSION 1.0.13064.7)
 set(IGC_URI https://github.com/intel/intel-graphics-compiler/archive/refs/tags/igc-${IGC_VERSION}.tar.gz)
-set(IGC_HASH 44f67f24e3bc5130f9f062533abf8154782a9d0a992bc19b498639a8521ae836)
+set(IGC_HASH a929abd4cca2b293961ec0437ee4b3b2147bd3b2c8a3c423af78c0c359b2e5ae)
 set(IGC_HASH_TYPE SHA256)
 set(IGC_FILE igc-${IGC_VERSION}.tar.gz)
 
@@ -690,15 +690,15 @@ set(IGC_LLVM_FILE ${IGC_LLVM_VERSION}.tar.gz)
 #
 # WARNING WARNING WARNING
 
-set(IGC_OPENCL_CLANG_VERSION 363a5262d8c7cff3fb28f3bdb5d85c8d7e91c1bb)
+set(IGC_OPENCL_CLANG_VERSION ee31812ea8b89d08c2918f045d11a19bd33525c5)
 set(IGC_OPENCL_CLANG_URI https://github.com/intel/opencl-clang/archive/${IGC_OPENCL_CLANG_VERSION}.tar.gz)
-set(IGC_OPENCL_CLANG_HASH aa8cf72bb239722ce8ce44f79413c6887ecc8ca18477dd520aa5c4809756da9a)
+set(IGC_OPENCL_CLANG_HASH 1db6735bbcfaa31e8a9ba39f121d6bafa806ea8919e9f56782d6aaa67771ddda)
 set(IGC_OPENCL_CLANG_HASH_TYPE SHA256)
 set(IGC_OPENCL_CLANG_FILE opencl-clang-${IGC_OPENCL_CLANG_VERSION}.tar.gz)
 
-set(IGC_VCINTRINSICS_VERSION v0.5.0)
+set(IGC_VCINTRINSICS_VERSION v0.11.0)
 set(IGC_VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/refs/tags/${IGC_VCINTRINSICS_VERSION}.tar.gz)
-set(IGC_VCINTRINSICS_HASH 70bb47c5e32173cf61514941e83ae7c7eb4485e6d2fca60cfa1f50d4f42c41f2)
+set(IGC_VCINTRINSICS_HASH e5acd5626ce7fa6d41ce154c50ac805eda734ee66af94ef28e680ac2ad81bb9f)
 set(IGC_VCINTRINSICS_HASH_TYPE SHA256)
 set(IGC_VCINTRINSICS_FILE vc-intrinsics-${IGC_VCINTRINSICS_VERSION}.tar.gz)
 
@@ -714,9 +714,9 @@ set(IGC_SPIRV_TOOLS_HASH 6e19900e948944243024aedd0a201baf3854b377b9cc7a386553bc1
 set(IGC_SPIRV_TOOLS_HASH_TYPE SHA256)
 set(IGC_SPIRV_TOOLS_FILE SPIR-V-Tools-${IGC_SPIRV_TOOLS_VERSION}.tar.gz)
 
-set(IGC_SPIRV_TRANSLATOR_VERSION a31ffaeef77e23d500b3ea3d35e0c42ff5648ad9)
+set(IGC_SPIRV_TRANSLATOR_VERSION d739c01d65ec00dee64dedd40deed805216a7193)
 set(IGC_SPIRV_TRANSLATOR_URI https://github.com/KhronosGroup/SPIRV-LLVM-Translator/archive/${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
-set(IGC_SPIRV_TRANSLATOR_HASH 9e26c96a45341b8f8af521bacea20e752623346340addd02af95d669f6e89252)
+set(IGC_SPIRV_TRANSLATOR_HASH ddc0cc9ccbe59dadeaf291012d59de142b2e9f2b124dbb634644d39daddaa13e)
 set(IGC_SPIRV_TRANSLATOR_HASH_TYPE SHA256)
 set(IGC_SPIRV_TRANSLATOR_FILE SPIR-V-Translator-${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
 
@@ -724,15 +724,15 @@ set(IGC_SPIRV_TRANSLATOR_FILE SPIR-V-Translator-${IGC_SPIRV_TRANSLATOR_VERSION}.
 ### Intel Graphics Compiler DEPS END ###
 ########################################
 
-set(GMMLIB_VERSION intel-gmmlib-22.1.8)
+set(GMMLIB_VERSION intel-gmmlib-22.3.0)
 set(GMMLIB_URI https://github.com/intel/gmmlib/archive/refs/tags/${GMMLIB_VERSION}.tar.gz)
-set(GMMLIB_HASH bf23e9a3742b4fb98c7666c9e9b29f3219e4b2fb4d831aaf4eed71f5e2d17368)
+set(GMMLIB_HASH c1f33e1519edfc527127baeb0436b783430dfd256c643130169a3a71dc86aff9)
 set(GMMLIB_HASH_TYPE SHA256)
 set(GMMLIB_FILE ${GMMLIB_VERSION}.tar.gz)
 
-set(OCLOC_VERSION 22.38.24278)
+set(OCLOC_VERSION 22.49.25018.21)
 set(OCLOC_URI https://github.com/intel/compute-runtime/archive/refs/tags/${OCLOC_VERSION}.tar.gz)
-set(OCLOC_HASH db0c542fccd651e6404b15a74d46027f1ce0eda8dc9e25a40cbb6c0faef257ee)
+set(OCLOC_HASH 92362dae08b503a34e5d3820ed284198c452bcd5e7504d90eb69887b20492c06)
 set(OCLOC_HASH_TYPE SHA256)
 set(OCLOC_FILE ocloc-${OCLOC_VERSION}.tar.gz)
 
@@ -13,7 +13,7 @@ ExternalProject_Add(external_wayland
   # NOTE: `-lm` is needed for `libxml2` which is a static library that uses `libm.so`,
   # without this, math symbols such as `floor` aren't found.
   CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env PKG_CONFIG_PATH=${LIBDIR}/expat/lib/pkgconfig:${LIBDIR}/xml2/lib/pkgconfig:${LIBDIR}/ffi/lib/pkgconfig:$PKG_CONFIG_PATH
-    ${MESON} --prefix ${LIBDIR}/wayland -Ddocumentation=false -Dtests=false -D "c_link_args=-L${LIBDIR}/ffi/lib -lm" . ../external_wayland
+    ${MESON} --prefix ${LIBDIR}/wayland ${MESON_BUILD_TYPE} -Ddocumentation=false -Dtests=false -D "c_link_args=-L${LIBDIR}/ffi/lib -lm" . ../external_wayland
   BUILD_COMMAND ninja
   INSTALL_COMMAND ninja install
 )
@@ -7,7 +7,7 @@ ExternalProject_Add(external_wayland_protocols
   PREFIX ${BUILD_DIR}/wayland-protocols
   # Use `-E` so the `PKG_CONFIG_PATH` can be defined to link against our own WAYLAND.
   CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env PKG_CONFIG_PATH=${LIBDIR}/wayland/lib64/pkgconfig:$PKG_CONFIG_PATH
-    ${MESON} --prefix ${LIBDIR}/wayland-protocols . ../external_wayland_protocols -Dtests=false
+    ${MESON} --prefix ${LIBDIR}/wayland-protocols ${MESON_BUILD_TYPE} . ../external_wayland_protocols -Dtests=false
   BUILD_COMMAND ninja
   INSTALL_COMMAND ninja install
 )
@@ -17,11 +17,13 @@ ExternalProject_Add(external_xvidcore
   INSTALL_DIR ${LIBDIR}/xvidcore
 )
 
-ExternalProject_Add_Step(external_xvidcore after_install
-  COMMAND ${CMAKE_COMMAND} -E rename ${LIBDIR}/xvidcore/lib/xvidcore.a ${LIBDIR}/xvidcore/lib/libxvidcore.a || true
-  COMMAND ${CMAKE_COMMAND} -E remove ${LIBDIR}/xvidcore/lib/xvidcore.dll.a
-  DEPENDEES install
-)
+if(WIN32)
+  ExternalProject_Add_Step(external_xvidcore after_install
+    COMMAND ${CMAKE_COMMAND} -E rename ${LIBDIR}/xvidcore/lib/xvidcore.a ${LIBDIR}/xvidcore/lib/libxvidcore.a || true
+    COMMAND ${CMAKE_COMMAND} -E remove ${LIBDIR}/xvidcore/lib/xvidcore.dll.a
+    DEPENDEES install
+  )
+endif()
 
 if(MSVC)
   set_target_properties(external_xvidcore PROPERTIES FOLDER Mingw)
build_files/build_environment/linux/make_deps_wrapper.sh (new executable file, 74 lines)

@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+# This script ensures:
+# - One dependency is built at a time.
+# - That dependency uses all available cores.
+#
+# Without this, simply calling `make -j$(nproc)` from the `${CMAKE_BUILD_DIR}/deps/`
+# directory will build many projects at once.
+#
+# This is undesirable for the following reasons:
+#
+# - The output from projects is mixed together,
+#   making it difficult to track down the cause of a build failure.
+#
+# - Larger dependencies such as LLVM can bottleneck the build process,
+#   making it necessary to cancel the build and manually run build commands in each directory.
+#
+# - Building many projects at once means canceling (Control-C) can lead to the build being in an undefined state.
+#   It's possible canceling happens as a patch is being applied or files are being copied.
+#   (steps that aren't part of the compilation process where it's typically safe to cancel).
+
+if [[ -z "$MY_MAKE_CALL_LEVEL" ]]; then
+  export MY_MAKE_CALL_LEVEL=0
+  export MY_MAKEFLAGS=$MAKEFLAGS
+
+  # Extract the jobs argument (`-jN`, `-j N`, `--jobs=N`).
+  add_next=0
+  for i in "$@"; do
+    case $i in
+      -j*)
+        export MY_JOBS_ARG=$i
+        if [ "$MY_JOBS_ARG" = "-j" ]; then
+          add_next=1
+        fi
+        ;;
+      --jobs=*)
+        shift # past argument=value
+        MY_JOBS_ARG=$i
+        ;;
+      *)
+        if (( add_next == 1 )); then
+          MY_JOBS_ARG="$MY_JOBS_ARG $i"
+          add_next=0
+        fi
+        ;;
+    esac
+  done
+  unset i add_next
+
+  if [[ -z "$MY_JOBS_ARG" ]]; then
+    MY_JOBS_ARG="-j$(nproc)"
+  fi
+  export MY_JOBS_ARG
+  # Support user defined `MAKEFLAGS`.
+  export MAKEFLAGS="$MY_MAKEFLAGS -j1"
+else
+  export MY_MAKE_CALL_LEVEL=$(( MY_MAKE_CALL_LEVEL + 1 ))
+  if (( MY_MAKE_CALL_LEVEL == 1 )); then
+    # Important to set jobs to 1, otherwise user defined jobs argument is used.
+    export MAKEFLAGS="$MY_MAKEFLAGS -j1"
+  elif (( MY_MAKE_CALL_LEVEL == 2 )); then
+    # This is the level used by each sub-project.
+    export MAKEFLAGS="$MY_MAKEFLAGS $MY_JOBS_ARG"
+  fi
+  # Else leave `MY_MAKEFLAGS` flags as-is, avoids setting a high number of jobs on recursive
+  # calls (which may easily run out of memory). Let the job-server handle the rest.
+fi
+
+# Useful for troubleshooting the wrapper.
+# echo "Call level: $MY_MAKE_CALL_LEVEL, args=$@".
+
+# Call actual make but ensure recursive calls run via this script.
+exec make MAKE="$0" "$@"
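The wrapper leans on two `make` behaviors: `MAKEFLAGS` is exported to every sub-make, and `MAKE="$0"` forces recursive invocations back through this script, so the job count can differ per recursion depth (serial across the project list, parallel inside each project). A hypothetical direct invocation (the build-tree path is illustrative):

```sh
# Build dependencies one project at a time, each project using all cores;
# the recursion level decides which MAKEFLAGS value applies.
cd ~/blender-git/build_linux/deps
~/blender-git/blender/build_files/build_environment/linux/make_deps_wrapper.sh -s -j"$(nproc)"
```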
@@ -1,7 +1,7 @@
 diff -Naur external_igc_opencl_clang.orig/CMakeLists.txt external_igc_opencl_clang/CMakeLists.txt
 --- external_igc_opencl_clang.orig/CMakeLists.txt  2022-03-16 05:51:10 -0600
 +++ external_igc_opencl_clang/CMakeLists.txt  2022-05-23 10:40:09 -0600
-@@ -126,22 +126,24 @@
+@@ -147,22 +147,24 @@
 )
 endif()
 
build_files/build_environment/patches/openvdb_metal.diff (new file, 8007 lines)

(File diff suppressed because it is too large.)
@@ -23,19 +23,19 @@ if(EXISTS ${SOURCE_DIR}/.git)
 
   if(MY_WC_BRANCH STREQUAL "HEAD")
     # Detached HEAD, check whether commit hash is reachable
-    # in the master branch
+    # in the main branch
     execute_process(COMMAND git rev-parse --short=12 HEAD
                     WORKING_DIRECTORY ${SOURCE_DIR}
                     OUTPUT_VARIABLE MY_WC_HASH
                     OUTPUT_STRIP_TRAILING_WHITESPACE)
 
-    execute_process(COMMAND git branch --list master blender-v* --contains ${MY_WC_HASH}
+    execute_process(COMMAND git branch --list main blender-v* --contains ${MY_WC_HASH}
                     WORKING_DIRECTORY ${SOURCE_DIR}
                     OUTPUT_VARIABLE _git_contains_check
                     OUTPUT_STRIP_TRAILING_WHITESPACE)
 
     if(NOT _git_contains_check STREQUAL "")
-      set(MY_WC_BRANCH "master")
+      set(MY_WC_BRANCH "main")
     else()
       execute_process(COMMAND git show-ref --tags -d
                       WORKING_DIRECTORY ${SOURCE_DIR}
@@ -48,7 +48,7 @@ if(EXISTS ${SOURCE_DIR}/.git)
                       OUTPUT_STRIP_TRAILING_WHITESPACE)
 
       if(_git_tag_hashes MATCHES "${_git_head_hash}")
-        set(MY_WC_BRANCH "master")
+        set(MY_WC_BRANCH "main")
       else()
         execute_process(COMMAND git branch --contains ${MY_WC_HASH}
                         WORKING_DIRECTORY ${SOURCE_DIR}
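The renamed branch test can be reproduced from a shell; a sketch of what the two `execute_process` calls above amount to on a detached HEAD after the master-to-main rename:

```sh
# Non-empty output from the second command means the commit is reachable
# from main or a release branch, so MY_WC_BRANCH becomes "main".
HASH=$(git rev-parse --short=12 HEAD)
git branch --list main 'blender-v*' --contains "$HASH"
```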
@@ -85,7 +85,7 @@ if(NOT APPLE)
   set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
   set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
   set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
-  set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
+  set(WITH_CYCLES_HIP_BINARIES OFF CACHE BOOL "" FORCE)
   set(WITH_CYCLES_DEVICE_ONEAPI ON CACHE BOOL "" FORCE)
   set(WITH_CYCLES_ONEAPI_BINARIES ON CACHE BOOL "" FORCE)
 endif()
@@ -11,11 +11,11 @@
 mkdir ~/blender-git
 cd ~/blender-git
 
-git clone http://git.blender.org/blender.git
+git clone https://projects.blender.org/blender/blender.git
 cd blender
 git submodule update --init --recursive
-git submodule foreach git checkout master
-git submodule foreach git pull --rebase origin master
+git submodule foreach git checkout main
+git submodule foreach git pull --rebase origin main
 
 # create build dir
 mkdir ~/blender-git/build-cmake
@@ -35,7 +35,7 @@ ln -s ~/blender-git/build-cmake/bin/blender ~/blender-git/blender/blender.bin
 echo ""
 echo "* Useful Commands *"
 echo "  Run Blender: ~/blender-git/blender/blender.bin"
-echo "  Update Blender: git pull --rebase; git submodule foreach git pull --rebase origin master"
+echo "  Update Blender: git pull --rebase; git submodule foreach git pull --rebase origin main"
 echo "  Reconfigure Blender: cd ~/blender-git/build-cmake ; cmake ."
 echo "  Build Blender: cd ~/blender-git/build-cmake ; make"
 echo ""
@@ -544,7 +544,7 @@ endfunction()
 function(setup_platform_linker_libs
   target
 )
-  # jemalloc must be early in the list, to be before pthread (see T57998)
+  # jemalloc must be early in the list, to be before pthread (see #57998).
   if(WITH_MEM_JEMALLOC)
     target_link_libraries(${target} ${JEMALLOC_LIBRARIES})
   endif()
@@ -440,7 +440,7 @@ string(APPEND PLATFORM_LINKFLAGS " -stdlib=libc++")
 # Make stack size more similar to Embree, required for Embree.
 string(APPEND PLATFORM_LINKFLAGS_EXECUTABLE " -Wl,-stack_size,0x100000")
 
-# Suppress ranlib "has no symbols" warnings (workaround for T48250)
+# Suppress ranlib "has no symbols" warnings (workaround for #48250).
 set(CMAKE_C_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
 set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
 # llvm-ranlib doesn't support this flag. Xcode's libtool does.
@@ -121,7 +121,7 @@ if(WITH_WINDOWS_BUNDLE_CRT)
   include(InstallRequiredSystemLibraries)
 
   # ucrtbase(d).dll cannot be in the manifest, due to the way windows 10 handles
-  # redirects for this dll, for details see T88813.
+  # redirects for this dll, for details see #88813.
   foreach(lib ${CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS})
     string(FIND ${lib} "ucrtbase" pos)
     if(NOT pos EQUAL -1)
@@ -295,7 +295,7 @@ unset(MATERIALX_LIB_FOLDER_EXISTS)
 if(NOT MSVC_CLANG AND # Available with MSVC 15.7+ but not for CLANG.
    NOT WITH_WINDOWS_SCCACHE AND # And not when sccache is enabled
    NOT VS_CLANG_TIDY) # Clang-tidy does not like these options
-  add_compile_options(/experimental:external /external:templates- /external:I "${LIBDIR}" /external:W0)
+  add_compile_options(/experimental:external /external:I "${LIBDIR}" /external:W0)
 endif()
 
 # Add each of our libraries to our cmake_prefix_path so find_package() could work
@@ -901,11 +901,11 @@ endif()

 if(WINDOWS_PYTHON_DEBUG)
   # Include the system scripts in the blender_python_system_scripts project.
-  file(GLOB_RECURSE inFiles "${CMAKE_SOURCE_DIR}/release/scripts/*.*" )
+  file(GLOB_RECURSE inFiles "${CMAKE_SOURCE_DIR}/scripts/*.*" )
   add_custom_target(blender_python_system_scripts SOURCES ${inFiles})
   foreach(_source IN ITEMS ${inFiles})
     get_filename_component(_source_path "${_source}" PATH)
-    string(REPLACE "${CMAKE_SOURCE_DIR}/release/scripts/" "" _source_path "${_source_path}")
+    string(REPLACE "${CMAKE_SOURCE_DIR}/scripts/" "" _source_path "${_source_path}")
     string(REPLACE "/" "\\" _group_path "${_source_path}")
     source_group("${_group_path}" FILES "${_source}")
   endforeach()
@@ -940,7 +940,7 @@ if(WINDOWS_PYTHON_DEBUG)
   file(WRITE ${USER_PROPS_FILE} "<?xml version=\"1.0\" encoding=\"utf-8\"?>
 <Project DefaultTargets=\"Build\" xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">
   <PropertyGroup>
-    <LocalDebuggerCommandArguments>-con --env-system-scripts \"${CMAKE_SOURCE_DIR}/release/scripts\" </LocalDebuggerCommandArguments>
+    <LocalDebuggerCommandArguments>-con --env-system-scripts \"${CMAKE_SOURCE_DIR}/scripts\" </LocalDebuggerCommandArguments>
   </PropertyGroup>
 </Project>")
 endif()
@@ -142,7 +142,7 @@ def cmake_advanced_info() -> Union[Tuple[List[str], List[Tuple[str, str]]], Tupl

     make_exe = cmake_cache_var("CMAKE_MAKE_PROGRAM")
     if make_exe is None:
-        print("Make command not found in: %r not found" % project_path)
+        print("Make command not found: CMAKE_MAKE_PROGRAM")
         return None, None

     make_exe_basename = os.path.basename(make_exe)
@@ -1,53 +1,3 @@
-#
-# Used by Buildbot build pipeline make_update.py script only for now
-# We intended to update the make_update.py in the branches to use this file eventually
-#
-update-code:
-  git:
-    submodules:
-      - branch: master
-        commit_id: HEAD
-        path: release/scripts/addons
-      - branch: master
-        commit_id: HEAD
-        path: release/scripts/addons_contrib
-      - branch: master
-        commit_id: HEAD
-        path: release/datafiles/locale
-      - branch: master
-        commit_id: HEAD
-        path: source/tools
-  svn:
-    libraries:
-      darwin-arm64:
-        branch: trunk
-        commit_id: HEAD
-        path: lib/darwin_arm64
-      darwin-x86_64:
-        branch: trunk
-        commit_id: HEAD
-        path: lib/darwin
-      linux-x86_64:
-        branch: trunk
-        commit_id: HEAD
-        path: lib/linux_x86_64_glibc_228
-      windows-amd64:
-        branch: trunk
-        commit_id: HEAD
-        path: lib/win64_vc15
-    tests:
-      branch: trunk
-      commit_id: HEAD
-      path: lib/tests
-    benchmarks:
-      branch: trunk
-      commit_id: HEAD
-      path: lib/benchmarks
-    assets:
-      branch: trunk
-      commit_id: HEAD
-      path: lib/assets
-
 #
 # Buildbot only configs
 #
@@ -58,7 +58,7 @@ Each Blender release supports one Python version, and the package is only compat
 ## Source Code

 * [Releases](https://download.blender.org/source/)
-* Repository: [git.blender.org/blender.git](https://git.blender.org/gitweb/gitweb.cgi/blender.git)
+* Repository: [projects.blender.org/blender/blender.git](https://projects.blender.org/blender/blender)

 ## Credits
@@ -135,7 +135,7 @@ def submodules_to_manifest(
         submodule = line.split()[1]

         # Don't use native slashes as GIT for MS-Windows outputs forward slashes.
-        if skip_addon_contrib and submodule == "release/scripts/addons_contrib":
+        if skip_addon_contrib and submodule == "scripts/addons_contrib":
             continue

         for path in git_ls_files(blender_srcdir / submodule):
@@ -16,14 +16,28 @@ import shutil
 import sys

 import make_utils
+from pathlib import Path
 from make_utils import call, check_output
+from urllib.parse import urljoin

 from typing import (
     List,
+    Iterable,
     Optional,
 )


+class Submodule:
+    path: str
+    branch: str
+    branch_fallback: str
+
+    def __init__(self, path: str, branch: str, branch_fallback: str) -> None:
+        self.path = path
+        self.branch = branch
+        self.branch_fallback = branch_fallback
+
+
 def print_stage(text: str) -> None:
     print("")
     print(text)
@@ -42,6 +56,7 @@ def parse_arguments() -> argparse.Namespace:
     parser.add_argument("--svn-branch", default=None)
     parser.add_argument("--git-command", default="git")
     parser.add_argument("--use-linux-libraries", action="store_true")
+    parser.add_argument("--architecture", type=str, choices=("x86_64", "amd64", "arm64",))
     return parser.parse_args()

@@ -51,6 +66,19 @@ def get_blender_git_root() -> str:
 # Setup for precompiled libraries and tests from svn.


+def get_effective_architecture(args: argparse.Namespace) -> str:
+    architecture = args.architecture
+    if architecture:
+        assert isinstance(architecture, str)
+        return architecture
+
+    # Check platform.version to detect arm64 with x86_64 python binary.
+    if "ARM64" in platform.version():
+        return "arm64"
+
+    return platform.machine().lower()
+
+
 def svn_update(args: argparse.Namespace, release_version: Optional[str]) -> None:
     svn_non_interactive = [args.svn_command, '--non-interactive']

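Note: the new `get_effective_architecture()` above prefers the explicit `--architecture` flag and only then probes the interpreter. A standalone sketch of the same fallback (stdlib only; the `Namespace` literal is just for illustration):

```python
import argparse
import platform

def effective_architecture(args: argparse.Namespace) -> str:
    # An explicit --architecture flag always wins.
    if args.architecture:
        return args.architecture
    # Per the comment in the diff: an x86_64 Python binary on an ARM64 host
    # can still expose "ARM64" via the OS version string.
    if "ARM64" in platform.version():
        return "arm64"
    return platform.machine().lower()

print(effective_architecture(argparse.Namespace(architecture=None)))  # e.g. "x86_64"
```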
@@ -58,11 +86,11 @@ def svn_update(args: argparse.Namespace, release_version: Optional[str]) -> None
     svn_url = make_utils.svn_libraries_base_url(release_version, args.svn_branch)

     # Checkout precompiled libraries
+    architecture = get_effective_architecture(args)
     if sys.platform == 'darwin':
-        # Check platform.version to detect arm64 with x86_64 python binary.
-        if platform.machine() == 'arm64' or ('ARM64' in platform.version()):
+        if architecture == 'arm64':
             lib_platform = "darwin_arm64"
-        elif platform.machine() == 'x86_64':
+        elif architecture == 'x86_64':
             lib_platform = "darwin"
         else:
             lib_platform = None
@@ -170,7 +198,7 @@ def git_update_skip(args: argparse.Namespace, check_remote_exists: bool = True)
         return "rebase or merge in progress, complete it first"

     # Abort if uncommitted changes.
-    changes = check_output([args.git_command, 'status', '--porcelain', '--untracked-files=no'])
+    changes = check_output([args.git_command, 'status', '--porcelain', '--untracked-files=no', '--ignore-submodules'])
     if len(changes) != 0:
         return "you have unstaged changes"

@@ -184,97 +212,282 @@ def git_update_skip(args: argparse.Namespace, check_remote_exists: bool = True)
     return ""


+def use_upstream_workflow(args: argparse.Namespace) -> bool:
+    return make_utils.git_remote_exist(args.git_command, "upstream")
+
+
+def work_tree_update_upstream_workflow(args: argparse.Namespace, use_fetch=True) -> str:
+    """
+    Update the Blender repository using the Github style of fork organization
+
+    Returns true if the current local branch has been updated to the upstream state.
+    Otherwise false is returned.
+    """
+
+    branch_name = make_utils.git_branch(args.git_command)
+
+    if use_fetch:
+        call((args.git_command, "fetch", "upstream"))
+
+    upstream_branch = f"upstream/{branch_name}"
+    if not make_utils.git_branch_exists(args.git_command, upstream_branch):
+        return "no_branch"
+
+    retcode = call((args.git_command, "merge", "--ff-only", upstream_branch), exit_on_error=False)
+    if retcode != 0:
+        return "Unable to fast forward\n"
+
+    return ""
+
+
+def work_tree_update(args: argparse.Namespace, use_fetch=True) -> str:
+    """
+    Update the Git working tree using the best strategy
+
+    This function detects whether it is a github style of fork remote organization is used, or
+    is it a repository which origin is an upstream.
+    """
+
+    if use_upstream_workflow(args):
+        message = work_tree_update_upstream_workflow(args, use_fetch)
+        if message != "no_branch":
+            return message
+
+        # If there is upstream configured but the local branch is not in the upstream, try to
+        # update the branch from the fork.
+
+    update_command = [args.git_command, "pull", "--rebase"]
+
+    call(update_command)
+
+    return ""
+
+
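Note: the two strategies that `work_tree_update()` chooses between reduce to a handful of git invocations. A rough, illustrative sketch using plain `subprocess` instead of the script's `call()` helper:

```python
import subprocess

def update_work_tree(git: str, branch: str, has_upstream_remote: bool) -> None:
    if has_upstream_remote:
        # Fork ("upstream") workflow: fast-forward onto the upstream branch.
        subprocess.check_call([git, "fetch", "upstream"])
        subprocess.check_call([git, "merge", "--ff-only", f"upstream/{branch}"])
    else:
        # Plain clone where origin is the upstream: a rebase pull suffices.
        subprocess.check_call([git, "pull", "--rebase"])
```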
 # Update blender repository.
-def blender_update(args: argparse.Namespace) -> None:
+def blender_update(args: argparse.Namespace) -> str:
     print_stage("Updating Blender Git Repository")
-    call([args.git_command, "pull", "--rebase"])
+    return work_tree_update(args)


-# Update submodules.
-def submodules_update(
-    args: argparse.Namespace,
-    release_version: Optional[str],
-    branch: Optional[str],
-) -> str:
-    print_stage("Updating Submodules")
-    if make_utils.command_missing(args.git_command):
-        sys.stderr.write("git not found, can't update code\n")
-        sys.exit(1)
+def resolve_external_url(blender_url: str, repo_name: str) -> str:
+    return urljoin(blender_url + "/", "../" + repo_name)


-    # Update submodules to appropriate given branch,
-    # falling back to master if none is given and/or found in a sub-repository.
-    branch_fallback = "master"
+def external_script_copy_old_submodule_over(args: argparse.Namespace, directory_name: str) -> None:
+    blender_git_root = Path(get_blender_git_root())
+    scripts_dir = blender_git_root / "scripts"
+    external_dir = scripts_dir / directory_name
+
+    old_submodule_relative_dir = Path("release") / "scripts" / directory_name
+    print(f"Moving {old_submodule_relative_dir} to scripts/{directory_name} ...")
+
+    old_submodule_dir = blender_git_root / old_submodule_relative_dir
+    shutil.move(old_submodule_dir, external_dir)
+
+    # Remove old ".git" which is a file with path to a submodule bare repo inside of main
+    # repo .git/modules directory.
+    (external_dir / ".git").unlink()
+
+    bare_repo_relative_dir = Path(".git") / "modules" / "release" / "scripts" / directory_name
+    print(f"Copying {bare_repo_relative_dir} to scripts/{directory_name}/.git ...")
+    bare_repo_dir = blender_git_root / bare_repo_relative_dir
+    shutil.copytree(bare_repo_dir, external_dir / ".git")
+
+    git_config = external_dir / ".git" / "config"
+    call((args.git_command, "config", "--file", git_config, "--unset", "core.worktree"))
+
+
+def external_script_initialize_if_needed(args: argparse.Namespace,
+                                         repo_name: str,
+                                         directory_name: str) -> None:
+    """Initialize checkout of an external repository scripts directory"""
+
+    blender_git_root = Path(get_blender_git_root())
+    blender_dot_git = blender_git_root / ".git"
+    scripts_dir = blender_git_root / "scripts"
+    external_dir = scripts_dir / directory_name
+
+    if external_dir.exists():
+        return
+
+    print(f"Initializing scripts/{directory_name} ...")
+
+    old_submodule_dot_git = blender_git_root / "release" / "scripts" / directory_name / ".git"
+    if old_submodule_dot_git.exists() and blender_dot_git.is_dir():
+        external_script_copy_old_submodule_over(args, directory_name)
+        return
+
+    origin_name = "upstream" if use_upstream_workflow(args) else "origin"
+    blender_url = make_utils.git_get_remote_url(args.git_command, origin_name)
+    external_url = resolve_external_url(blender_url, repo_name)
+
+    call((args.git_command, "clone", "--origin", origin_name, external_url, external_dir))
+
+
+def external_script_add_origin_if_needed(args: argparse.Namespace,
+                                         repo_name: str,
+                                         directory_name: str) -> str:
+    """
+    Add remote called 'origin' if there is a fork of the external repository available
+
+    This is only done when using Github style upstream workflow in the main repository.
+    """
+
+    if not use_upstream_workflow(args):
+        return ""
+
+    cwd = os.getcwd()
+
+    blender_git_root = Path(get_blender_git_root())
+    scripts_dir = blender_git_root / "scripts"
+    external_dir = scripts_dir / directory_name
+
+    origin_blender_url = make_utils.git_get_remote_url(args.git_command, "origin")
+    origin_external_url = resolve_external_url(origin_blender_url, repo_name)
+
+    try:
+        os.chdir(external_dir)
+
+        if (make_utils.git_remote_exist(args.git_command, "origin") or
+                not make_utils.git_remote_exist(args.git_command, "upstream")):
+            return
+
+        if not make_utils.git_is_remote_repository(args.git_command, origin_external_url):
+            return
+
+        print(f"Adding origin remote to {directory_name} pointing to fork ...")
+
+        # Non-obvious tricks to introduce the new remote called "origin" to the existing
+        # submodule configuration.
+        #
+        # This is all within the content of creating a fork of a submodule after `make update`
+        # has been run and possibly local branches tracking upstream were added.
+        #
+        # The idea here goes as following:
+        #
+        #  - Rename remote "upstream" to "origin", which takes care of changing the names of
+        #    remotes the local branches are tracking.
+        #
+        #  - Change the URL to the "origin", which so far was still pointing to upstream.
+        #
+        #  - Re-introduce the "upstream" remote, with the same URL as it had prior to rename.
+
+        upstream_url = make_utils.git_get_remote_url(args.git_command, "upstream")
+
+        call((args.git_command, "remote", "rename", "upstream", "origin"))
+        make_utils.git_set_config(args.git_command, f"remote.origin.url", origin_external_url)
+
+        call((args.git_command, "remote", "add", "upstream", upstream_url))
+    finally:
+        os.chdir(cwd)
+
+    return ""
+
+
def external_scripts_update(args: argparse.Namespace,
|
||||||
|
repo_name: str,
|
||||||
|
directory_name: str,
|
||||||
|
branch: Optional[str]) -> str:
|
||||||
|
"""Update a single external checkout with the given name in the scripts folder"""
|
||||||
|
|
||||||
|
external_script_initialize_if_needed(args, repo_name, directory_name)
|
||||||
|
external_script_add_origin_if_needed(args, repo_name, directory_name)
|
||||||
|
|
||||||
|
print(f"Updating scripts/{directory_name} ...")
|
||||||
|
|
||||||
|
cwd = os.getcwd()
|
||||||
|
|
||||||
|
blender_git_root = Path(get_blender_git_root())
|
||||||
|
scripts_dir = blender_git_root / "scripts"
|
||||||
|
external_dir = scripts_dir / directory_name
|
||||||
|
|
||||||
|
# Update externals to appropriate given branch, falling back to main if none is given and/or
|
||||||
|
# found in a sub-repository.
|
||||||
|
branch_fallback = "main"
|
||||||
if not branch:
|
if not branch:
|
||||||
branch = branch_fallback
|
branch = branch_fallback
|
||||||
|
|
||||||
submodules = [
|
|
||||||
("release/scripts/addons", branch, branch_fallback),
|
|
||||||
("release/scripts/addons_contrib", branch, branch_fallback),
|
|
||||||
("release/datafiles/locale", branch, branch_fallback),
|
|
||||||
("source/tools", branch, branch_fallback),
|
|
||||||
]
|
|
||||||
|
|
||||||
# Initialize submodules only if needed.
|
|
||||||
for submodule_path, submodule_branch, submodule_branch_fallback in submodules:
|
|
||||||
if not os.path.exists(os.path.join(submodule_path, ".git")):
|
|
||||||
call([args.git_command, "submodule", "update", "--init", "--recursive"])
|
|
||||||
break
|
|
||||||
|
|
||||||
# Checkout appropriate branch and pull changes.
|
|
||||||
skip_msg = ""
|
skip_msg = ""
|
||||||
for submodule_path, submodule_branch, submodule_branch_fallback in submodules:
|
|
||||||
cwd = os.getcwd()
|
|
||||||
try:
|
|
||||||
os.chdir(submodule_path)
|
|
||||||
msg = git_update_skip(args, check_remote_exists=False)
|
|
||||||
if msg:
|
|
||||||
skip_msg += submodule_path + " skipped: " + msg + "\n"
|
|
||||||
else:
|
|
||||||
# Find a matching branch that exists.
|
|
||||||
call([args.git_command, "fetch", "origin"])
|
|
||||||
if make_utils.git_branch_exists(args.git_command, submodule_branch):
|
|
||||||
pass
|
|
||||||
elif make_utils.git_branch_exists(args.git_command, submodule_branch_fallback):
|
|
||||||
submodule_branch = submodule_branch_fallback
|
|
||||||
else:
|
|
||||||
# Skip.
|
|
||||||
submodule_branch = ""
|
|
||||||
|
|
||||||
# Switch to branch and pull.
|
try:
|
||||||
if submodule_branch:
|
os.chdir(external_dir)
|
||||||
if make_utils.git_branch(args.git_command) != submodule_branch:
|
msg = git_update_skip(args, check_remote_exists=False)
|
||||||
call([args.git_command, "checkout", submodule_branch])
|
if msg:
|
||||||
call([args.git_command, "pull", "--rebase", "origin", submodule_branch])
|
skip_msg += directory_name + " skipped: " + msg + "\n"
|
||||||
finally:
|
else:
|
||||||
os.chdir(cwd)
|
# Find a matching branch that exists.
|
||||||
|
for remote in ("origin", "upstream"):
|
||||||
|
if make_utils.git_remote_exist(args.git_command, remote):
|
||||||
|
call([args.git_command, "fetch", remote])
|
||||||
|
|
||||||
|
submodule_branch = branch
|
||||||
|
|
||||||
|
if make_utils.git_branch_exists(args.git_command, submodule_branch):
|
||||||
|
pass
|
||||||
|
elif make_utils.git_branch_exists(args.git_command, branch_fallback):
|
||||||
|
submodule_branch = branch_fallback
|
||||||
|
else:
|
||||||
|
# Skip.
|
||||||
|
submodule_branch = ""
|
||||||
|
|
||||||
|
# Switch to branch and pull.
|
||||||
|
if submodule_branch:
|
||||||
|
if make_utils.git_branch(args.git_command) != submodule_branch:
|
||||||
|
call([args.git_command, "checkout", submodule_branch])
|
||||||
|
# Don't use extra fetch since all remotes of interest have been already fetched
|
||||||
|
# some lines above.
|
||||||
|
skip_msg += work_tree_update(args, use_fetch=False)
|
||||||
|
finally:
|
||||||
|
os.chdir(cwd)
|
||||||
|
|
||||||
return skip_msg
|
return skip_msg
|
||||||
|
|
||||||
|
|
||||||
|
def scripts_submodules_update(args: argparse.Namespace, branch: Optional[str]) -> str:
|
||||||
|
"""Update working trees of addons and addons_contrib within the scripts/ directory"""
|
||||||
|
msg = ""
|
||||||
|
|
||||||
|
msg += external_scripts_update(args, "blender-addons", "addons", branch)
|
||||||
|
msg += external_scripts_update(args, "blender-addons-contrib", "addons_contrib", branch)
|
||||||
|
|
||||||
|
return msg
|
||||||
|
|
||||||
|
|
||||||
|
def submodules_update(args: argparse.Namespace, branch: Optional[str]) -> str:
|
||||||
|
"""Update submodules or other externally tracked source trees"""
|
||||||
|
msg = ""
|
||||||
|
|
||||||
|
msg += scripts_submodules_update(args, branch)
|
||||||
|
|
||||||
|
return msg
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
args = parse_arguments()
|
args = parse_arguments()
|
||||||
blender_skip_msg = ""
|
blender_skip_msg = ""
|
||||||
submodules_skip_msg = ""
|
submodules_skip_msg = ""
|
||||||
|
|
||||||
# Test if we are building a specific release version.
|
blender_version = make_utils. parse_blender_version()
|
||||||
branch = make_utils.git_branch(args.git_command)
|
if blender_version.cycle != 'alpha':
|
||||||
if branch == 'HEAD':
|
major = blender_version.version // 100
|
||||||
sys.stderr.write('Blender git repository is in detached HEAD state, must be in a branch\n')
|
minor = blender_version.version % 100
|
||||||
sys.exit(1)
|
branch = f"blender-v{major}.{minor}-release"
|
||||||
|
release_version: Optional[str] = f"{major}.{minor}"
|
||||||
tag = make_utils.git_tag(args.git_command)
|
else:
|
||||||
release_version = make_utils.git_branch_release_version(branch, tag)
|
branch = 'main'
|
||||||
|
release_version = None
|
||||||
|
|
||||||
if not args.no_libraries:
|
if not args.no_libraries:
|
||||||
svn_update(args, release_version)
|
svn_update(args, release_version)
|
||||||
if not args.no_blender:
|
if not args.no_blender:
|
||||||
blender_skip_msg = git_update_skip(args)
|
blender_skip_msg = git_update_skip(args)
|
||||||
|
if not blender_skip_msg:
|
||||||
|
blender_skip_msg = blender_update(args)
|
||||||
if blender_skip_msg:
|
if blender_skip_msg:
|
||||||
blender_skip_msg = "Blender repository skipped: " + blender_skip_msg + "\n"
|
blender_skip_msg = "Blender repository skipped: " + blender_skip_msg + "\n"
|
||||||
else:
|
|
||||||
blender_update(args)
|
|
||||||
if not args.no_submodules:
|
if not args.no_submodules:
|
||||||
submodules_skip_msg = submodules_update(args, release_version, branch)
|
submodules_skip_msg = submodules_update(args, branch)
|
||||||
|
|
||||||
# Report any skipped repositories at the end, so it's not as easy to miss.
|
# Report any skipped repositories at the end, so it's not as easy to miss.
|
||||||
skip_msg = blender_skip_msg + submodules_skip_msg
|
skip_msg = blender_skip_msg + submodules_skip_msg
|
||||||
|
@@ -9,7 +9,9 @@ import re
 import shutil
 import subprocess
 import sys
+import os
 from pathlib import Path
+from urllib.parse import urljoin

 from typing import (
     Sequence,
@@ -19,7 +21,7 @@ from typing import (

 def call(cmd: Sequence[str], exit_on_error: bool = True, silent: bool = False) -> int:
     if not silent:
-        print(" ".join(cmd))
+        print(" ".join([str(x) for x in cmd]))

     # Flush to ensure correct order output on Windows.
     sys.stdout.flush()
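Note: this one-line change matters because the new update code passes `pathlib.Path` objects (such as `external_dir`) inside command lists, and `str.join()` rejects non-string items. A minimal illustration (the URL and path are made up):

```python
from pathlib import Path

cmd = ["git", "clone", "https://example.org/repo.git", Path("/tmp/checkout")]
# " ".join(cmd) would raise TypeError: expected str instance, PosixPath found.
print(" ".join([str(x) for x in cmd]))
```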
@@ -55,10 +57,48 @@ def check_output(cmd: Sequence[str], exit_on_error: bool = True) -> str:
 def git_branch_exists(git_command: str, branch: str) -> bool:
     return (
         call([git_command, "rev-parse", "--verify", branch], exit_on_error=False, silent=True) == 0 or
+        call([git_command, "rev-parse", "--verify", "remotes/upstream/" + branch], exit_on_error=False, silent=True) == 0 or
         call([git_command, "rev-parse", "--verify", "remotes/origin/" + branch], exit_on_error=False, silent=True) == 0
     )


+def git_get_remote_url(git_command: str, remote_name: str) -> bool:
+    return check_output((git_command, "ls-remote", "--get-url", remote_name))
+
+
+def git_remote_exist(git_command: str, remote_name: str) -> bool:
+    """Check whether there is a remote with the given name"""
+    # `git ls-remote --get-url upstream` will print an URL if there is such remote configured, and
+    # otherwise will print "upstream".
+    remote_url = check_output((git_command, "ls-remote", "--get-url", remote_name))
+    return remote_url != remote_name
+
+
+def git_get_resolved_submodule_url(git_command: str, blender_url: str, submodule_path: str) -> str:
+    git_root = check_output([git_command, "rev-parse", "--show-toplevel"])
+    dot_gitmodules = os.path.join(git_root, ".gitmodules")
+
+    submodule_key_prefix = f"submodule.{submodule_path}"
+    submodule_key_url = f"{submodule_key_prefix}.url"
+
+    gitmodule_url = git_get_config(
+        git_command, submodule_key_url, file=dot_gitmodules)
+
+    # A bit of a trickery to construct final URL.
+    # Only works for the relative submodule URLs.
+    #
+    # Note that unless the LHS URL ends up with a slash urljoin treats the last component as a
+    # file.
+    assert gitmodule_url.startswith('..')
+    return urljoin(blender_url + "/", gitmodule_url)
+
+
+def git_is_remote_repository(git_command: str, repo: str) -> bool:
+    """Returns true if the given repository is a valid/clonable git repo"""
+    exit_code = call((git_command, "ls-remote", repo, "HEAD"), exit_on_error=False, silent=True)
+    return exit_code == 0
+
+
 def git_branch(git_command: str) -> str:
     # Get current branch name.
     try:
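Note: the `urljoin` caveat quoted in `git_get_resolved_submodule_url()` is the crux of both URL helpers: without a trailing slash the last path component is treated as a file and dropped during relative resolution. For example:

```python
from urllib.parse import urljoin

blender_url = "https://projects.blender.org/blender/blender.git"

# Without the trailing slash "blender.git" is treated as a file and replaced:
print(urljoin(blender_url, "../blender-addons.git"))
# -> https://projects.blender.org/blender-addons.git

# With it, ".." resolves relative to the repository path, as intended:
print(urljoin(blender_url + "/", "../blender-addons.git"))
# -> https://projects.blender.org/blender/blender-addons.git
```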
@@ -70,6 +110,20 @@ def git_branch(git_command: str) -> str:
     return branch.strip().decode('utf8')


+def git_get_config(git_command: str, key: str, file: Optional[str] = None) -> str:
+    if file:
+        return check_output([git_command, "config", "--file", file, "--get", key])
+
+    return check_output([git_command, "config", "--get", key])
+
+
+def git_set_config(git_command: str, key: str, value: str, file: Optional[str] = None) -> str:
+    if file:
+        return check_output([git_command, "config", "--file", file, key, value])
+
+    return check_output([git_command, "config", key, value])
+
+
 def git_tag(git_command: str) -> Optional[str]:
     # Get current tag name.
     try:
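Note: the `file` argument to `git_get_config()`/`git_set_config()` is what lets the update scripts read keys straight out of `.gitmodules` rather than the repository config. The equivalent plain-git form (the submodule key here is illustrative):

```python
import subprocess

url = subprocess.check_output(
    ["git", "config", "--file", ".gitmodules",
     "--get", "submodule.release/scripts/addons.url"],
    text=True).strip()
print(url)  # e.g. "../blender-addons.git" for a relative submodule URL
```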
@@ -3,9 +3,9 @@ if NOT exist "%BLENDER_DIR%\source\tools\.git" (
     if not "%GIT%" == "" (
         "%GIT%" submodule update --init --recursive --progress
         if errorlevel 1 goto FAIL
-        "%GIT%" submodule foreach git checkout master
+        "%GIT%" submodule foreach git checkout main
         if errorlevel 1 goto FAIL
-        "%GIT%" submodule foreach git pull --rebase origin master
+        "%GIT%" submodule foreach git pull --rebase origin main
         if errorlevel 1 goto FAIL
         goto EOF
     ) else (
@@ -4,9 +4,9 @@ if "%GIT%" == "" (
 )
 cd "%BLENDER_DIR%"
 for /f "delims=" %%i in ('"%GIT%" rev-parse HEAD') do echo Branch_hash=%%i
-cd "%BLENDER_DIR%/release/datafiles/locale"
+cd "%BLENDER_DIR%/locale"
 for /f "delims=" %%i in ('"%GIT%" rev-parse HEAD') do echo Locale_hash=%%i
-cd "%BLENDER_DIR%/release/scripts/addons"
+cd "%BLENDER_DIR%/scripts/addons"
 for /f "delims=" %%i in ('"%GIT%" rev-parse HEAD') do echo Addons_Hash=%%i
 cd "%BLENDER_DIR%"
 :EOF
@@ -38,7 +38,7 @@ PROJECT_NAME = Blender
 # could be handy for archiving the generated documentation or if some version
 # control system is used.

-PROJECT_NUMBER = V3.5
+PROJECT_NUMBER = V3.6

 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
@@ -37,7 +37,7 @@ def draw_callback_px(self, context):
     # BLF drawing routine
     font_id = font_info["font_id"]
     blf.position(font_id, 2, 80, 0)
-    blf.size(font_id, 50, 72)
+    blf.size(font_id, 50)
     blf.draw(font_id, "Hello World")

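Note: this template fix tracks the Python API change visible in the diff itself: `blf.size()` no longer takes a trailing DPI argument. A minimal sketch of the updated call (only meaningful inside Blender, where `blf` is available; font 0 is the default font):

```python
import blf

font_id = 0  # default font
blf.position(font_id, 2, 80, 0)
blf.size(font_id, 50)  # point size only; the old DPI argument is gone
blf.draw(font_id, "Hello World")
```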
@@ -31,7 +31,7 @@ For an overview of BMesh data types and how they reference each other see:
 Example Script
 --------------

-.. literalinclude:: __/__/__/release/scripts/templates_py/bmesh_simple.py
+.. literalinclude:: __/__/__/scripts/templates_py/bmesh_simple.py


 Standalone Module
@@ -288,7 +288,7 @@ In Python, this is done by defining a class, which is a subclass of an existing
 Example Operator
 ----------------

-.. literalinclude:: __/__/__/release/scripts/templates_py/operator_simple.py
+.. literalinclude:: __/__/__/scripts/templates_py/operator_simple.py

 Once this script runs, ``SimpleOperator`` is registered with Blender
 and can be called from Operator Search or added to the toolbar.
@@ -320,7 +320,7 @@ Example Panel
 Panels are registered as a class, like an operator.
 Notice the extra ``bl_`` variables used to set the context they display in.

-.. literalinclude:: __/__/__/release/scripts/templates_py/ui_panel_simple.py
+.. literalinclude:: __/__/__/scripts/templates_py/ui_panel_simple.py

 To run the script:

@@ -367,13 +367,13 @@ except ImportError:
 # Note that ".." is replaced by "__" in the RST files,
 # to avoid having to match Blender's source tree.
 EXTRA_SOURCE_FILES = (
-    "../../../release/scripts/templates_py/bmesh_simple.py",
-    "../../../release/scripts/templates_py/gizmo_operator.py",
-    "../../../release/scripts/templates_py/gizmo_operator_target.py",
-    "../../../release/scripts/templates_py/gizmo_simple.py",
-    "../../../release/scripts/templates_py/operator_simple.py",
-    "../../../release/scripts/templates_py/ui_panel_simple.py",
-    "../../../release/scripts/templates_py/ui_previews_custom_icon.py",
+    "../../../scripts/templates_py/bmesh_simple.py",
+    "../../../scripts/templates_py/gizmo_operator.py",
+    "../../../scripts/templates_py/gizmo_operator_target.py",
+    "../../../scripts/templates_py/gizmo_simple.py",
+    "../../../scripts/templates_py/operator_simple.py",
+    "../../../scripts/templates_py/ui_panel_simple.py",
+    "../../../scripts/templates_py/ui_previews_custom_icon.py",
     "../examples/bmesh.ops.1.py",
     "../examples/bpy.app.translations.py",
 )
@@ -476,7 +476,7 @@ MODULE_GROUPING = {

 # -------------------------------BLENDER----------------------------------------

-# converting bytes to strings, due to T30154
+# Converting bytes to strings, due to #30154.
 BLENDER_REVISION = str(bpy.app.build_hash, 'utf_8')
 BLENDER_REVISION_TIMESTAMP = bpy.app.build_commit_timestamp

@@ -487,7 +487,7 @@ BLENDER_VERSION_DOTS = "%d.%d" % (bpy.app.version[0], bpy.app.version[1])
 if BLENDER_REVISION != "Unknown":
     # SHA1 Git hash
     BLENDER_VERSION_HASH = BLENDER_REVISION
-    BLENDER_VERSION_HASH_HTML_LINK = "<a href=https://developer.blender.org/rB%s>%s</a>" % (
+    BLENDER_VERSION_HASH_HTML_LINK = "<a href=https://projects.blender.org/blender/blender/commit/%s>%s</a>" % (
         BLENDER_VERSION_HASH, BLENDER_VERSION_HASH,
     )
     BLENDER_VERSION_DATE = time.strftime("%d/%m/%Y", time.localtime(BLENDER_REVISION_TIMESTAMP))
@@ -647,7 +647,7 @@ def undocumented_message(module_name, type_name, identifier):
         module_name, type_name, identifier,
     )

-    return "Undocumented, consider `contributing <https://developer.blender.org/T51061>`__."
+    return "Undocumented, consider `contributing <https://developer.blender.org/>`__."


 def range_str(val):
@@ -1816,9 +1816,9 @@ def pyrna2sphinx(basepath):

     # operators
     def write_ops():
-        API_BASEURL = "https://developer.blender.org/diffusion/B/browse/master/release/scripts"
-        API_BASEURL_ADDON = "https://developer.blender.org/diffusion/BA"
-        API_BASEURL_ADDON_CONTRIB = "https://developer.blender.org/diffusion/BAC"
+        API_BASEURL = "https://projects.blender.org/blender/blender/src/branch/main/scripts"
+        API_BASEURL_ADDON = "https://projects.blender.org/blender/blender-addons"
+        API_BASEURL_ADDON_CONTRIB = "https://projects.blender.org/blender/blender-addons-contrib"

         op_modules = {}
         op = None
@@ -2200,7 +2200,7 @@ def write_rst_enum_items(basepath, key, key_no_prefix, enum_items):
     Write a single page for a static enum in RST.

     This helps avoiding very large lists being in-lined in many places which is an issue
-    especially with icons in ``bpy.types.UILayout``. See T87008.
+    especially with icons in ``bpy.types.UILayout``. See #87008.
     """
     filepath = os.path.join(basepath, "%s.rst" % key_no_prefix)
     with open(filepath, "w", encoding="utf-8") as fh:
@@ -156,7 +156,7 @@ var Popover = function() {
     },
     getNamed : function(v) {
         $.each(all_versions, function(ix, title) {
-            if (ix === "master" || ix === "latest") {
+            if (ix === "master" || ix === "main" || ix === "latest") {
                 var m = title.match(/\d\.\d[\w\d\.]*/)[0];
                 if (parseFloat(m) == v) {
                     v = ix;
extern/hipew/README.blender (vendored, 2 changes)
@@ -1,5 +1,5 @@
 Project: Blender
-URL: https://git.blender.org/blender.git
+URL: https://projects.blender.org/blender/blender.git
 License: Apache 2.0
 Upstream version: N/A
 Local modifications: None
@@ -12,6 +12,7 @@ from bpy.props import (
     PointerProperty,
     StringProperty,
 )
+from bpy.app.translations import pgettext_iface as iface_

 from math import pi

@@ -1664,30 +1665,51 @@ class CyclesPreferences(bpy.types.AddonPreferences):
             col.label(text="No compatible GPUs found for Cycles", icon='INFO')

             if device_type == 'CUDA':
-                col.label(text="Requires NVIDIA GPU with compute capability 3.0", icon='BLANK1')
+                compute_capability = "3.0"
+                col.label(text=iface_("Requires NVIDIA GPU with compute capability %s") % compute_capability,
+                          icon='BLANK1', translate=False)
             elif device_type == 'OPTIX':
-                col.label(text="Requires NVIDIA GPU with compute capability 5.0", icon='BLANK1')
-                col.label(text="and NVIDIA driver version 470 or newer", icon='BLANK1')
+                compute_capability = "5.0"
+                driver_version = "470"
+                col.label(text=iface_("Requires NVIDIA GPU with compute capability %s") % compute_capability,
+                          icon='BLANK1', translate=False)
+                col.label(text="and NVIDIA driver version %s or newer" % driver_version,
+                          icon='BLANK1', translate=False)
             elif device_type == 'HIP':
-                import sys
-                if sys.platform[:3] == "win":
-                    col.label(text="Requires AMD GPU with RDNA architecture", icon='BLANK1')
-                    col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
-                elif sys.platform.startswith("linux"):
-                    col.label(text="Requires AMD GPU with RDNA architecture", icon='BLANK1')
-                    col.label(text="and AMD driver version 22.10 or newer", icon='BLANK1')
+                if True:
+                    col.label(text="HIP temporarily disabled due to compiler bugs", icon='BLANK1')
+                else:
+                    import sys
+                    if sys.platform[:3] == "win":
+                        driver_version = "21.Q4"
+                        col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
+                        col.label(text=iface_("and AMD Radeon Pro %s driver or newer") % driver_version,
+                                  icon='BLANK1', translate=False)
+                    elif sys.platform.startswith("linux"):
+                        driver_version = "22.10"
+                        col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
+                        col.label(text=iface_("and AMD driver version %s or newer") % driver_version, icon='BLANK1',
+                                  translate=False)
             elif device_type == 'ONEAPI':
                 import sys
                 if sys.platform.startswith("win"):
+                    driver_version = "101.4032"
                     col.label(text="Requires Intel GPU with Xe-HPG architecture", icon='BLANK1')
-                    col.label(text="and Windows driver version 101.4032 or newer", icon='BLANK1')
+                    col.label(text=iface_("and Windows driver version %s or newer") % driver_version,
+                              icon='BLANK1', translate=False)
                 elif sys.platform.startswith("linux"):
+                    driver_version = "1.3.24931"
                     col.label(text="Requires Intel GPU with Xe-HPG architecture and", icon='BLANK1')
-                    col.label(text=" - intel-level-zero-gpu version 1.3.24931 or newer", icon='BLANK1')
+                    col.label(text=iface_(" - intel-level-zero-gpu version %s or newer") % driver_version,
+                              icon='BLANK1', translate=False)
                 col.label(text=" - oneAPI Level-Zero Loader", icon='BLANK1')
             elif device_type == 'METAL':
-                col.label(text="Requires Apple Silicon with macOS 12.2 or newer", icon='BLANK1')
-                col.label(text="or AMD with macOS 12.3 or newer", icon='BLANK1')
+                silicon_mac_version = "12.2"
+                amd_mac_version = "12.3"
+                col.label(text=iface_("Requires Apple Silicon with macOS %s or newer") % silicon_mac_version,
+                          icon='BLANK1', translate=False)
+                col.label(text=iface_("or AMD with macOS %s or newer") % amd_mac_version, icon='BLANK1',
+                          translate=False)
             return

         for device in devices:
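Note: the pattern throughout this hunk is to pull version numbers out of the label text so each requirement becomes one stable, translatable template, while `translate=False` keeps the layout from translating the already-formatted result a second time. In isolation (a sketch; `col` is assumed to be a `bpy.types.UILayout` column):

```python
from bpy.app.translations import pgettext_iface as iface_

def draw_driver_requirement(col, driver_version: str) -> None:
    # One template for translators, regardless of future version bumps.
    col.label(text=iface_("and NVIDIA driver version %s or newer") % driver_version,
              icon='BLANK1', translate=False)
```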
@@ -1723,12 +1745,21 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
|||||||
|
|
||||||
if compute_device_type == 'METAL':
|
if compute_device_type == 'METAL':
|
||||||
import platform
|
import platform
|
||||||
# MetalRT only works on Apple Silicon at present, pending argument encoding fixes on AMD
|
import re
|
||||||
# Kernel specialization is only viable on Apple Silicon at present due to relative compilation speed
|
is_navi_2 = False
|
||||||
if platform.machine() == 'arm64':
|
for device in devices:
|
||||||
|
if re.search(r"((RX)|(Pro)|(PRO))\s+W?6\d00X", device.name):
|
||||||
|
is_navi_2 = True
|
||||||
|
break
|
||||||
|
|
||||||
|
# MetalRT only works on Apple Silicon and Navi2.
|
||||||
|
is_arm64 = platform.machine() == 'arm64'
|
||||||
|
if is_arm64 or is_navi_2:
|
||||||
col = layout.column()
|
col = layout.column()
|
||||||
col.use_property_split = True
|
col.use_property_split = True
|
||||||
col.prop(self, "kernel_optimization_level")
|
# Kernel specialization is only supported on Apple Silicon
|
||||||
|
if is_arm64:
|
||||||
|
col.prop(self, "kernel_optimization_level")
|
||||||
col.prop(self, "use_metalrt")
|
col.prop(self, "use_metalrt")
|
||||||
|
|
||||||
def draw(self, context):
|
def draw(self, context):
|
||||||
|
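Note: the new `re.search` gate recognizes Navi 2 Mac GPUs by their marketing names. Checking the pattern against a few illustrative device strings:

```python
import re

NAVI_2 = r"((RX)|(Pro)|(PRO))\s+W?6\d00X"

for name in ("AMD Radeon Pro W6800X", "AMD Radeon PRO W6900X", "Apple M1 Max"):
    print(name, "->", bool(re.search(NAVI_2, name)))
# AMD Radeon Pro W6800X -> True
# AMD Radeon PRO W6900X -> True
# Apple M1 Max -> False
```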
@@ -20,7 +20,7 @@ class CyclesPresetPanel(PresetPanel, Panel):
     @staticmethod
     def post_cb(context):
         # Modify an arbitrary built-in scene property to force a depsgraph
-        # update, because add-on properties don't. (see T62325)
+        # update, because add-on properties don't. (see #62325)
         render = context.scene.render
         render.filter_size = render.filter_size

@@ -105,11 +105,12 @@ GPUShader *BlenderFallbackDisplayShader::bind(int width, int height)

   /* Bind shader now to enable uniform assignment. */
   GPU_shader_bind(shader_program_);
-  GPU_shader_uniform_int(shader_program_, image_texture_location_, 0);
+  int slot = 0;
+  GPU_shader_uniform_int_ex(shader_program_, image_texture_location_, 1, 1, &slot);
   float size[2];
   size[0] = width;
   size[1] = height;
-  GPU_shader_uniform_vector(shader_program_, fullscreen_location_, 2, 1, size);
+  GPU_shader_uniform_float_ex(shader_program_, fullscreen_location_, 2, 1, size);
   return shader_program_;
 }

@@ -20,7 +20,7 @@ BlenderImageLoader::BlenderImageLoader(BL::Image b_image,
     : b_image(b_image),
       frame(frame),
       tile_number(tile_number),
-      /* Don't free cache for preview render to avoid race condition from T93560, to be fixed
+      /* Don't free cache for preview render to avoid race condition from #93560, to be fixed
        * properly later as we are close to release. */
       free_cache(!is_preview_render && !b_image.has_data())
 {
@@ -72,7 +72,7 @@ bool BlenderImageLoader::load_metadata(const ImageDeviceFeatures &, ImageMetaDat
     metadata.colorspace = u_colorspace_raw;
   }
   else {
-    /* In some cases (e.g. T94135), the colorspace setting in Blender gets updated as part of the
+    /* In some cases (e.g. #94135), the colorspace setting in Blender gets updated as part of the
      * metadata queries in this function, so update the colorspace setting here. */
     PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr;
     metadata.colorspace = get_enum_identifier(colorspace_ptr, "name");
@@ -24,7 +24,7 @@ void BlenderSync::sync_light(BL::Object &b_parent,
   Light *light = light_map.find(key);

   /* Check if the transform was modified, in case a linked collection is moved we do not get a
-   * specific depsgraph update (T88515). This also mimics the behavior for Objects. */
+   * specific depsgraph update (#88515). This also mimics the behavior for Objects. */
   const bool tfm_updated = (light && light->get_tfm() != tfm);

   /* Update if either object or light data changed. */
@@ -94,7 +94,7 @@ void python_thread_state_restore(void **python_thread_state)
   *python_thread_state = NULL;
 }

-static const char *PyC_UnicodeAsByte(PyObject *py_str, PyObject **coerce)
+static const char *PyC_UnicodeAsBytes(PyObject *py_str, PyObject **coerce)
 {
   const char *result = PyUnicode_AsUTF8(py_str);
   if (result) {
@@ -131,8 +131,8 @@ static PyObject *init_func(PyObject * /*self*/, PyObject *args)
   }

   PyObject *path_coerce = nullptr, *user_path_coerce = nullptr;
-  path_init(PyC_UnicodeAsByte(path, &path_coerce),
-            PyC_UnicodeAsByte(user_path, &user_path_coerce));
+  path_init(PyC_UnicodeAsBytes(path, &path_coerce),
+            PyC_UnicodeAsBytes(user_path, &user_path_coerce));
   Py_XDECREF(path_coerce);
   Py_XDECREF(user_path_coerce);

@@ -404,7 +404,7 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
    * point we know that we've got everything to render current view layer.
    */
   /* At the moment we only free if we are not doing multi-view
-   * (or if we are rendering the last view). See T58142/D4239 for discussion.
+   * (or if we are rendering the last view). See #58142/D4239 for discussion.
    */
   if (view_index == num_views - 1) {
     free_blender_memory_if_possible();
@@ -349,8 +349,7 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)

   bool use_light_tree = get_boolean(cscene, "use_light_tree");
   integrator->set_use_light_tree(use_light_tree);
-  integrator->set_light_sampling_threshold(
-      (use_light_tree) ? 0.0f : get_float(cscene, "light_sampling_threshold"));
+  integrator->set_light_sampling_threshold(get_float(cscene, "light_sampling_threshold"));

   if (integrator->use_light_tree_is_modified()) {
     scene->light_manager->tag_update(scene, LightManager::UPDATE_ALL);
@@ -766,7 +765,7 @@ void BlenderSync::free_data_after_sync(BL::Depsgraph &b_depsgraph)
       (BlenderSession::headless || is_interface_locked) &&
       /* Baking re-uses the depsgraph multiple times, clearing crashes
        * reading un-evaluated mesh data which isn't aligned with the
-       * geometry we're baking, see T71012. */
+       * geometry we're baking, see #71012. */
       !scene->bake_manager->get_baking() &&
      /* Persistent data must main caches for performance and correctness. */
      !is_persistent_data;
@@ -42,12 +42,15 @@ endif()
 ###########################################################################

 if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
-  find_package(HIP)
-  set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
+  set(WITH_CYCLES_HIP_BINARIES OFF)
+  message(STATUS "HIP temporarily disabled due to compiler bugs")

-  if(HIP_FOUND)
-    message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
-  endif()
+  # find_package(HIP)
+  # set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
+  # if(HIP_FOUND)
+  #   message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
+  # endif()
 endif()

 if(NOT WITH_HIP_DYNLOAD)
@@ -53,8 +53,12 @@ void CUDADevice::set_error(const string &error)
 }

 CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
-    : Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
+    : GPUDevice(info, stats, profiler)
 {
+  /* Verify that base class types can be used with specific backend types */
+  static_assert(sizeof(texMemObject) == sizeof(CUtexObject));
+  static_assert(sizeof(arrayMemObject) == sizeof(CUarray));
+
   first_error = true;

   cuDevId = info.num;
@@ -65,12 +69,6 @@ CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)

   need_texture_info = false;

-  device_texture_headroom = 0;
-  device_working_headroom = 0;
-  move_texture_to_host = false;
-  map_host_limit = 0;
-  map_host_used = 0;
-  can_map_host = 0;
   pitch_alignment = 0;

   /* Initialize CUDA. */
@@ -91,8 +89,9 @@ CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
   /* CU_CTX_MAP_HOST for mapping host memory when out of device memory.
    * CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
    * so we can predict which memory to map to host. */
-  cuda_assert(
-      cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
+  int value;
+  cuda_assert(cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
+  can_map_host = value != 0;

   cuda_assert(cuDeviceGetAttribute(
       &pitch_alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice));
```diff
@@ -499,311 +498,57 @@ void CUDADevice::reserve_local_memory(const uint kernel_features)
 # endif
 }
 
-void CUDADevice::init_host_memory()
-{
-  /* Limit amount of host mapped memory, because allocating too much can
-   * cause system instability. Leave at least half or 4 GB of system
-   * memory free, whichever is smaller. */
-  size_t default_limit = 4 * 1024 * 1024 * 1024LL;
-  size_t system_ram = system_physical_ram();
-
-  if (system_ram > 0) {
-    if (system_ram / 2 > default_limit) {
-      map_host_limit = system_ram - default_limit;
-    }
-    else {
-      map_host_limit = system_ram / 2;
-    }
-  }
-  else {
-    VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
-    map_host_limit = 0;
-  }
-
-  /* Amount of device memory to keep is free after texture memory
-   * and working memory allocations respectively. We set the working
-   * memory limit headroom lower so that some space is left after all
-   * texture memory allocations. */
-  device_working_headroom = 32 * 1024 * 1024LL;   // 32MB
-  device_texture_headroom = 128 * 1024 * 1024LL;  // 128MB
-
-  VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
-            << " bytes. (" << string_human_readable_size(map_host_limit) << ")";
-}
-
-void CUDADevice::load_texture_info()
-{
-  if (need_texture_info) {
-    /* Unset flag before copying, so this does not loop indefinitely if the copy below calls
-     * into 'move_textures_to_host' (which calls 'load_texture_info' again). */
-    need_texture_info = false;
-    texture_info.copy_to_device();
-  }
-}
-
-void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
-{
-  /* Break out of recursive call, which can happen when moving memory on a multi device. */
-  static bool any_device_moving_textures_to_host = false;
-  if (any_device_moving_textures_to_host) {
-    return;
-  }
-
-  /* Signal to reallocate textures in host memory only. */
-  move_texture_to_host = true;
-
-  while (size > 0) {
-    /* Find suitable memory allocation to move. */
-    device_memory *max_mem = NULL;
-    size_t max_size = 0;
-    bool max_is_image = false;
-
-    thread_scoped_lock lock(cuda_mem_map_mutex);
-    foreach (CUDAMemMap::value_type &pair, cuda_mem_map) {
-      device_memory &mem = *pair.first;
-      CUDAMem *cmem = &pair.second;
-
-      /* Can only move textures allocated on this device (and not those from peer devices).
-       * And need to ignore memory that is already on the host. */
-      if (!mem.is_resident(this) || cmem->use_mapped_host) {
-        continue;
-      }
-
-      bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
-                        (&mem != &texture_info);
-      bool is_image = is_texture && (mem.data_height > 1);
-
-      /* Can't move this type of memory. */
-      if (!is_texture || cmem->array) {
-        continue;
-      }
-
-      /* For other textures, only move image textures. */
-      if (for_texture && !is_image) {
-        continue;
-      }
-
-      /* Try to move largest allocation, prefer moving images. */
-      if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
-        max_is_image = is_image;
-        max_size = mem.device_size;
-        max_mem = &mem;
-      }
-    }
-    lock.unlock();
-
-    /* Move to host memory. This part is mutex protected since
-     * multiple CUDA devices could be moving the memory. The
-     * first one will do it, and the rest will adopt the pointer. */
-    if (max_mem) {
-      VLOG_WORK << "Move memory from device to host: " << max_mem->name;
-
-      static thread_mutex move_mutex;
-      thread_scoped_lock lock(move_mutex);
-
-      any_device_moving_textures_to_host = true;
-
-      /* Potentially need to call back into multi device, so pointer mapping
-       * and peer devices are updated. This is also necessary since the device
-       * pointer may just be a key here, so cannot be accessed and freed directly.
-       * Unfortunately it does mean that memory is reallocated on all other
-       * devices as well, which is potentially dangerous when still in use (since
-       * a thread rendering on another devices would only be caught in this mutex
-       * if it so happens to do an allocation at the same time as well. */
-      max_mem->device_copy_to();
-      size = (max_size >= size) ? 0 : size - max_size;
-
-      any_device_moving_textures_to_host = false;
-    }
-    else {
-      break;
-    }
-  }
-
-  /* Unset flag before texture info is reloaded, since it should stay in device memory. */
-  move_texture_to_host = false;
-
-  /* Update texture info array with new pointers. */
-  load_texture_info();
-}
-
-CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_padding)
+void CUDADevice::get_device_memory_info(size_t &total, size_t &free)
 {
   CUDAContextScope scope(this);
 
-  CUdeviceptr device_pointer = 0;
-  size_t size = mem.memory_size() + pitch_padding;
-
-  CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY;
-  const char *status = "";
-
-  /* First try allocating in device memory, respecting headroom. We make
-   * an exception for texture info. It is small and frequently accessed,
-   * so treat it as working memory.
-   *
-   * If there is not enough room for working memory, we will try to move
-   * textures to host memory, assuming the performance impact would have
-   * been worse for working memory. */
-  bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
-  bool is_image = is_texture && (mem.data_height > 1);
-
-  size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
-
-  size_t total = 0, free = 0;
   cuMemGetInfo(&free, &total);
-
-  /* Move textures to host memory if needed. */
-  if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
-    move_textures_to_host(size + headroom - free, is_texture);
-    cuMemGetInfo(&free, &total);
-  }
-
-  /* Allocate in device memory. */
-  if (!move_texture_to_host && (size + headroom) < free) {
-    mem_alloc_result = cuMemAlloc(&device_pointer, size);
-    if (mem_alloc_result == CUDA_SUCCESS) {
-      status = " in device memory";
-    }
-  }
-
-  /* Fall back to mapped host memory if needed and possible. */
-
-  void *shared_pointer = 0;
-
-  if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != MEM_DEVICE_ONLY) {
-    if (mem.shared_pointer) {
-      /* Another device already allocated host memory. */
-      mem_alloc_result = CUDA_SUCCESS;
-      shared_pointer = mem.shared_pointer;
-    }
-    else if (map_host_used + size < map_host_limit) {
-      /* Allocate host memory ourselves. */
-      mem_alloc_result = cuMemHostAlloc(
-          &shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
-
-      assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) ||
-             (mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0));
-    }
-
-    if (mem_alloc_result == CUDA_SUCCESS) {
-      cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0));
-      map_host_used += size;
-      status = " in host memory";
-    }
-  }
-
-  if (mem_alloc_result != CUDA_SUCCESS) {
-    if (mem.type == MEM_DEVICE_ONLY) {
-      status = " failed, out of device memory";
-      set_error("System is out of GPU memory");
-    }
-    else {
-      status = " failed, out of device and host memory";
-      set_error("System is out of GPU and shared host memory");
-    }
-  }
-
-  if (mem.name) {
-    VLOG_WORK << "Buffer allocate: " << mem.name << ", "
-              << string_human_readable_number(mem.memory_size()) << " bytes. ("
-              << string_human_readable_size(mem.memory_size()) << ")" << status;
-  }
-
-  mem.device_pointer = (device_ptr)device_pointer;
-  mem.device_size = size;
-  stats.mem_alloc(size);
-
-  if (!mem.device_pointer) {
-    return NULL;
-  }
-
-  /* Insert into map of allocations. */
-  thread_scoped_lock lock(cuda_mem_map_mutex);
-  CUDAMem *cmem = &cuda_mem_map[&mem];
-  if (shared_pointer != 0) {
-    /* Replace host pointer with our host allocation. Only works if
-     * CUDA memory layout is the same and has no pitch padding. Also
-     * does not work if we move textures to host during a render,
-     * since other devices might be using the memory. */
-
-    if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
-        mem.host_pointer != shared_pointer) {
-      memcpy(shared_pointer, mem.host_pointer, size);
-
-      /* A Call to device_memory::host_free() should be preceded by
-       * a call to device_memory::device_free() for host memory
-       * allocated by a device to be handled properly. Two exceptions
-       * are here and a call in OptiXDevice::generic_alloc(), where
-       * the current host memory can be assumed to be allocated by
-       * device_memory::host_alloc(), not by a device */
-
-      mem.host_free();
-      mem.host_pointer = shared_pointer;
-    }
-    mem.shared_pointer = shared_pointer;
-    mem.shared_counter++;
-    cmem->use_mapped_host = true;
-  }
-  else {
-    cmem->use_mapped_host = false;
-  }
-
-  return cmem;
 }
 
-void CUDADevice::generic_copy_to(device_memory &mem)
+bool CUDADevice::alloc_device(void *&device_pointer, size_t size)
 {
-  if (!mem.host_pointer || !mem.device_pointer) {
-    return;
-  }
+  CUDAContextScope scope(this);
 
-  /* If use_mapped_host of mem is false, the current device only uses device memory allocated by
-   * cuMemAlloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
-   * mem.host_pointer. */
-  thread_scoped_lock lock(cuda_mem_map_mutex);
-  if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
-    const CUDAContextScope scope(this);
-    cuda_assert(
-        cuMemcpyHtoD((CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size()));
-  }
+  CUresult mem_alloc_result = cuMemAlloc((CUdeviceptr *)&device_pointer, size);
+  return mem_alloc_result == CUDA_SUCCESS;
 }
 
-void CUDADevice::generic_free(device_memory &mem)
+void CUDADevice::free_device(void *device_pointer)
 {
-  if (mem.device_pointer) {
-    CUDAContextScope scope(this);
-    thread_scoped_lock lock(cuda_mem_map_mutex);
-    DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
-    const CUDAMem &cmem = cuda_mem_map[&mem];
+  CUDAContextScope scope(this);
 
-    /* If cmem.use_mapped_host is true, reference counting is used
-     * to safely free a mapped host memory. */
+  cuda_assert(cuMemFree((CUdeviceptr)device_pointer));
+}
 
-    if (cmem.use_mapped_host) {
-      assert(mem.shared_pointer);
-      if (mem.shared_pointer) {
-        assert(mem.shared_counter > 0);
-        if (--mem.shared_counter == 0) {
-          if (mem.host_pointer == mem.shared_pointer) {
-            mem.host_pointer = 0;
-          }
-          cuMemFreeHost(mem.shared_pointer);
-          mem.shared_pointer = 0;
-        }
-      }
-      map_host_used -= mem.device_size;
-    }
-    else {
-      /* Free device memory. */
-      cuda_assert(cuMemFree(mem.device_pointer));
-    }
+bool CUDADevice::alloc_host(void *&shared_pointer, size_t size)
+{
+  CUDAContextScope scope(this);
 
-    stats.mem_free(mem.device_size);
-    mem.device_pointer = 0;
-    mem.device_size = 0;
+  CUresult mem_alloc_result = cuMemHostAlloc(
+      &shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
+  return mem_alloc_result == CUDA_SUCCESS;
+}
 
-    cuda_mem_map.erase(cuda_mem_map.find(&mem));
-  }
+void CUDADevice::free_host(void *shared_pointer)
+{
+  CUDAContextScope scope(this);
+
+  cuMemFreeHost(shared_pointer);
+}
+
+bool CUDADevice::transform_host_pointer(void *&device_pointer, void *&shared_pointer)
+{
+  CUDAContextScope scope(this);
+
+  cuda_assert(cuMemHostGetDevicePointer_v2((CUdeviceptr *)&device_pointer, shared_pointer, 0));
+  return true;
+}
+
+void CUDADevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
+{
+  const CUDAContextScope scope(this);
+
+  cuda_assert(cuMemcpyHtoD((CUdeviceptr)device_pointer, host_pointer, size));
 }
 
 void CUDADevice::mem_alloc(device_memory &mem)
```
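With the shared policy hoisted into the base class, each remaining CUDA override reduces to a single driver-API call. A standalone sketch of those calls in isolation, assuming a CUDA toolkit with the driver API available (the `check` helper is hypothetical, error handling trimmed to the minimum):

```cpp
#include <cstdio>
#include <cstdlib>
#include <cuda.h>

static void check(CUresult res, const char *what)
{
  if (res != CUDA_SUCCESS) {
    std::fprintf(stderr, "%s failed (%d)\n", what, (int)res);
    std::exit(1);
  }
}

int main()
{
  CUdevice dev;
  CUcontext ctx;
  check(cuInit(0), "cuInit");
  check(cuDeviceGet(&dev, 0), "cuDeviceGet");
  check(cuCtxCreate(&ctx, CU_CTX_MAP_HOST, dev), "cuCtxCreate");

  /* alloc_device / free_device */
  CUdeviceptr dptr = 0;
  check(cuMemAlloc(&dptr, 1024), "cuMemAlloc");
  check(cuMemFree(dptr), "cuMemFree");

  /* alloc_host / transform_host_pointer / free_host */
  void *hptr = nullptr;
  check(cuMemHostAlloc(&hptr, 1024, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED),
        "cuMemHostAlloc");
  CUdeviceptr mapped = 0;
  check(cuMemHostGetDevicePointer_v2(&mapped, hptr, 0), "cuMemHostGetDevicePointer_v2");
  check(cuMemFreeHost(hptr), "cuMemFreeHost");

  check(cuCtxDestroy(ctx), "cuCtxDestroy");
  return 0;
}
```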
```diff
@@ -868,8 +613,8 @@ void CUDADevice::mem_zero(device_memory &mem)
 
   /* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
    * regardless of mem.host_pointer and mem.shared_pointer. */
-  thread_scoped_lock lock(cuda_mem_map_mutex);
-  if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
+  thread_scoped_lock lock(device_mem_map_mutex);
+  if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
     const CUDAContextScope scope(this);
     cuda_assert(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));
   }
@@ -994,19 +739,19 @@ void CUDADevice::tex_alloc(device_texture &mem)
     return;
   }
 
-  CUDAMem *cmem = NULL;
+  Mem *cmem = NULL;
   CUarray array_3d = NULL;
   size_t src_pitch = mem.data_width * dsize * mem.data_elements;
   size_t dst_pitch = src_pitch;
 
   if (!mem.is_resident(this)) {
-    thread_scoped_lock lock(cuda_mem_map_mutex);
-    cmem = &cuda_mem_map[&mem];
+    thread_scoped_lock lock(device_mem_map_mutex);
+    cmem = &device_mem_map[&mem];
     cmem->texobject = 0;
 
     if (mem.data_depth > 1) {
       array_3d = (CUarray)mem.device_pointer;
-      cmem->array = array_3d;
+      cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
     }
     else if (mem.data_height > 0) {
       dst_pitch = align_up(src_pitch, pitch_alignment);
@@ -1050,10 +795,10 @@ void CUDADevice::tex_alloc(device_texture &mem)
     mem.device_size = size;
     stats.mem_alloc(size);
 
-    thread_scoped_lock lock(cuda_mem_map_mutex);
-    cmem = &cuda_mem_map[&mem];
+    thread_scoped_lock lock(device_mem_map_mutex);
+    cmem = &device_mem_map[&mem];
     cmem->texobject = 0;
-    cmem->array = array_3d;
+    cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
   }
   else if (mem.data_height > 0) {
     /* 2D texture, using pitch aligned linear memory. */
@@ -1137,8 +882,8 @@ void CUDADevice::tex_alloc(device_texture &mem)
   texDesc.filterMode = filter_mode;
   texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
 
-  thread_scoped_lock lock(cuda_mem_map_mutex);
-  cmem = &cuda_mem_map[&mem];
+  thread_scoped_lock lock(device_mem_map_mutex);
+  cmem = &device_mem_map[&mem];
 
   cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
 
@@ -1153,9 +898,9 @@ void CUDADevice::tex_free(device_texture &mem)
 {
   if (mem.device_pointer) {
     CUDAContextScope scope(this);
-    thread_scoped_lock lock(cuda_mem_map_mutex);
-    DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
-    const CUDAMem &cmem = cuda_mem_map[&mem];
+    thread_scoped_lock lock(device_mem_map_mutex);
+    DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
+    const Mem &cmem = device_mem_map[&mem];
 
     if (cmem.texobject) {
       /* Free bindless texture. */
@@ -1164,16 +909,16 @@ void CUDADevice::tex_free(device_texture &mem)
 
     if (!mem.is_resident(this)) {
       /* Do not free memory here, since it was allocated on a different device. */
-      cuda_mem_map.erase(cuda_mem_map.find(&mem));
+      device_mem_map.erase(device_mem_map.find(&mem));
     }
     else if (cmem.array) {
       /* Free array. */
-      cuArrayDestroy(cmem.array);
+      cuArrayDestroy(reinterpret_cast<CUarray>(cmem.array));
       stats.mem_free(mem.device_size);
       mem.device_pointer = 0;
       mem.device_size = 0;
 
-      cuda_mem_map.erase(cuda_mem_map.find(&mem));
+      device_mem_map.erase(device_mem_map.find(&mem));
     }
     else {
       lock.unlock();
```
```diff
@@ -21,7 +21,7 @@ CCL_NAMESPACE_BEGIN
 
 class DeviceQueue;
 
-class CUDADevice : public Device {
+class CUDADevice : public GPUDevice {
 
   friend class CUDAContextScope;
 
@@ -29,36 +29,11 @@ class CUDADevice : public Device {
   CUdevice cuDevice;
   CUcontext cuContext;
   CUmodule cuModule;
-  size_t device_texture_headroom;
-  size_t device_working_headroom;
-  bool move_texture_to_host;
-  size_t map_host_used;
-  size_t map_host_limit;
-  int can_map_host;
   int pitch_alignment;
   int cuDevId;
   int cuDevArchitecture;
   bool first_error;
 
-  struct CUDAMem {
-    CUDAMem() : texobject(0), array(0), use_mapped_host(false)
-    {
-    }
-
-    CUtexObject texobject;
-    CUarray array;
-
-    /* If true, a mapped host memory in shared_pointer is being used. */
-    bool use_mapped_host;
-  };
-  typedef map<device_memory *, CUDAMem> CUDAMemMap;
-  CUDAMemMap cuda_mem_map;
-  thread_mutex cuda_mem_map_mutex;
-
-  /* Bindless Textures */
-  device_vector<TextureInfo> texture_info;
-  bool need_texture_info;
-
   CUDADeviceKernels kernels;
 
   static bool have_precompiled_kernels();
@@ -88,17 +63,13 @@ class CUDADevice : public Device {
 
   void reserve_local_memory(const uint kernel_features);
 
-  void init_host_memory();
-
-  void load_texture_info();
-
-  void move_textures_to_host(size_t size, bool for_texture);
-
-  CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
-
-  void generic_copy_to(device_memory &mem);
-
-  void generic_free(device_memory &mem);
+  virtual void get_device_memory_info(size_t &total, size_t &free) override;
+  virtual bool alloc_device(void *&device_pointer, size_t size) override;
+  virtual void free_device(void *device_pointer) override;
+  virtual bool alloc_host(void *&shared_pointer, size_t size) override;
+  virtual void free_host(void *shared_pointer) override;
+  virtual bool transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
+  virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) override;
 
   void mem_alloc(device_memory &mem) override;
```
```diff
@@ -452,6 +452,320 @@ void *Device::get_cpu_osl_memory()
   return nullptr;
 }
 
+GPUDevice::~GPUDevice() noexcept(false)
+{
+}
+
+bool GPUDevice::load_texture_info()
+{
+  if (need_texture_info) {
+    /* Unset flag before copying, so this does not loop indefinitely if the copy below calls
+     * into 'move_textures_to_host' (which calls 'load_texture_info' again). */
+    need_texture_info = false;
+    texture_info.copy_to_device();
+    return true;
+  }
+  else {
+    return false;
+  }
+}
+
+void GPUDevice::init_host_memory(size_t preferred_texture_headroom,
+                                 size_t preferred_working_headroom)
+{
+  /* Limit amount of host mapped memory, because allocating too much can
+   * cause system instability. Leave at least half or 4 GB of system
+   * memory free, whichever is smaller. */
+  size_t default_limit = 4 * 1024 * 1024 * 1024LL;
+  size_t system_ram = system_physical_ram();
+
+  if (system_ram > 0) {
+    if (system_ram / 2 > default_limit) {
+      map_host_limit = system_ram - default_limit;
+    }
+    else {
+      map_host_limit = system_ram / 2;
+    }
+  }
+  else {
+    VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
+    map_host_limit = 0;
+  }
+
+  /* Amount of device memory to keep free after texture memory
+   * and working memory allocations respectively. We set the working
+   * memory limit headroom lower than the working one so there
+   * is space left for it. */
+  device_working_headroom = preferred_working_headroom > 0 ? preferred_working_headroom :
+                                                             32 * 1024 * 1024LL;  // 32MB
+  device_texture_headroom = preferred_texture_headroom > 0 ? preferred_texture_headroom :
+                                                             128 * 1024 * 1024LL;  // 128MB
+
+  VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
+            << " bytes. (" << string_human_readable_size(map_host_limit) << ")";
+}
+
+void GPUDevice::move_textures_to_host(size_t size, bool for_texture)
+{
+  /* Break out of recursive call, which can happen when moving memory on a multi device. */
+  static bool any_device_moving_textures_to_host = false;
+  if (any_device_moving_textures_to_host) {
+    return;
+  }
+
+  /* Signal to reallocate textures in host memory only. */
+  move_texture_to_host = true;
+
+  while (size > 0) {
+    /* Find suitable memory allocation to move. */
+    device_memory *max_mem = NULL;
+    size_t max_size = 0;
+    bool max_is_image = false;
+
+    thread_scoped_lock lock(device_mem_map_mutex);
+    foreach (MemMap::value_type &pair, device_mem_map) {
+      device_memory &mem = *pair.first;
+      Mem *cmem = &pair.second;
+
+      /* Can only move textures allocated on this device (and not those from peer devices).
+       * And need to ignore memory that is already on the host. */
+      if (!mem.is_resident(this) || cmem->use_mapped_host) {
+        continue;
+      }
+
+      bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
+                        (&mem != &texture_info);
+      bool is_image = is_texture && (mem.data_height > 1);
+
+      /* Can't move this type of memory. */
+      if (!is_texture || cmem->array) {
+        continue;
+      }
+
+      /* For other textures, only move image textures. */
+      if (for_texture && !is_image) {
+        continue;
+      }
+
+      /* Try to move largest allocation, prefer moving images. */
+      if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
+        max_is_image = is_image;
+        max_size = mem.device_size;
+        max_mem = &mem;
+      }
+    }
+    lock.unlock();
+
+    /* Move to host memory. This part is mutex protected since
+     * multiple backend devices could be moving the memory. The
+     * first one will do it, and the rest will adopt the pointer. */
+    if (max_mem) {
+      VLOG_WORK << "Move memory from device to host: " << max_mem->name;
+
+      static thread_mutex move_mutex;
+      thread_scoped_lock lock(move_mutex);
+
+      any_device_moving_textures_to_host = true;
+
+      /* Potentially need to call back into multi device, so pointer mapping
+       * and peer devices are updated. This is also necessary since the device
+       * pointer may just be a key here, so cannot be accessed and freed directly.
+       * Unfortunately it does mean that memory is reallocated on all other
+       * devices as well, which is potentially dangerous when still in use (since
+       * a thread rendering on another devices would only be caught in this mutex
+       * if it so happens to do an allocation at the same time as well. */
+      max_mem->device_copy_to();
+      size = (max_size >= size) ? 0 : size - max_size;
+
+      any_device_moving_textures_to_host = false;
+    }
+    else {
+      break;
+    }
+  }
+
+  /* Unset flag before texture info is reloaded, since it should stay in device memory. */
+  move_texture_to_host = false;
+
+  /* Update texture info array with new pointers. */
+  load_texture_info();
+}
+
+GPUDevice::Mem *GPUDevice::generic_alloc(device_memory &mem, size_t pitch_padding)
+{
+  void *device_pointer = 0;
+  size_t size = mem.memory_size() + pitch_padding;
+
+  bool mem_alloc_result = false;
+  const char *status = "";
+
+  /* First try allocating in device memory, respecting headroom. We make
+   * an exception for texture info. It is small and frequently accessed,
+   * so treat it as working memory.
+   *
+   * If there is not enough room for working memory, we will try to move
+   * textures to host memory, assuming the performance impact would have
+   * been worse for working memory. */
+  bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
+  bool is_image = is_texture && (mem.data_height > 1);
+
+  size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
+
+  size_t total = 0, free = 0;
+  get_device_memory_info(total, free);
+
+  /* Move textures to host memory if needed. */
+  if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
+    move_textures_to_host(size + headroom - free, is_texture);
+    get_device_memory_info(total, free);
+  }
+
+  /* Allocate in device memory. */
+  if (!move_texture_to_host && (size + headroom) < free) {
+    mem_alloc_result = alloc_device(device_pointer, size);
+    if (mem_alloc_result) {
+      device_mem_in_use += size;
+      status = " in device memory";
+    }
+  }
+
+  /* Fall back to mapped host memory if needed and possible. */
+
+  void *shared_pointer = 0;
+
+  if (!mem_alloc_result && can_map_host && mem.type != MEM_DEVICE_ONLY) {
+    if (mem.shared_pointer) {
+      /* Another device already allocated host memory. */
+      mem_alloc_result = true;
+      shared_pointer = mem.shared_pointer;
+    }
+    else if (map_host_used + size < map_host_limit) {
+      /* Allocate host memory ourselves. */
+      mem_alloc_result = alloc_host(shared_pointer, size);
+
+      assert((mem_alloc_result && shared_pointer != 0) ||
+             (!mem_alloc_result && shared_pointer == 0));
+    }
+
+    if (mem_alloc_result) {
+      assert(transform_host_pointer(device_pointer, shared_pointer));
+      map_host_used += size;
+      status = " in host memory";
+    }
+  }
+
+  if (!mem_alloc_result) {
+    if (mem.type == MEM_DEVICE_ONLY) {
+      status = " failed, out of device memory";
+      set_error("System is out of GPU memory");
+    }
+    else {
+      status = " failed, out of device and host memory";
+      set_error("System is out of GPU and shared host memory");
+    }
+  }
+
+  if (mem.name) {
+    VLOG_WORK << "Buffer allocate: " << mem.name << ", "
+              << string_human_readable_number(mem.memory_size()) << " bytes. ("
+              << string_human_readable_size(mem.memory_size()) << ")" << status;
+  }
+
+  mem.device_pointer = (device_ptr)device_pointer;
+  mem.device_size = size;
+  stats.mem_alloc(size);
+
+  if (!mem.device_pointer) {
+    return NULL;
+  }
+
+  /* Insert into map of allocations. */
+  thread_scoped_lock lock(device_mem_map_mutex);
+  Mem *cmem = &device_mem_map[&mem];
+  if (shared_pointer != 0) {
+    /* Replace host pointer with our host allocation. Only works if
+     * memory layout is the same and has no pitch padding. Also
+     * does not work if we move textures to host during a render,
+     * since other devices might be using the memory. */
+
+    if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
+        mem.host_pointer != shared_pointer) {
+      memcpy(shared_pointer, mem.host_pointer, size);
+
+      /* A Call to device_memory::host_free() should be preceded by
+       * a call to device_memory::device_free() for host memory
+       * allocated by a device to be handled properly. Two exceptions
+       * are here and a call in OptiXDevice::generic_alloc(), where
+       * the current host memory can be assumed to be allocated by
+       * device_memory::host_alloc(), not by a device */
+
+      mem.host_free();
+      mem.host_pointer = shared_pointer;
+    }
+    mem.shared_pointer = shared_pointer;
+    mem.shared_counter++;
+    cmem->use_mapped_host = true;
+  }
+  else {
+    cmem->use_mapped_host = false;
+  }
+
+  return cmem;
+}
+
+void GPUDevice::generic_free(device_memory &mem)
+{
+  if (mem.device_pointer) {
+    thread_scoped_lock lock(device_mem_map_mutex);
+    DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
+    const Mem &cmem = device_mem_map[&mem];
+
+    /* If cmem.use_mapped_host is true, reference counting is used
+     * to safely free a mapped host memory. */
+
+    if (cmem.use_mapped_host) {
+      assert(mem.shared_pointer);
+      if (mem.shared_pointer) {
+        assert(mem.shared_counter > 0);
+        if (--mem.shared_counter == 0) {
+          if (mem.host_pointer == mem.shared_pointer) {
+            mem.host_pointer = 0;
+          }
+          free_host(mem.shared_pointer);
+          mem.shared_pointer = 0;
+        }
+      }
+      map_host_used -= mem.device_size;
+    }
+    else {
+      /* Free device memory. */
+      free_device((void *)mem.device_pointer);
+      device_mem_in_use -= mem.device_size;
+    }
+
+    stats.mem_free(mem.device_size);
+    mem.device_pointer = 0;
+    mem.device_size = 0;
+
+    device_mem_map.erase(device_mem_map.find(&mem));
+  }
+}
+
+void GPUDevice::generic_copy_to(device_memory &mem)
+{
+  if (!mem.host_pointer || !mem.device_pointer) {
+    return;
+  }
+
+  /* If use_mapped_host of mem is false, the current device only uses device memory allocated by
+   * backend device allocation regardless of mem.host_pointer and mem.shared_pointer, and should
+   * copy data from mem.host_pointer. */
+  thread_scoped_lock lock(device_mem_map_mutex);
+  if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
+    copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, mem.memory_size());
+  }
+}
+
 /* DeviceInfo */
 
 CCL_NAMESPACE_END
```
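The allocation policy in `GPUDevice::generic_alloc` is easiest to follow with concrete numbers: device memory is used only while `size + headroom < free`; otherwise textures are moved to the host to make room first. A self-contained illustration of that comparison (the figures are invented, only the arithmetic mirrors the code above):

```cpp
#include <cstddef>
#include <cstdio>

int main()
{
  const std::size_t MB = 1024 * 1024;
  std::size_t free_mem = 200 * MB; /* as reported by get_device_memory_info() */
  std::size_t request = 150 * MB;  /* size of the incoming texture allocation */
  std::size_t headroom = 128 * MB; /* device_texture_headroom default */

  if (request + headroom >= free_mem) {
    /* Same condition that triggers move_textures_to_host() above. */
    std::size_t to_move = request + headroom - free_mem;
    std::printf("need to move %zu MB of textures to host first\n", to_move / MB);
  }
  else {
    std::printf("fits in device memory\n");
  }
  return 0;
}
```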
```diff
@@ -309,6 +309,93 @@ class Device {
   static uint devices_initialized_mask;
 };
 
+/* Device, which is GPU, with some common functionality for GPU backends */
+class GPUDevice : public Device {
+ protected:
+  GPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
+      : Device(info_, stats_, profiler_),
+        texture_info(this, "texture_info", MEM_GLOBAL),
+        need_texture_info(false),
+        can_map_host(false),
+        map_host_used(0),
+        map_host_limit(0),
+        device_texture_headroom(0),
+        device_working_headroom(0),
+        device_mem_map(),
+        device_mem_map_mutex(),
+        move_texture_to_host(false),
+        device_mem_in_use(0)
+  {
+  }
+
+ public:
+  virtual ~GPUDevice() noexcept(false);
+
+  /* For GPUs that can use bindless textures in some way or another. */
+  device_vector<TextureInfo> texture_info;
+  bool need_texture_info;
+  /* Returns true if the texture info was copied to the device (meaning, some more
+   * re-initialization might be needed). */
+  virtual bool load_texture_info();
+
+ protected:
+  /* Memory allocation, only accessed through device_memory. */
+  friend class device_memory;
+
+  bool can_map_host;
+  size_t map_host_used;
+  size_t map_host_limit;
+  size_t device_texture_headroom;
+  size_t device_working_headroom;
+  typedef unsigned long long texMemObject;
+  typedef unsigned long long arrayMemObject;
+  struct Mem {
+    Mem() : texobject(0), array(0), use_mapped_host(false)
+    {
+    }
+
+    texMemObject texobject;
+    arrayMemObject array;
+
+    /* If true, a mapped host memory in shared_pointer is being used. */
+    bool use_mapped_host;
+  };
+  typedef map<device_memory *, Mem> MemMap;
+  MemMap device_mem_map;
+  thread_mutex device_mem_map_mutex;
+  bool move_texture_to_host;
+  /* Simple counter which will try to track amount of used device memory */
+  size_t device_mem_in_use;
+
+  virtual void init_host_memory(size_t preferred_texture_headroom = 0,
+                                size_t preferred_working_headroom = 0);
+  virtual void move_textures_to_host(size_t size, bool for_texture);
+
+  /* Allocation, deallocation and copy functions, with corresponding
+   * support of device/host allocations. */
+  virtual GPUDevice::Mem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
+  virtual void generic_free(device_memory &mem);
+  virtual void generic_copy_to(device_memory &mem);
+
+  /* total - amount of device memory, free - amount of available device memory */
+  virtual void get_device_memory_info(size_t &total, size_t &free) = 0;
+
+  virtual bool alloc_device(void *&device_pointer, size_t size) = 0;
+
+  virtual void free_device(void *device_pointer) = 0;
+
+  virtual bool alloc_host(void *&shared_pointer, size_t size) = 0;
+
+  virtual void free_host(void *shared_pointer) = 0;
+
+  /* This function should return device pointer corresponding to shared pointer, which
+   * is host buffer, allocated in `alloc_host`. The function should return `true`, if such
+   * address transformation is possible and `false` otherwise. */
+  virtual bool transform_host_pointer(void *&device_pointer, void *&shared_pointer) = 0;
+
+  virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) = 0;
+};
+
 CCL_NAMESPACE_END
 
 #endif /* __DEVICE_H__ */
```
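To make the shape of the new interface concrete, here is a hypothetical minimal backend written against a reduced model of it, using plain `malloc` in place of a real GPU runtime. `GPUDeviceModel` and `SketchDevice` do not exist in the patch; they only show which hooks a backend supplies and where the shared policy lives:

```cpp
#include <cstdio>
#include <cstdlib>

/* Reduced model of the GPUDevice split: the base class owns the policy,
 * subclasses supply raw device allocation. */
class GPUDeviceModel {
 public:
  virtual ~GPUDeviceModel() = default;

  /* Shared policy, like GPUDevice::generic_alloc(). */
  void *generic_alloc(std::size_t size)
  {
    void *ptr = nullptr;
    if (!alloc_device(ptr, size)) {
      std::fprintf(stderr, "out of device memory\n");
    }
    return ptr;
  }

 protected:
  /* Narrow backend interface, mirroring the new pure virtuals. */
  virtual bool alloc_device(void *&device_pointer, std::size_t size) = 0;
  virtual void free_device(void *device_pointer) = 0;
};

/* Hypothetical backend: malloc stands in for cuMemAlloc/hipMalloc. */
class SketchDevice : public GPUDeviceModel {
 protected:
  bool alloc_device(void *&device_pointer, std::size_t size) override
  {
    device_pointer = std::malloc(size);
    return device_pointer != nullptr;
  }
  void free_device(void *device_pointer) override
  {
    std::free(device_pointer);
  }
};

int main()
{
  SketchDevice device;
  void *p = device.generic_alloc(1024);
  std::free(p); /* freed directly here; real code would go through generic_free */
  return 0;
}
```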
```diff
@@ -53,8 +53,12 @@ void HIPDevice::set_error(const string &error)
 }
 
 HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
-    : Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
+    : GPUDevice(info, stats, profiler)
 {
+  /* Verify that base class types can be used with specific backend types */
+  static_assert(sizeof(texMemObject) == sizeof(hipTextureObject_t));
+  static_assert(sizeof(arrayMemObject) == sizeof(hArray));
+
   first_error = true;
 
   hipDevId = info.num;
@@ -65,12 +69,6 @@ HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
 
   need_texture_info = false;
 
-  device_texture_headroom = 0;
-  device_working_headroom = 0;
-  move_texture_to_host = false;
-  map_host_limit = 0;
-  map_host_used = 0;
-  can_map_host = 0;
   pitch_alignment = 0;
 
   /* Initialize HIP. */
@@ -91,7 +89,9 @@ HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
   /* hipDeviceMapHost for mapping host memory when out of device memory.
    * hipDeviceLmemResizeToMax for reserving local memory ahead of render,
    * so we can predict which memory to map to host. */
-  hip_assert(hipDeviceGetAttribute(&can_map_host, hipDeviceAttributeCanMapHostMemory, hipDevice));
+  int value;
+  hip_assert(hipDeviceGetAttribute(&value, hipDeviceAttributeCanMapHostMemory, hipDevice));
+  can_map_host = value != 0;
 
   hip_assert(
       hipDeviceGetAttribute(&pitch_alignment, hipDeviceAttributeTexturePitchAlignment, hipDevice));
```
```diff
@@ -460,305 +460,58 @@ void HIPDevice::reserve_local_memory(const uint kernel_features)
 # endif
 }
 
-void HIPDevice::init_host_memory()
-{
-  /* Limit amount of host mapped memory, because allocating too much can
-   * cause system instability. Leave at least half or 4 GB of system
-   * memory free, whichever is smaller. */
-  size_t default_limit = 4 * 1024 * 1024 * 1024LL;
-  size_t system_ram = system_physical_ram();
-
-  if (system_ram > 0) {
-    if (system_ram / 2 > default_limit) {
-      map_host_limit = system_ram - default_limit;
-    }
-    else {
-      map_host_limit = system_ram / 2;
-    }
-  }
-  else {
-    VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
-    map_host_limit = 0;
-  }
-
-  /* Amount of device memory to keep is free after texture memory
-   * and working memory allocations respectively. We set the working
-   * memory limit headroom lower so that some space is left after all
-   * texture memory allocations. */
-  device_working_headroom = 32 * 1024 * 1024LL;   // 32MB
-  device_texture_headroom = 128 * 1024 * 1024LL;  // 128MB
-
-  VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
-            << " bytes. (" << string_human_readable_size(map_host_limit) << ")";
-}
-
-void HIPDevice::load_texture_info()
-{
-  if (need_texture_info) {
-    /* Unset flag before copying, so this does not loop indefinitely if the copy below calls
-     * into 'move_textures_to_host' (which calls 'load_texture_info' again). */
-    need_texture_info = false;
-    texture_info.copy_to_device();
-  }
-}
-
-void HIPDevice::move_textures_to_host(size_t size, bool for_texture)
-{
-  /* Break out of recursive call, which can happen when moving memory on a multi device. */
-  static bool any_device_moving_textures_to_host = false;
-  if (any_device_moving_textures_to_host) {
-    return;
-  }
-
-  /* Signal to reallocate textures in host memory only. */
-  move_texture_to_host = true;
-
-  while (size > 0) {
-    /* Find suitable memory allocation to move. */
-    device_memory *max_mem = NULL;
-    size_t max_size = 0;
-    bool max_is_image = false;
-
-    thread_scoped_lock lock(hip_mem_map_mutex);
-    foreach (HIPMemMap::value_type &pair, hip_mem_map) {
-      device_memory &mem = *pair.first;
-      HIPMem *cmem = &pair.second;
-
-      /* Can only move textures allocated on this device (and not those from peer devices).
-       * And need to ignore memory that is already on the host. */
-      if (!mem.is_resident(this) || cmem->use_mapped_host) {
-        continue;
-      }
-
-      bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
-                        (&mem != &texture_info);
-      bool is_image = is_texture && (mem.data_height > 1);
-
-      /* Can't move this type of memory. */
-      if (!is_texture || cmem->array) {
-        continue;
-      }
-
-      /* For other textures, only move image textures. */
-      if (for_texture && !is_image) {
-        continue;
-      }
-
-      /* Try to move largest allocation, prefer moving images. */
-      if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
-        max_is_image = is_image;
-        max_size = mem.device_size;
-        max_mem = &mem;
-      }
-    }
-    lock.unlock();
-
-    /* Move to host memory. This part is mutex protected since
-     * multiple HIP devices could be moving the memory. The
-     * first one will do it, and the rest will adopt the pointer. */
-    if (max_mem) {
-      VLOG_WORK << "Move memory from device to host: " << max_mem->name;
-
-      static thread_mutex move_mutex;
-      thread_scoped_lock lock(move_mutex);
-
-      any_device_moving_textures_to_host = true;
-
-      /* Potentially need to call back into multi device, so pointer mapping
-       * and peer devices are updated. This is also necessary since the device
-       * pointer may just be a key here, so cannot be accessed and freed directly.
-       * Unfortunately it does mean that memory is reallocated on all other
-       * devices as well, which is potentially dangerous when still in use (since
-       * a thread rendering on another devices would only be caught in this mutex
-       * if it so happens to do an allocation at the same time as well. */
-      max_mem->device_copy_to();
-      size = (max_size >= size) ? 0 : size - max_size;
-
-      any_device_moving_textures_to_host = false;
-    }
-    else {
-      break;
-    }
-  }
-
-  /* Unset flag before texture info is reloaded, since it should stay in device memory. */
-  move_texture_to_host = false;
-
-  /* Update texture info array with new pointers. */
-  load_texture_info();
-}
-
-HIPDevice::HIPMem *HIPDevice::generic_alloc(device_memory &mem, size_t pitch_padding)
+void HIPDevice::get_device_memory_info(size_t &total, size_t &free)
 {
   HIPContextScope scope(this);
 
-  hipDeviceptr_t device_pointer = 0;
-  size_t size = mem.memory_size() + pitch_padding;
-
-  hipError_t mem_alloc_result = hipErrorOutOfMemory;
-  const char *status = "";
-
-  /* First try allocating in device memory, respecting headroom. We make
-   * an exception for texture info. It is small and frequently accessed,
-   * so treat it as working memory.
-   *
-   * If there is not enough room for working memory, we will try to move
-   * textures to host memory, assuming the performance impact would have
-   * been worse for working memory. */
-  bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
-  bool is_image = is_texture && (mem.data_height > 1);
-
-  size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
-
-  size_t total = 0, free = 0;
   hipMemGetInfo(&free, &total);
-
-  /* Move textures to host memory if needed. */
-  if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
-    move_textures_to_host(size + headroom - free, is_texture);
-    hipMemGetInfo(&free, &total);
-  }
-
-  /* Allocate in device memory. */
-  if (!move_texture_to_host && (size + headroom) < free) {
-    mem_alloc_result = hipMalloc(&device_pointer, size);
-    if (mem_alloc_result == hipSuccess) {
-      status = " in device memory";
-    }
-  }
-
-  /* Fall back to mapped host memory if needed and possible. */
-
-  void *shared_pointer = 0;
-
-  if (mem_alloc_result != hipSuccess && can_map_host) {
-    if (mem.shared_pointer) {
-      /* Another device already allocated host memory. */
-      mem_alloc_result = hipSuccess;
-      shared_pointer = mem.shared_pointer;
-    }
-    else if (map_host_used + size < map_host_limit) {
-      /* Allocate host memory ourselves. */
-      mem_alloc_result = hipHostMalloc(
-          &shared_pointer, size, hipHostMallocMapped | hipHostMallocWriteCombined);
-
-      assert((mem_alloc_result == hipSuccess && shared_pointer != 0) ||
-             (mem_alloc_result != hipSuccess && shared_pointer == 0));
-    }
-
-    if (mem_alloc_result == hipSuccess) {
-      hip_assert(hipHostGetDevicePointer(&device_pointer, shared_pointer, 0));
-      map_host_used += size;
-      status = " in host memory";
-    }
-  }
-
-  if (mem_alloc_result != hipSuccess) {
-    status = " failed, out of device and host memory";
-    set_error("System is out of GPU and shared host memory");
-  }
-
-  if (mem.name) {
-    VLOG_WORK << "Buffer allocate: " << mem.name << ", "
-              << string_human_readable_number(mem.memory_size()) << " bytes. ("
-              << string_human_readable_size(mem.memory_size()) << ")" << status;
-  }
-
-  mem.device_pointer = (device_ptr)device_pointer;
-  mem.device_size = size;
-  stats.mem_alloc(size);
-
-  if (!mem.device_pointer) {
-    return NULL;
-  }
-
-  /* Insert into map of allocations. */
-  thread_scoped_lock lock(hip_mem_map_mutex);
-  HIPMem *cmem = &hip_mem_map[&mem];
-  if (shared_pointer != 0) {
-    /* Replace host pointer with our host allocation. Only works if
-     * HIP memory layout is the same and has no pitch padding. Also
-     * does not work if we move textures to host during a render,
-     * since other devices might be using the memory. */
-
-    if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
-        mem.host_pointer != shared_pointer) {
-      memcpy(shared_pointer, mem.host_pointer, size);
-
-      /* A Call to device_memory::host_free() should be preceded by
-       * a call to device_memory::device_free() for host memory
-       * allocated by a device to be handled properly. Two exceptions
-       * are here and a call in OptiXDevice::generic_alloc(), where
-       * the current host memory can be assumed to be allocated by
-       * device_memory::host_alloc(), not by a device */
-
-      mem.host_free();
-      mem.host_pointer = shared_pointer;
-    }
-    mem.shared_pointer = shared_pointer;
-    mem.shared_counter++;
-    cmem->use_mapped_host = true;
-  }
-  else {
-    cmem->use_mapped_host = false;
-  }
-
-  return cmem;
 }
 
-void HIPDevice::generic_copy_to(device_memory &mem)
+bool HIPDevice::alloc_device(void *&device_pointer, size_t size)
 {
-  if (!mem.host_pointer || !mem.device_pointer) {
-    return;
-  }
+  HIPContextScope scope(this);
 
-  /* If use_mapped_host of mem is false, the current device only uses device memory allocated by
-   * hipMalloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
-   * mem.host_pointer. */
-  thread_scoped_lock lock(hip_mem_map_mutex);
-  if (!hip_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
-    const HIPContextScope scope(this);
-    hip_assert(
-        hipMemcpyHtoD((hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size()));
-  }
+  hipError_t mem_alloc_result = hipMalloc((hipDeviceptr_t *)&device_pointer, size);
+  return mem_alloc_result == hipSuccess;
 }
 
-void HIPDevice::generic_free(device_memory &mem)
+void HIPDevice::free_device(void *device_pointer)
 {
-  if (mem.device_pointer) {
-    HIPContextScope scope(this);
-    thread_scoped_lock lock(hip_mem_map_mutex);
-    DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
-    const HIPMem &cmem = hip_mem_map[&mem];
+  HIPContextScope scope(this);
 
-    /* If cmem.use_mapped_host is true, reference counting is used
-     * to safely free a mapped host memory. */
+  hip_assert(hipFree((hipDeviceptr_t)device_pointer));
+}
 
-    if (cmem.use_mapped_host) {
-      assert(mem.shared_pointer);
-      if (mem.shared_pointer) {
-        assert(mem.shared_counter > 0);
-        if (--mem.shared_counter == 0) {
-          if (mem.host_pointer == mem.shared_pointer) {
-            mem.host_pointer = 0;
-          }
-          hipHostFree(mem.shared_pointer);
-          mem.shared_pointer = 0;
-        }
-      }
-      map_host_used -= mem.device_size;
-    }
-    else {
-      /* Free device memory. */
-      hip_assert(hipFree(mem.device_pointer));
-    }
+bool HIPDevice::alloc_host(void *&shared_pointer, size_t size)
+{
+  HIPContextScope scope(this);
 
-    stats.mem_free(mem.device_size);
-    mem.device_pointer = 0;
-    mem.device_size = 0;
+  hipError_t mem_alloc_result = hipHostMalloc(
+      &shared_pointer, size, hipHostMallocMapped | hipHostMallocWriteCombined);
 
-    hip_mem_map.erase(hip_mem_map.find(&mem));
-  }
+  return mem_alloc_result == hipSuccess;
 }
+
+void HIPDevice::free_host(void *shared_pointer)
+{
+  HIPContextScope scope(this);
+
+  hipHostFree(shared_pointer);
+}
+
+bool HIPDevice::transform_host_pointer(void *&device_pointer, void *&shared_pointer)
+{
+  HIPContextScope scope(this);
+
+  hip_assert(hipHostGetDevicePointer((hipDeviceptr_t *)&device_pointer, shared_pointer, 0));
+  return true;
+}
+
+void HIPDevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
+{
+  const HIPContextScope scope(this);
+
+  hip_assert(hipMemcpyHtoD((hipDeviceptr_t)device_pointer, host_pointer, size));
+}
 
 void HIPDevice::mem_alloc(device_memory &mem)
```
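The HIP hooks mirror the CUDA ones call for call. A standalone sketch of the underlying HIP runtime calls, assuming a ROCm/HIP toolchain (the `check` helper is hypothetical, error handling kept minimal):

```cpp
#include <cstdio>
#include <cstdlib>
#include <hip/hip_runtime.h>

static void check(hipError_t res, const char *what)
{
  if (res != hipSuccess) {
    std::fprintf(stderr, "%s failed: %s\n", what, hipGetErrorString(res));
    std::exit(1);
  }
}

int main()
{
  /* alloc_device / free_device */
  void *dptr = nullptr;
  check(hipMalloc(&dptr, 1024), "hipMalloc");
  check(hipFree(dptr), "hipFree");

  /* alloc_host / transform_host_pointer / free_host */
  void *hptr = nullptr;
  check(hipHostMalloc(&hptr, 1024, hipHostMallocMapped | hipHostMallocWriteCombined),
        "hipHostMalloc");
  void *mapped = nullptr;
  check(hipHostGetDevicePointer(&mapped, hptr, 0), "hipHostGetDevicePointer");
  check(hipHostFree(hptr), "hipHostFree");
  return 0;
}
```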
@@ -823,8 +576,8 @@ void HIPDevice::mem_zero(device_memory &mem)

   /* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
    * regardless of mem.host_pointer and mem.shared_pointer. */
-  thread_scoped_lock lock(hip_mem_map_mutex);
-  if (!hip_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
+  thread_scoped_lock lock(device_mem_map_mutex);
+  if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
     const HIPContextScope scope(this);
     hip_assert(hipMemsetD8((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size()));
   }
@@ -951,19 +704,19 @@ void HIPDevice::tex_alloc(device_texture &mem)
     return;
   }

-  HIPMem *cmem = NULL;
+  Mem *cmem = NULL;
   hArray array_3d = NULL;
   size_t src_pitch = mem.data_width * dsize * mem.data_elements;
   size_t dst_pitch = src_pitch;

   if (!mem.is_resident(this)) {
-    thread_scoped_lock lock(hip_mem_map_mutex);
-    cmem = &hip_mem_map[&mem];
+    thread_scoped_lock lock(device_mem_map_mutex);
+    cmem = &device_mem_map[&mem];
     cmem->texobject = 0;

     if (mem.data_depth > 1) {
       array_3d = (hArray)mem.device_pointer;
-      cmem->array = array_3d;
+      cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
     }
     else if (mem.data_height > 0) {
       dst_pitch = align_up(src_pitch, pitch_alignment);
@@ -1007,10 +760,10 @@ void HIPDevice::tex_alloc(device_texture &mem)
     mem.device_size = size;
     stats.mem_alloc(size);

-    thread_scoped_lock lock(hip_mem_map_mutex);
-    cmem = &hip_mem_map[&mem];
+    thread_scoped_lock lock(device_mem_map_mutex);
+    cmem = &device_mem_map[&mem];
     cmem->texobject = 0;
-    cmem->array = array_3d;
+    cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
   }
   else if (mem.data_height > 0) {
     /* 2D texture, using pitch aligned linear memory. */
@@ -1095,8 +848,8 @@ void HIPDevice::tex_alloc(device_texture &mem)
   texDesc.filterMode = filter_mode;
   texDesc.flags = HIP_TRSF_NORMALIZED_COORDINATES;

-  thread_scoped_lock lock(hip_mem_map_mutex);
-  cmem = &hip_mem_map[&mem];
+  thread_scoped_lock lock(device_mem_map_mutex);
+  cmem = &device_mem_map[&mem];

   hip_assert(hipTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));

@@ -1111,9 +864,9 @@ void HIPDevice::tex_free(device_texture &mem)
 {
   if (mem.device_pointer) {
     HIPContextScope scope(this);
-    thread_scoped_lock lock(hip_mem_map_mutex);
-    DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
-    const HIPMem &cmem = hip_mem_map[&mem];
+    thread_scoped_lock lock(device_mem_map_mutex);
+    DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
+    const Mem &cmem = device_mem_map[&mem];

     if (cmem.texobject) {
       /* Free bindless texture. */
@@ -1122,16 +875,16 @@ void HIPDevice::tex_free(device_texture &mem)

     if (!mem.is_resident(this)) {
       /* Do not free memory here, since it was allocated on a different device. */
-      hip_mem_map.erase(hip_mem_map.find(&mem));
+      device_mem_map.erase(device_mem_map.find(&mem));
     }
     else if (cmem.array) {
       /* Free array. */
-      hipArrayDestroy(cmem.array);
+      hipArrayDestroy(reinterpret_cast<hArray>(cmem.array));
       stats.mem_free(mem.device_size);
       mem.device_pointer = 0;
       mem.device_size = 0;

-      hip_mem_map.erase(hip_mem_map.find(&mem));
+      device_mem_map.erase(device_mem_map.find(&mem));
     }
     else {
       lock.unlock();
@@ -1153,7 +906,7 @@ bool HIPDevice::should_use_graphics_interop()
    * possible, but from the empiric measurements it can be considerably slower than using naive
    * pixels copy. */

-  /* Disable graphics interop for now, because of driver bug in 21.40. See T92972 */
+  /* Disable graphics interop for now, because of driver bug in 21.40. See #92972 */
 # if 0
   HIPContextScope scope(this);
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN

 class DeviceQueue;

-class HIPDevice : public Device {
+class HIPDevice : public GPUDevice {

   friend class HIPContextScope;

@@ -26,36 +26,11 @@ class HIPDevice : public Device {
   hipDevice_t hipDevice;
   hipCtx_t hipContext;
   hipModule_t hipModule;
-  size_t device_texture_headroom;
-  size_t device_working_headroom;
-  bool move_texture_to_host;
-  size_t map_host_used;
-  size_t map_host_limit;
-  int can_map_host;
   int pitch_alignment;
   int hipDevId;
   int hipDevArchitecture;
   bool first_error;

-  struct HIPMem {
-    HIPMem() : texobject(0), array(0), use_mapped_host(false)
-    {
-    }
-
-    hipTextureObject_t texobject;
-    hArray array;
-
-    /* If true, a mapped host memory in shared_pointer is being used. */
-    bool use_mapped_host;
-  };
-  typedef map<device_memory *, HIPMem> HIPMemMap;
-  HIPMemMap hip_mem_map;
-  thread_mutex hip_mem_map_mutex;
-
-  /* Bindless Textures */
-  device_vector<TextureInfo> texture_info;
-  bool need_texture_info;
-
   HIPDeviceKernels kernels;

   static bool have_precompiled_kernels();

@@ -81,17 +56,13 @@ class HIPDevice : public Device {
   virtual bool load_kernels(const uint kernel_features) override;
   void reserve_local_memory(const uint kernel_features);

-  void init_host_memory();
-
-  void load_texture_info();
-
-  void move_textures_to_host(size_t size, bool for_texture);
-
-  HIPMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
-
-  void generic_copy_to(device_memory &mem);
-
-  void generic_free(device_memory &mem);
+  virtual void get_device_memory_info(size_t &total, size_t &free) override;
+  virtual bool alloc_device(void *&device_pointer, size_t size) override;
+  virtual void free_device(void *device_pointer) override;
+  virtual bool alloc_host(void *&shared_pointer, size_t size) override;
+  virtual void free_host(void *shared_pointer) override;
+  virtual bool transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
+  virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) override;

   void mem_alloc(device_memory &mem) override;
@@ -51,7 +51,7 @@ static inline bool hipSupportsDevice(const int hipDevId)
   hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
   hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);

-  return (major >= 10);
+  return (major >= 9);
 }

 CCL_NAMESPACE_END
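The relaxed check above (major >= 9 rather than major >= 10) admits AMD Vega-class (gfx9xx) GPUs, which report a compute-capability major version of 9 through hipDeviceAttributeComputeCapabilityMajor; previously only gfx10xx (RDNA) and newer devices passed.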
@@ -73,6 +73,10 @@ const char *device_kernel_as_string(DeviceKernel kernel)
       return "integrator_terminated_paths_array";
     case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY:
       return "integrator_sorted_paths_array";
+    case DEVICE_KERNEL_INTEGRATOR_SORT_BUCKET_PASS:
+      return "integrator_sort_bucket_pass";
+    case DEVICE_KERNEL_INTEGRATOR_SORT_WRITE_PASS:
+      return "integrator_sort_write_pass";
     case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY:
       return "integrator_compact_paths_array";
     case DEVICE_KERNEL_INTEGRATOR_COMPACT_STATES:
@@ -247,6 +247,8 @@ class device_memory {
   bool is_resident(Device *sub_device) const;

 protected:
+  friend class Device;
+  friend class GPUDevice;
   friend class CUDADevice;
   friend class OptiXDevice;
   friend class HIPDevice;
@@ -21,6 +21,7 @@ class BVHMetal : public BVH {

   API_AVAILABLE(macos(11.0))
   vector<id<MTLAccelerationStructure>> blas_array;
+  vector<uint32_t> blas_lookup;

   bool motion_blur = false;

@@ -816,6 +816,11 @@ bool BVHMetal::build_TLAS(Progress &progress,

   uint32_t instance_index = 0;
   uint32_t motion_transform_index = 0;

+  // allocate look up buffer for worst case scenario
+  uint64_t count = objects.size();
+  blas_lookup.resize(count);
+
   for (Object *ob : objects) {
     /* Skip non-traceable objects */
     if (!ob->is_traceable())
@@ -843,12 +848,15 @@ bool BVHMetal::build_TLAS(Progress &progress,
     /* Set user instance ID to object index */
     int object_index = ob->get_device_index();
     uint32_t user_id = uint32_t(object_index);
+    int currIndex = instance_index++;
+    assert(user_id < blas_lookup.size());
+    blas_lookup[user_id] = accel_struct_index;

     /* Bake into the appropriate descriptor */
     if (motion_blur) {
       MTLAccelerationStructureMotionInstanceDescriptor *instances =
           (MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
-      MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[instance_index++];
+      MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[currIndex];

       desc.accelerationStructureIndex = accel_struct_index;
       desc.userID = user_id;
@@ -894,7 +902,7 @@ bool BVHMetal::build_TLAS(Progress &progress,
     else {
       MTLAccelerationStructureUserIDInstanceDescriptor *instances =
           (MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
-      MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[instance_index++];
+      MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[currIndex];

       desc.accelerationStructureIndex = accel_struct_index;
       desc.userID = user_id;
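The blas_lookup table filled above is a dense map from object index (the userID baked into each TLAS instance) to the index of the BLAS that instance references, so shading code that only knows the hit instance's user ID can find the matching entry in blas_array. A CPU-side illustration of the same mapping, with hypothetical stand-in names rather than Cycles API:

#include <cassert>
#include <cstdint>
#include <vector>

struct Instance {
  uint32_t user_id;            /* object index baked into the TLAS instance */
  uint32_t accel_struct_index; /* which BLAS this instance references */
};

int main()
{
  const std::vector<Instance> instances = {{0, 2}, {1, 0}, {2, 2}};

  /* Build the lookup the way build_TLAS does: worst case, one slot per object. */
  std::vector<uint32_t> blas_lookup(instances.size());
  for (const Instance &inst : instances) {
    assert(inst.user_id < blas_lookup.size());
    blas_lookup[inst.user_id] = inst.accel_struct_index;
  }

  /* A hit that only knows its user ID can now recover the BLAS index. */
  const uint32_t blas_index = blas_lookup[2];
  assert(blas_index == 2);
  return 0;
}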
@@ -55,6 +55,9 @@ void device_metal_info(vector<DeviceInfo> &devices)
     info.denoisers = DENOISER_NONE;
     info.id = id;

+    info.has_nanovdb = MetalInfo::get_device_vendor(device) == METAL_GPU_APPLE;
+    info.has_light_tree = MetalInfo::get_device_vendor(device) != METAL_GPU_AMD;
+
     devices.push_back(info);
     device_index++;
   }
@@ -67,13 +67,21 @@ class MetalDevice : public Device {
   std::recursive_mutex metal_mem_map_mutex;

   /* Bindless Textures */
+  bool is_texture(const TextureInfo &tex);
   device_vector<TextureInfo> texture_info;
   bool need_texture_info;
   id<MTLArgumentEncoder> mtlTextureArgEncoder = nil;
+  id<MTLArgumentEncoder> mtlBufferArgEncoder = nil;
+  id<MTLBuffer> buffer_bindings_1d = nil;
   id<MTLBuffer> texture_bindings_2d = nil;
   id<MTLBuffer> texture_bindings_3d = nil;
   std::vector<id<MTLTexture>> texture_slot_map;

+  /* BLAS encoding & lookup */
+  id<MTLArgumentEncoder> mtlBlasArgEncoder = nil;
+  id<MTLBuffer> blas_buffer = nil;
+  id<MTLBuffer> blas_lookup_buffer = nil;
+
   bool use_metalrt = false;
   MetalPipelineType kernel_specialization_level = PSO_GENERIC;

@@ -105,6 +113,8 @@ class MetalDevice : public Device {

   bool use_adaptive_compilation();

+  bool use_local_atomic_sort() const;
+
   bool make_source_and_check_if_compile_needed(MetalPipelineType pso_type);

   void make_source(MetalPipelineType pso_type, const uint kernel_features);
@@ -91,11 +91,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
     }
   }

-  texture_bindings_2d = [mtlDevice newBufferWithLength:4096 options:default_storage_mode];
-  texture_bindings_3d = [mtlDevice newBufferWithLength:4096 options:default_storage_mode];
-
-  stats.mem_alloc(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
-
   switch (device_vendor) {
     default:
       break;
@@ -105,6 +100,7 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
     }
     case METAL_GPU_AMD: {
       max_threads_per_threadgroup = 128;
+      use_metalrt = info.use_metalrt;
       break;
     }
     case METAL_GPU_APPLE: {
@@ -155,6 +151,16 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
   arg_desc_texture.dataType = MTLDataTypeTexture;
   arg_desc_texture.access = MTLArgumentAccessReadOnly;
   mtlTextureArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_texture ]];
+  MTLArgumentDescriptor *arg_desc_buffer = [[MTLArgumentDescriptor alloc] init];
+  arg_desc_buffer.dataType = MTLDataTypePointer;
+  arg_desc_buffer.access = MTLArgumentAccessReadOnly;
+  mtlBufferArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_buffer ]];
+
+  buffer_bindings_1d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode];
+  texture_bindings_2d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode];
+  texture_bindings_3d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode];
+  stats.mem_alloc(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
+                  texture_bindings_3d.allocatedSize);

   /* command queue for non-tracing work on the GPU */
   mtlGeneralCommandQueue = [mtlDevice newCommandQueue];
@@ -179,6 +185,8 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
     arg_desc_tex.dataType = MTLDataTypePointer;
     arg_desc_tex.access = MTLArgumentAccessReadOnly;

+    arg_desc_tex.index = index++;
+    [ancillary_desc addObject:[arg_desc_tex copy]]; /* metal_buf_1d */
     arg_desc_tex.index = index++;
     [ancillary_desc addObject:[arg_desc_tex copy]]; /* metal_tex_2d */
     arg_desc_tex.index = index++;
@@ -192,6 +200,10 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
       arg_desc_as.dataType = MTLDataTypeInstanceAccelerationStructure;
       arg_desc_as.access = MTLArgumentAccessReadOnly;

+      MTLArgumentDescriptor *arg_desc_ptrs = [[MTLArgumentDescriptor alloc] init];
+      arg_desc_ptrs.dataType = MTLDataTypePointer;
+      arg_desc_ptrs.access = MTLArgumentAccessReadOnly;
+
       MTLArgumentDescriptor *arg_desc_ift = [[MTLArgumentDescriptor alloc] init];
       arg_desc_ift.dataType = MTLDataTypeIntersectionFunctionTable;
       arg_desc_ift.access = MTLArgumentAccessReadOnly;
@@ -204,14 +216,32 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
       [ancillary_desc addObject:[arg_desc_ift copy]]; /* ift_shadow */
       arg_desc_ift.index = index++;
       [ancillary_desc addObject:[arg_desc_ift copy]]; /* ift_local */
+      arg_desc_ift.index = index++;
+      [ancillary_desc addObject:[arg_desc_ift copy]]; /* ift_local_prim */
+      arg_desc_ptrs.index = index++;
+      [ancillary_desc addObject:[arg_desc_ptrs copy]]; /* blas array */
+      arg_desc_ptrs.index = index++;
+      [ancillary_desc addObject:[arg_desc_ptrs copy]]; /* look up table for blas */

       [arg_desc_ift release];
       [arg_desc_as release];
+      [arg_desc_ptrs release];
     }
   }

   mtlAncillaryArgEncoder = [mtlDevice newArgumentEncoderWithArguments:ancillary_desc];

+  // preparing the blas arg encoder
+  if (@available(macos 11.0, *)) {
+    if (use_metalrt) {
+      MTLArgumentDescriptor *arg_desc_blas = [[MTLArgumentDescriptor alloc] init];
+      arg_desc_blas.dataType = MTLDataTypeInstanceAccelerationStructure;
+      arg_desc_blas.access = MTLArgumentAccessReadOnly;
+      mtlBlasArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_blas ]];
+      [arg_desc_blas release];
+    }
+  }
+
   for (int i = 0; i < ancillary_desc.count; i++) {
     [ancillary_desc[i] release];
   }
|
|||||||
* existing_devices_mutex). */
|
* existing_devices_mutex). */
|
||||||
thread_scoped_lock lock(existing_devices_mutex);
|
thread_scoped_lock lock(existing_devices_mutex);
|
||||||
|
|
||||||
for (auto &tex : texture_slot_map) {
|
int num_resources = texture_info.size();
|
||||||
if (tex) {
|
for (int res = 0; res < num_resources; res++) {
|
||||||
[tex release];
|
if (is_texture(texture_info[res])) {
|
||||||
tex = nil;
|
[texture_slot_map[res] release];
|
||||||
|
texture_slot_map[res] = nil;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
flush_delayed_free_list();
|
flush_delayed_free_list();
|
||||||
|
|
||||||
if (texture_bindings_2d) {
|
if (texture_bindings_2d) {
|
||||||
stats.mem_free(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
|
stats.mem_free(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
|
||||||
|
texture_bindings_3d.allocatedSize);
|
||||||
|
[buffer_bindings_1d release];
|
||||||
[texture_bindings_2d release];
|
[texture_bindings_2d release];
|
||||||
[texture_bindings_3d release];
|
[texture_bindings_3d release];
|
||||||
}
|
}
|
||||||
[mtlTextureArgEncoder release];
|
[mtlTextureArgEncoder release];
|
||||||
[mtlBufferKernelParamsEncoder release];
|
[mtlBufferKernelParamsEncoder release];
|
||||||
|
[mtlBufferArgEncoder release];
|
||||||
[mtlASArgEncoder release];
|
[mtlASArgEncoder release];
|
||||||
[mtlAncillaryArgEncoder release];
|
[mtlAncillaryArgEncoder release];
|
||||||
[mtlGeneralCommandQueue release];
|
[mtlGeneralCommandQueue release];
|
||||||
@@ -271,6 +305,11 @@ bool MetalDevice::use_adaptive_compilation()
|
|||||||
return DebugFlags().metal.adaptive_compile;
|
return DebugFlags().metal.adaptive_compile;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool MetalDevice::use_local_atomic_sort() const
|
||||||
|
{
|
||||||
|
return DebugFlags().metal.use_local_atomic_sort;
|
||||||
|
}
|
||||||
|
|
||||||
void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_features)
|
void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_features)
|
||||||
{
|
{
|
||||||
string global_defines;
|
string global_defines;
|
||||||
@@ -278,6 +317,10 @@ void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_feat
|
|||||||
global_defines += "#define __KERNEL_FEATURES__ " + to_string(kernel_features) + "\n";
|
global_defines += "#define __KERNEL_FEATURES__ " + to_string(kernel_features) + "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (use_local_atomic_sort()) {
|
||||||
|
global_defines += "#define __KERNEL_LOCAL_ATOMIC_SORT__\n";
|
||||||
|
}
|
||||||
|
|
||||||
if (use_metalrt) {
|
if (use_metalrt) {
|
||||||
global_defines += "#define __METALRT__\n";
|
global_defines += "#define __METALRT__\n";
|
||||||
if (motion_blur) {
|
if (motion_blur) {
|
||||||
@@ -300,6 +343,9 @@ void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_feat
|
|||||||
break;
|
break;
|
||||||
case METAL_GPU_APPLE:
|
case METAL_GPU_APPLE:
|
||||||
global_defines += "#define __KERNEL_METAL_APPLE__\n";
|
global_defines += "#define __KERNEL_METAL_APPLE__\n";
|
||||||
|
# ifdef WITH_NANOVDB
|
||||||
|
global_defines += "#define WITH_NANOVDB\n";
|
||||||
|
# endif
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -514,6 +560,11 @@ void MetalDevice::compile_and_load(int device_id, MetalPipelineType pso_type)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool MetalDevice::is_texture(const TextureInfo &tex)
|
||||||
|
{
|
||||||
|
return (tex.depth > 0 || tex.height > 0);
|
||||||
|
}
|
||||||
|
|
||||||
void MetalDevice::load_texture_info()
|
void MetalDevice::load_texture_info()
|
||||||
{
|
{
|
||||||
if (need_texture_info) {
|
if (need_texture_info) {
|
||||||
@@ -525,21 +576,20 @@ void MetalDevice::load_texture_info()
|
|||||||
|
|
||||||
for (int tex = 0; tex < num_textures; tex++) {
|
for (int tex = 0; tex < num_textures; tex++) {
|
||||||
uint64_t offset = tex * sizeof(void *);
|
uint64_t offset = tex * sizeof(void *);
|
||||||
|
if (is_texture(texture_info[tex]) && texture_slot_map[tex]) {
|
||||||
id<MTLTexture> metal_texture = texture_slot_map[tex];
|
id<MTLTexture> metal_texture = texture_slot_map[tex];
|
||||||
if (!metal_texture) {
|
|
||||||
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_2d offset:offset];
|
|
||||||
[mtlTextureArgEncoder setTexture:nil atIndex:0];
|
|
||||||
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_3d offset:offset];
|
|
||||||
[mtlTextureArgEncoder setTexture:nil atIndex:0];
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
MTLTextureType type = metal_texture.textureType;
|
MTLTextureType type = metal_texture.textureType;
|
||||||
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_2d offset:offset];
|
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_2d offset:offset];
|
||||||
[mtlTextureArgEncoder setTexture:type == MTLTextureType2D ? metal_texture : nil atIndex:0];
|
[mtlTextureArgEncoder setTexture:type == MTLTextureType2D ? metal_texture : nil atIndex:0];
|
||||||
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_3d offset:offset];
|
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_3d offset:offset];
|
||||||
[mtlTextureArgEncoder setTexture:type == MTLTextureType3D ? metal_texture : nil atIndex:0];
|
[mtlTextureArgEncoder setTexture:type == MTLTextureType3D ? metal_texture : nil atIndex:0];
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_2d offset:offset];
|
||||||
|
[mtlTextureArgEncoder setTexture:nil atIndex:0];
|
||||||
|
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_3d offset:offset];
|
||||||
|
[mtlTextureArgEncoder setTexture:nil atIndex:0];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (default_storage_mode == MTLResourceStorageModeManaged) {
|
if (default_storage_mode == MTLResourceStorageModeManaged) {
|
||||||
[texture_bindings_2d didModifyRange:NSMakeRange(0, num_textures * sizeof(void *))];
|
[texture_bindings_2d didModifyRange:NSMakeRange(0, num_textures * sizeof(void *))];
|
||||||
@@ -558,7 +608,7 @@ void MetalDevice::erase_allocation(device_memory &mem)
|
|||||||
if (it != metal_mem_map.end()) {
|
if (it != metal_mem_map.end()) {
|
||||||
MetalMem *mmem = it->second.get();
|
MetalMem *mmem = it->second.get();
|
||||||
|
|
||||||
/* blank out reference to MetalMem* in the launch params (fixes crash T94736) */
|
/* blank out reference to MetalMem* in the launch params (fixes crash #94736) */
|
||||||
if (mmem->pointer_index >= 0) {
|
if (mmem->pointer_index >= 0) {
|
||||||
device_ptr *pointers = (device_ptr *)&launch_params;
|
device_ptr *pointers = (device_ptr *)&launch_params;
|
||||||
pointers[mmem->pointer_index] = 0;
|
pointers[mmem->pointer_index] = 0;
|
||||||
@@ -712,7 +762,6 @@ void MetalDevice::generic_free(device_memory &mem)
|
|||||||
mem.shared_pointer = 0;
|
mem.shared_pointer = 0;
|
||||||
|
|
||||||
/* Free device memory. */
|
/* Free device memory. */
|
||||||
delayed_free_list.push_back(mmem.mtlBuffer);
|
|
||||||
mmem.mtlBuffer = nil;
|
mmem.mtlBuffer = nil;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -947,7 +996,7 @@ void MetalDevice::global_free(device_memory &mem)

 void MetalDevice::tex_alloc_as_buffer(device_texture &mem)
 {
-  generic_alloc(mem);
+  MetalDevice::MetalMem *mmem = generic_alloc(mem);
   generic_copy_to(mem);

   /* Resize once */
@@ -956,27 +1005,32 @@ void MetalDevice::tex_alloc_as_buffer(device_texture &mem)
     /* Allocate some slots in advance, to reduce amount
      * of re-allocations. */
     texture_info.resize(round_up(slot + 1, 128));
+    texture_slot_map.resize(round_up(slot + 1, 128));
   }

-  mem.info.data = (uint64_t)mem.device_pointer;
-
-  /* Set Mapping and tag that we need to (re-)upload to device */
   texture_info[slot] = mem.info;
+  uint64_t offset = slot * sizeof(void *);
+  [mtlBufferArgEncoder setArgumentBuffer:buffer_bindings_1d offset:offset];
+  [mtlBufferArgEncoder setBuffer:mmem->mtlBuffer offset:0 atIndex:0];
+  texture_info[slot].data = *(uint64_t *)((uint64_t)buffer_bindings_1d.contents + offset);
+  texture_slot_map[slot] = nil;
   need_texture_info = true;
 }

 void MetalDevice::tex_alloc(device_texture &mem)
 {
   /* Check that dimensions fit within maximum allowable size.
+   * If 1D texture is allocated, use 1D buffer.
    * See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf */
-  if (mem.data_width > 16384 || mem.data_height > 16384) {
-    set_error(string_printf(
-        "Texture exceeds maximum allowed size of 16384 x 16384 (requested: %zu x %zu)",
-        mem.data_width,
-        mem.data_height));
-    return;
+  if (mem.data_height > 0) {
+    if (mem.data_width > 16384 || mem.data_height > 16384) {
+      set_error(string_printf(
+          "Texture exceeds maximum allowed size of 16384 x 16384 (requested: %zu x %zu)",
+          mem.data_width,
+          mem.data_height));
+      return;
+    }
   }

   MTLStorageMode storage_mode = MTLStorageModeManaged;
   if (@available(macos 10.15, *)) {
     if ([mtlDevice hasUnifiedMemory] &&
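The reworked tex_alloc_as_buffer path stores 1D texture data in plain Metal buffers bound through buffer_bindings_1d, so the 16384-pixel texture limit now only applies when the allocation really is 2D or 3D. A sketch of the implied dimensionality dispatch (assumed semantics: data_height == 0 and data_depth == 0 means 1D; illustration only, not Cycles code):

enum class TexStorage { Buffer1D, Texture2D, Texture3D };

struct Dims {
  size_t data_width, data_height, data_depth;
};

static TexStorage pick_storage(const Dims &mem)
{
  if (mem.data_depth > 1) {
    return TexStorage::Texture3D;
  }
  if (mem.data_height > 0) {
    return TexStorage::Texture2D; /* subject to the 16384 x 16384 limit */
  }
  return TexStorage::Buffer1D; /* the tex_alloc_as_buffer() path */
}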
@@ -1116,8 +1170,9 @@ void MetalDevice::tex_alloc(device_texture &mem)
|
|||||||
bytesPerRow:src_pitch];
|
bytesPerRow:src_pitch];
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
assert(0);
|
|
||||||
/* 1D texture, using linear memory. */
|
/* 1D texture, using linear memory. */
|
||||||
|
tex_alloc_as_buffer(mem);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
mem.device_pointer = (device_ptr)mtlTexture;
|
mem.device_pointer = (device_ptr)mtlTexture;
|
||||||
@@ -1141,17 +1196,22 @@ void MetalDevice::tex_alloc(device_texture &mem)
|
|||||||
ssize_t min_buffer_length = sizeof(void *) * texture_info.size();
|
ssize_t min_buffer_length = sizeof(void *) * texture_info.size();
|
||||||
if (!texture_bindings_2d || (texture_bindings_2d.length < min_buffer_length)) {
|
if (!texture_bindings_2d || (texture_bindings_2d.length < min_buffer_length)) {
|
||||||
if (texture_bindings_2d) {
|
if (texture_bindings_2d) {
|
||||||
|
delayed_free_list.push_back(buffer_bindings_1d);
|
||||||
delayed_free_list.push_back(texture_bindings_2d);
|
delayed_free_list.push_back(texture_bindings_2d);
|
||||||
delayed_free_list.push_back(texture_bindings_3d);
|
delayed_free_list.push_back(texture_bindings_3d);
|
||||||
|
|
||||||
stats.mem_free(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
|
stats.mem_free(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
|
||||||
|
texture_bindings_3d.allocatedSize);
|
||||||
}
|
}
|
||||||
|
buffer_bindings_1d = [mtlDevice newBufferWithLength:min_buffer_length
|
||||||
|
options:default_storage_mode];
|
||||||
texture_bindings_2d = [mtlDevice newBufferWithLength:min_buffer_length
|
texture_bindings_2d = [mtlDevice newBufferWithLength:min_buffer_length
|
||||||
options:default_storage_mode];
|
options:default_storage_mode];
|
||||||
texture_bindings_3d = [mtlDevice newBufferWithLength:min_buffer_length
|
texture_bindings_3d = [mtlDevice newBufferWithLength:min_buffer_length
|
||||||
options:default_storage_mode];
|
options:default_storage_mode];
|
||||||
|
|
||||||
stats.mem_alloc(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
|
stats.mem_alloc(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
|
||||||
|
texture_bindings_3d.allocatedSize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1178,12 +1238,18 @@ void MetalDevice::tex_alloc(device_texture &mem)
|
|||||||
|
|
||||||
void MetalDevice::tex_free(device_texture &mem)
|
void MetalDevice::tex_free(device_texture &mem)
|
||||||
{
|
{
|
||||||
|
if (mem.data_depth == 0 && mem.data_height == 0) {
|
||||||
|
generic_free(mem);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (metal_mem_map.count(&mem)) {
|
if (metal_mem_map.count(&mem)) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
|
std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
|
||||||
MetalMem &mmem = *metal_mem_map.at(&mem);
|
MetalMem &mmem = *metal_mem_map.at(&mem);
|
||||||
|
|
||||||
assert(texture_slot_map[mem.slot] == mmem.mtlTexture);
|
assert(texture_slot_map[mem.slot] == mmem.mtlTexture);
|
||||||
texture_slot_map[mem.slot] = nil;
|
if (texture_slot_map[mem.slot] == mmem.mtlTexture)
|
||||||
|
texture_slot_map[mem.slot] = nil;
|
||||||
|
|
||||||
if (mmem.mtlTexture) {
|
if (mmem.mtlTexture) {
|
||||||
/* Free bindless texture. */
|
/* Free bindless texture. */
|
||||||
@@ -1231,6 +1297,33 @@ void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
|||||||
if (@available(macos 11.0, *)) {
|
if (@available(macos 11.0, *)) {
|
||||||
if (bvh->params.top_level) {
|
if (bvh->params.top_level) {
|
||||||
bvhMetalRT = bvh_metal;
|
bvhMetalRT = bvh_metal;
|
||||||
|
|
||||||
|
// allocate required buffers for BLAS array
|
||||||
|
uint64_t count = bvhMetalRT->blas_array.size();
|
||||||
|
uint64_t bufferSize = mtlBlasArgEncoder.encodedLength * count;
|
||||||
|
blas_buffer = [mtlDevice newBufferWithLength:bufferSize options:default_storage_mode];
|
||||||
|
stats.mem_alloc(blas_buffer.allocatedSize);
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < count; ++i) {
|
||||||
|
[mtlBlasArgEncoder setArgumentBuffer:blas_buffer
|
||||||
|
offset:i * mtlBlasArgEncoder.encodedLength];
|
||||||
|
[mtlBlasArgEncoder setAccelerationStructure:bvhMetalRT->blas_array[i] atIndex:0];
|
||||||
|
}
|
||||||
|
|
||||||
|
count = bvhMetalRT->blas_lookup.size();
|
||||||
|
bufferSize = sizeof(uint32_t) * count;
|
||||||
|
blas_lookup_buffer = [mtlDevice newBufferWithLength:bufferSize
|
||||||
|
options:default_storage_mode];
|
||||||
|
stats.mem_alloc(blas_lookup_buffer.allocatedSize);
|
||||||
|
|
||||||
|
memcpy([blas_lookup_buffer contents],
|
||||||
|
bvhMetalRT -> blas_lookup.data(),
|
||||||
|
blas_lookup_buffer.allocatedSize);
|
||||||
|
|
||||||
|
if (default_storage_mode == MTLResourceStorageModeManaged) {
|
||||||
|
[blas_buffer didModifyRange:NSMakeRange(0, blas_buffer.length)];
|
||||||
|
[blas_lookup_buffer didModifyRange:NSMakeRange(0, blas_lookup_buffer.length)];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -19,6 +19,8 @@ enum {
   METALRT_FUNC_SHADOW_BOX,
   METALRT_FUNC_LOCAL_TRI,
   METALRT_FUNC_LOCAL_BOX,
+  METALRT_FUNC_LOCAL_TRI_PRIM,
+  METALRT_FUNC_LOCAL_BOX_PRIM,
   METALRT_FUNC_CURVE_RIBBON,
   METALRT_FUNC_CURVE_RIBBON_SHADOW,
   METALRT_FUNC_CURVE_ALL,
@@ -28,7 +30,13 @@ enum {
   METALRT_FUNC_NUM
 };

-enum { METALRT_TABLE_DEFAULT, METALRT_TABLE_SHADOW, METALRT_TABLE_LOCAL, METALRT_TABLE_NUM };
+enum {
+  METALRT_TABLE_DEFAULT,
+  METALRT_TABLE_SHADOW,
+  METALRT_TABLE_LOCAL,
+  METALRT_TABLE_LOCAL_PRIM,
+  METALRT_TABLE_NUM
+};

 /* Pipeline State Object types */
 enum MetalPipelineType {
@@ -87,6 +87,9 @@ struct ShaderCache {
         break;
       }
     }
+
+    occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SORT_BUCKET_PASS] = {1024, 1024};
+    occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SORT_WRITE_PASS] = {1024, 1024};
   }
   ~ShaderCache();

@@ -521,6 +524,8 @@ void MetalKernelPipeline::compile()
      "__anyhit__cycles_metalrt_shadow_all_hit_box",
      "__anyhit__cycles_metalrt_local_hit_tri",
      "__anyhit__cycles_metalrt_local_hit_box",
+     "__anyhit__cycles_metalrt_local_hit_tri_prim",
+     "__anyhit__cycles_metalrt_local_hit_box_prim",
      "__intersection__curve_ribbon",
      "__intersection__curve_ribbon_shadow",
      "__intersection__curve_all",
@@ -611,11 +616,17 @@ void MetalKernelPipeline::compile()
                          rt_intersection_function[METALRT_FUNC_LOCAL_BOX],
                          rt_intersection_function[METALRT_FUNC_LOCAL_BOX],
                          nil];
+    table_functions[METALRT_TABLE_LOCAL_PRIM] = [NSArray
+        arrayWithObjects:rt_intersection_function[METALRT_FUNC_LOCAL_TRI_PRIM],
+                         rt_intersection_function[METALRT_FUNC_LOCAL_BOX_PRIM],
+                         rt_intersection_function[METALRT_FUNC_LOCAL_BOX_PRIM],
+                         nil];

     NSMutableSet *unique_functions = [NSMutableSet
         setWithArray:table_functions[METALRT_TABLE_DEFAULT]];
     [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]];
     [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]];
+    [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL_PRIM]];

     if (kernel_has_intersection(device_kernel)) {
       linked_functions = [[NSArray arrayWithArray:[unique_functions allObjects]]
@@ -25,6 +25,7 @@ class MetalDeviceQueue : public DeviceQueue {
   virtual int num_concurrent_states(const size_t) const override;
   virtual int num_concurrent_busy_states(const size_t) const override;
   virtual int num_sort_partition_elements() const override;
+  virtual bool supports_local_atomic_sort() const override;

   virtual void init_execution() override;

@@ -315,6 +315,11 @@ int MetalDeviceQueue::num_sort_partition_elements() const
   return MetalInfo::optimal_sort_partition_elements(metal_device_->mtlDevice);
 }

+bool MetalDeviceQueue::supports_local_atomic_sort() const
+{
+  return metal_device_->use_local_atomic_sort();
+}
+
 void MetalDeviceQueue::init_execution()
 {
   /* Synchronize all textures and memory copies before executing task. */
|
|||||||
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->texture_bindings_3d
|
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->texture_bindings_3d
|
||||||
offset:0
|
offset:0
|
||||||
atIndex:1];
|
atIndex:1];
|
||||||
|
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->buffer_bindings_1d
|
||||||
|
offset:0
|
||||||
|
atIndex:2];
|
||||||
|
|
||||||
if (@available(macos 12.0, *)) {
|
if (@available(macos 12.0, *)) {
|
||||||
if (metal_device_->use_metalrt) {
|
if (metal_device_->use_metalrt) {
|
||||||
if (metal_device_->bvhMetalRT) {
|
if (metal_device_->bvhMetalRT) {
|
||||||
id<MTLAccelerationStructure> accel_struct = metal_device_->bvhMetalRT->accel_struct;
|
id<MTLAccelerationStructure> accel_struct = metal_device_->bvhMetalRT->accel_struct;
|
||||||
[metal_device_->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:2];
|
[metal_device_->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:3];
|
||||||
|
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_buffer
|
||||||
|
offset:0
|
||||||
|
atIndex:8];
|
||||||
|
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_lookup_buffer
|
||||||
|
offset:0
|
||||||
|
atIndex:9];
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
|
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
|
||||||
@@ -486,13 +501,13 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
|||||||
atIndex:1];
|
atIndex:1];
|
||||||
[metal_device_->mtlAncillaryArgEncoder
|
[metal_device_->mtlAncillaryArgEncoder
|
||||||
setIntersectionFunctionTable:metal_kernel_pso->intersection_func_table[table]
|
setIntersectionFunctionTable:metal_kernel_pso->intersection_func_table[table]
|
||||||
atIndex:3 + table];
|
atIndex:4 + table];
|
||||||
[mtlComputeCommandEncoder useResource:metal_kernel_pso->intersection_func_table[table]
|
[mtlComputeCommandEncoder useResource:metal_kernel_pso->intersection_func_table[table]
|
||||||
usage:MTLResourceUsageRead];
|
usage:MTLResourceUsageRead];
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
[metal_device_->mtlAncillaryArgEncoder setIntersectionFunctionTable:nil
|
[metal_device_->mtlAncillaryArgEncoder setIntersectionFunctionTable:nil
|
||||||
atIndex:3 + table];
|
atIndex:4 + table];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -527,6 +542,10 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
|||||||
if (bvhMetalRT) {
|
if (bvhMetalRT) {
|
||||||
/* Mark all Accelerations resources as used */
|
/* Mark all Accelerations resources as used */
|
||||||
[mtlComputeCommandEncoder useResource:bvhMetalRT->accel_struct usage:MTLResourceUsageRead];
|
[mtlComputeCommandEncoder useResource:bvhMetalRT->accel_struct usage:MTLResourceUsageRead];
|
||||||
|
[mtlComputeCommandEncoder useResource:metal_device_->blas_buffer
|
||||||
|
usage:MTLResourceUsageRead];
|
||||||
|
[mtlComputeCommandEncoder useResource:metal_device_->blas_lookup_buffer
|
||||||
|
usage:MTLResourceUsageRead];
|
||||||
[mtlComputeCommandEncoder useResources:bvhMetalRT->blas_array.data()
|
[mtlComputeCommandEncoder useResources:bvhMetalRT->blas_array.data()
|
||||||
count:bvhMetalRT->blas_array.size()
|
count:bvhMetalRT->blas_array.size()
|
||||||
usage:MTLResourceUsageRead];
|
usage:MTLResourceUsageRead];
|
||||||
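Read off the setBuffer/setAccelerationStructure calls in this hunk, the ancillary argument slots shift as follows (an inferred summary, not an authoritative layout):

/* atIndex 2: buffer_bindings_1d   (new)
 * atIndex 3: TLAS accel_struct    (previously 2)
 * atIndex 4 + table: intersection function tables, now including
 *                    METALRT_TABLE_LOCAL_PRIM (previously 3 + table)
 * atIndex 8: blas_buffer          (new)
 * atIndex 9: blas_lookup_buffer   (new)
 */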
@@ -553,13 +572,24 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
|||||||
/* See parallel_active_index.h for why this amount of shared memory is needed.
|
/* See parallel_active_index.h for why this amount of shared memory is needed.
|
||||||
* Rounded up to 16 bytes for Metal */
|
* Rounded up to 16 bytes for Metal */
|
||||||
shared_mem_bytes = (int)round_up((num_threads_per_block + 1) * sizeof(int), 16);
|
shared_mem_bytes = (int)round_up((num_threads_per_block + 1) * sizeof(int), 16);
|
||||||
[mtlComputeCommandEncoder setThreadgroupMemoryLength:shared_mem_bytes atIndex:0];
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_SORT_BUCKET_PASS:
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_SORT_WRITE_PASS: {
|
||||||
|
int key_count = metal_device_->launch_params.data.max_shaders;
|
||||||
|
shared_mem_bytes = (int)round_up(key_count * sizeof(int), 16);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (shared_mem_bytes) {
|
||||||
|
assert(shared_mem_bytes <= 32 * 1024);
|
||||||
|
[mtlComputeCommandEncoder setThreadgroupMemoryLength:shared_mem_bytes atIndex:0];
|
||||||
|
}
|
||||||
|
|
||||||
MTLSize size_threadgroups_per_dispatch = MTLSizeMake(
|
MTLSize size_threadgroups_per_dispatch = MTLSizeMake(
|
||||||
divide_up(work_size, num_threads_per_block), 1, 1);
|
divide_up(work_size, num_threads_per_block), 1, 1);
|
||||||
MTLSize size_threads_per_threadgroup = MTLSizeMake(num_threads_per_block, 1, 1);
|
MTLSize size_threads_per_threadgroup = MTLSizeMake(num_threads_per_block, 1, 1);
|
||||||
@@ -848,6 +878,7 @@ void MetalDeviceQueue::prepare_resources(DeviceKernel kernel)
|
|||||||
/* ancillaries */
|
/* ancillaries */
|
||||||
[mtlComputeEncoder_ useResource:metal_device_->texture_bindings_2d usage:MTLResourceUsageRead];
|
[mtlComputeEncoder_ useResource:metal_device_->texture_bindings_2d usage:MTLResourceUsageRead];
|
||||||
[mtlComputeEncoder_ useResource:metal_device_->texture_bindings_3d usage:MTLResourceUsageRead];
|
[mtlComputeEncoder_ useResource:metal_device_->texture_bindings_3d usage:MTLResourceUsageRead];
|
||||||
|
[mtlComputeEncoder_ useResource:metal_device_->buffer_bindings_1d usage:MTLResourceUsageRead];
|
||||||
}
|
}
|
||||||
|
|
||||||
id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel kernel)
|
id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel kernel)
|
||||||
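The new sort cases size threadgroup memory as one int-sized counter per shader key, rounded up to 16 bytes, and the hoisted setThreadgroupMemoryLength call now runs once for any kernel that requested shared memory, with an assert guarding a 32 KiB budget. A tiny sketch of the same arithmetic (illustrative only):

#include <cassert>
#include <cstddef>

/* One int per shader key, rounded up to Metal's 16-byte granularity. */
static size_t shared_mem_for_sort(const size_t max_shaders)
{
  const size_t bytes = max_shaders * sizeof(int);
  return (bytes + 15) & ~size_t(15);
}

int main()
{
  assert(shared_mem_for_sort(3) == 16);
  assert(shared_mem_for_sort(100) == 400);
  return 0;
}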
@@ -64,6 +64,12 @@ MetalGPUVendor MetalInfo::get_device_vendor(id<MTLDevice> device)
     return METAL_GPU_INTEL;
   }
   else if (strstr(device_name, "AMD")) {
+    /* Setting this env var hides AMD devices thus exposing any integrated Intel devices. */
+    if (auto str = getenv("CYCLES_METAL_FORCE_INTEL")) {
+      if (atoi(str)) {
+        return METAL_GPU_UNKNOWN;
+      }
+    }
     return METAL_GPU_AMD;
   }
   else if (strstr(device_name, "Apple")) {
@@ -96,6 +102,15 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
     return usable_devices;
   }

+  /* If the system has both an AMD GPU (discrete) and an Intel one (integrated), prefer the AMD
+   * one. This can be overridden with CYCLES_METAL_FORCE_INTEL. */
+  bool has_usable_amd_gpu = false;
+  if (@available(macos 12.3, *)) {
+    for (id<MTLDevice> device in MTLCopyAllDevices()) {
+      has_usable_amd_gpu |= (get_device_vendor(device) == METAL_GPU_AMD);
+    }
+  }
+
   metal_printf("Usable Metal devices:\n");
   for (id<MTLDevice> device in MTLCopyAllDevices()) {
     string device_name = get_device_name(device);
@@ -111,8 +126,10 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
     }

 # if defined(MAC_OS_VERSION_13_0)
-    if (@available(macos 13.0, *)) {
-      usable |= (vendor == METAL_GPU_INTEL);
+    if (!has_usable_amd_gpu) {
+      if (@available(macos 13.0, *)) {
+        usable |= (vendor == METAL_GPU_INTEL);
+      }
     }
 # endif
@@ -854,12 +854,14 @@ bool OptiXDevice::load_osl_kernels()
         context, group_descs, 2, &group_options, nullptr, 0, &osl_groups[i * 2]));
   }

+  OptixStackSizes stack_size[NUM_PROGRAM_GROUPS] = {};
   vector<OptixStackSizes> osl_stack_size(osl_groups.size());

   /* Update SBT with new entries. */
   sbt_data.alloc(NUM_PROGRAM_GROUPS + osl_groups.size());
   for (int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
     optix_assert(optixSbtRecordPackHeader(groups[i], &sbt_data[i]));
+    optix_assert(optixProgramGroupGetStackSize(groups[i], &stack_size[i]));
   }
   for (size_t i = 0; i < osl_groups.size(); ++i) {
     if (osl_groups[i] != NULL) {
@@ -907,13 +909,15 @@ bool OptiXDevice::load_osl_kernels()
         0,
         &pipelines[PIP_SHADE]));

+    const unsigned int css = std::max(stack_size[PG_RGEN_SHADE_SURFACE_RAYTRACE].cssRG,
+                                      stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG);
     unsigned int dss = 0;
     for (unsigned int i = 0; i < osl_stack_size.size(); ++i) {
       dss = std::max(dss, osl_stack_size[i].dssDC);
     }

     optix_assert(optixPipelineSetStackSize(
-        pipelines[PIP_SHADE], 0, dss, 0, pipeline_options.usesMotionBlur ? 3 : 2));
+        pipelines[PIP_SHADE], 0, dss, css, pipeline_options.usesMotionBlur ? 3 : 2));
   }

   return !have_error();
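The two additions above give the OSL shading pipeline a correct continuation stack: optixProgramGroupGetStackSize() records each built-in group's requirements, and the continuation stack size handed to optixPipelineSetStackSize() (per the OptiX 7 API, the fourth argument, previously hard-coded to 0) becomes the larger cssRG of the two shade-surface raygen groups, while dss still covers the largest direct-callable stack among the OSL groups.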
@@ -112,6 +112,13 @@ class DeviceQueue {
     return 65536;
   }

+  /* Does device support local atomic sorting kernels (INTEGRATOR_SORT_BUCKET_PASS and
+   * INTEGRATOR_SORT_WRITE_PASS)? */
+  virtual bool supports_local_atomic_sort() const
+  {
+    return false;
+  }
+
   /* Initialize execution of kernels on this queue.
    *
    * Will, for example, load all data required by the kernels from Device to global or path state.
@@ -71,6 +71,8 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
|
|||||||
device, "integrator_shader_mnee_sort_counter", MEM_READ_WRITE),
|
device, "integrator_shader_mnee_sort_counter", MEM_READ_WRITE),
|
||||||
integrator_shader_sort_prefix_sum_(
|
integrator_shader_sort_prefix_sum_(
|
||||||
device, "integrator_shader_sort_prefix_sum", MEM_READ_WRITE),
|
device, "integrator_shader_sort_prefix_sum", MEM_READ_WRITE),
|
||||||
|
integrator_shader_sort_partition_key_offsets_(
|
||||||
|
device, "integrator_shader_sort_partition_key_offsets", MEM_READ_WRITE),
|
||||||
integrator_next_main_path_index_(device, "integrator_next_main_path_index", MEM_READ_WRITE),
|
integrator_next_main_path_index_(device, "integrator_next_main_path_index", MEM_READ_WRITE),
|
||||||
integrator_next_shadow_path_index_(
|
integrator_next_shadow_path_index_(
|
||||||
device, "integrator_next_shadow_path_index", MEM_READ_WRITE),
|
device, "integrator_next_shadow_path_index", MEM_READ_WRITE),
|
||||||
@@ -207,33 +209,45 @@ void PathTraceWorkGPU::alloc_integrator_sorting()
   integrator_state_gpu_.sort_partition_divisor = (int)divide_up(max_num_paths_,
                                                                 num_sort_partitions_);
 
-  /* Allocate arrays for shader sorting. */
-  const int sort_buckets = device_scene_->data.max_shaders * num_sort_partitions_;
-  if (integrator_shader_sort_counter_.size() < sort_buckets) {
-    integrator_shader_sort_counter_.alloc(sort_buckets);
-    integrator_shader_sort_counter_.zero_to_device();
-    integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] =
-        (int *)integrator_shader_sort_counter_.device_pointer;
-
-    integrator_shader_sort_prefix_sum_.alloc(sort_buckets);
-    integrator_shader_sort_prefix_sum_.zero_to_device();
-  }
-
-  if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
-    if (integrator_shader_raytrace_sort_counter_.size() < sort_buckets) {
-      integrator_shader_raytrace_sort_counter_.alloc(sort_buckets);
-      integrator_shader_raytrace_sort_counter_.zero_to_device();
-      integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
-          (int *)integrator_shader_raytrace_sort_counter_.device_pointer;
-    }
-  }
-
-  if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
-    if (integrator_shader_mnee_sort_counter_.size() < sort_buckets) {
-      integrator_shader_mnee_sort_counter_.alloc(sort_buckets);
-      integrator_shader_mnee_sort_counter_.zero_to_device();
-      integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
-          (int *)integrator_shader_mnee_sort_counter_.device_pointer;
-    }
-  }
+  if (num_sort_partitions_ > 1 && queue_->supports_local_atomic_sort()) {
+    /* Allocate array for partitioned shader sorting using local atomics. */
+    const int num_offsets = (device_scene_->data.max_shaders + 1) * num_sort_partitions_;
+    if (integrator_shader_sort_partition_key_offsets_.size() < num_offsets) {
+      integrator_shader_sort_partition_key_offsets_.alloc(num_offsets);
+      integrator_shader_sort_partition_key_offsets_.zero_to_device();
+    }
+    integrator_state_gpu_.sort_partition_key_offsets =
+        (int *)integrator_shader_sort_partition_key_offsets_.device_pointer;
+  }
+  else {
+    /* Allocate arrays for shader sorting. */
+    const int sort_buckets = device_scene_->data.max_shaders * num_sort_partitions_;
+    if (integrator_shader_sort_counter_.size() < sort_buckets) {
+      integrator_shader_sort_counter_.alloc(sort_buckets);
+      integrator_shader_sort_counter_.zero_to_device();
+      integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] =
+          (int *)integrator_shader_sort_counter_.device_pointer;
+
+      integrator_shader_sort_prefix_sum_.alloc(sort_buckets);
+      integrator_shader_sort_prefix_sum_.zero_to_device();
+    }
+
+    if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
+      if (integrator_shader_raytrace_sort_counter_.size() < sort_buckets) {
+        integrator_shader_raytrace_sort_counter_.alloc(sort_buckets);
+        integrator_shader_raytrace_sort_counter_.zero_to_device();
+        integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
+            (int *)integrator_shader_raytrace_sort_counter_.device_pointer;
+      }
+    }
+
+    if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
+      if (integrator_shader_mnee_sort_counter_.size() < sort_buckets) {
+        integrator_shader_mnee_sort_counter_.alloc(sort_buckets);
+        integrator_shader_mnee_sort_counter_.zero_to_device();
+        integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
+            (int *)integrator_shader_mnee_sort_counter_.device_pointer;
+      }
+    }
+  }
 }
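Note on the new layout: each of the num_sort_partitions_ partitions owns a run of (max_shaders + 1) ints in the key-offsets buffer, one offset per shader key plus a trailing slot for the partition's total. A minimal host-side sketch of that layout, with illustrative names and values (not part of the diff):

#include <vector>

int main()
{
  const int max_shaders = 4, num_partitions = 2;
  std::vector<int> offsets((max_shaders + 1) * num_partitions, 0);

  const int bucket_sizes[4] = {3, 0, 5, 1}; /* made-up counts for partition 0 */
  int running = 0;
  for (int key = 0; key < max_shaders; key++) {
    offsets[0 * (max_shaders + 1) + key] = running; /* 0, 3, 3, 8 */
    running += bucket_sizes[key];
  }
  offsets[0 * (max_shaders + 1) + max_shaders] = running; /* partition total: 9 */
  return 0;
}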
@@ -451,8 +465,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
     work_size = num_queued;
     d_path_index = queued_paths_.device_pointer;
 
-    compute_sorted_queued_paths(
-        DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY, kernel, num_paths_limit);
+    compute_sorted_queued_paths(kernel, num_paths_limit);
   }
   else if (num_queued < work_size) {
     work_size = num_queued;
@@ -511,11 +524,26 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
   }
 }
 
-void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel kernel,
-                                                   DeviceKernel queued_kernel,
+void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel queued_kernel,
                                                    const int num_paths_limit)
 {
   int d_queued_kernel = queued_kernel;
 
+  /* Launch kernel to fill the active paths arrays. */
+  if (num_sort_partitions_ > 1 && queue_->supports_local_atomic_sort()) {
+    const int work_size = kernel_max_active_main_path_index(queued_kernel);
+    device_ptr d_queued_paths = queued_paths_.device_pointer;
+
+    int partition_size = (int)integrator_state_gpu_.sort_partition_divisor;
+
+    DeviceKernelArguments args(
+        &work_size, &partition_size, &num_paths_limit, &d_queued_paths, &d_queued_kernel);
+
+    queue_->enqueue(DEVICE_KERNEL_INTEGRATOR_SORT_BUCKET_PASS, 1024 * num_sort_partitions_, args);
+    queue_->enqueue(DEVICE_KERNEL_INTEGRATOR_SORT_WRITE_PASS, 1024 * num_sort_partitions_, args);
+    return;
+  }
+
   device_ptr d_counter = (device_ptr)integrator_state_gpu_.sort_key_counter[d_queued_kernel];
   device_ptr d_prefix_sum = integrator_shader_sort_prefix_sum_.device_pointer;
   assert(d_counter != 0 && d_prefix_sum != 0);
@@ -552,7 +580,7 @@ void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel kernel,
                              &d_prefix_sum,
                              &d_queued_kernel);
 
-    queue_->enqueue(kernel, work_size, args);
+    queue_->enqueue(DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY, work_size, args);
   }
 }
 
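The grid sizing in the new dispatch is deliberate: assuming GPU_PARALLEL_SORT_BLOCK_SIZE of 1024 (defined later in this diff), enqueueing 1024 * num_sort_partitions_ threads yields exactly one threadgroup per sort partition, so the threadgroup index can double as the partition index. A trivial check of that arithmetic:

int main()
{
  const int block_size = 1024;        /* GPU_PARALLEL_SORT_BLOCK_SIZE */
  const int num_sort_partitions = 8;  /* illustrative value */
  const int total_threads = block_size * num_sort_partitions;
  const int num_threadgroups = total_threads / block_size; /* == num_sort_partitions */
  return num_threadgroups == num_sort_partitions ? 0 : 1;
}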
@@ -70,9 +70,7 @@ class PathTraceWorkGPU : public PathTraceWork {
   void enqueue_path_iteration(DeviceKernel kernel, const int num_paths_limit = INT_MAX);
 
   void compute_queued_paths(DeviceKernel kernel, DeviceKernel queued_kernel);
-  void compute_sorted_queued_paths(DeviceKernel kernel,
-                                   DeviceKernel queued_kernel,
-                                   const int num_paths_limit);
+  void compute_sorted_queued_paths(DeviceKernel queued_kernel, const int num_paths_limit);
 
   void compact_main_paths(const int num_active_paths);
   void compact_shadow_paths();
@@ -135,6 +133,7 @@ class PathTraceWorkGPU : public PathTraceWork {
   device_vector<int> integrator_shader_raytrace_sort_counter_;
   device_vector<int> integrator_shader_mnee_sort_counter_;
   device_vector<int> integrator_shader_sort_prefix_sum_;
+  device_vector<int> integrator_shader_sort_partition_key_offsets_;
   /* Path split. */
   device_vector<int> integrator_next_main_path_index_;
   device_vector<int> integrator_next_shadow_path_index_;
@@ -886,7 +886,7 @@ int RenderScheduler::get_num_samples_during_navigation(int resolution_divider) c
 {
   /* Special trick for fast navigation: schedule multiple samples during fast navigation
    * (which will prefer to use lower resolution to keep up with refresh rate). This gives more
-   * usable visual feedback for artists. There are a couple of tricks though. */
+   * usable visual feedback for artists. */
 
   if (is_denoise_active_during_update()) {
     /* When denoising is used during navigation prefer using a higher resolution with less samples
@@ -896,25 +896,12 @@ int RenderScheduler::get_num_samples_during_navigation(int resolution_divider) c
     return 1;
   }
 
-  if (resolution_divider <= pixel_size_) {
-    /* When resolution divider is at or below pixel size, schedule one sample. This doesn't effect
-     * the sample count at this resolution division, but instead assists in the calculation of
-     * the resolution divider. */
-    return 1;
-  }
-
-  if (resolution_divider == pixel_size_ * 2) {
-    /* When resolution divider is the previous step to the final resolution, schedule two samples.
-     * This is so that rendering on lower resolution does not exceed time that it takes to render
-     * first sample at the full resolution. */
-    return 2;
-  }
-
-  /* Always render 4 samples, even if scene is configured for less.
-   * The idea here is to have enough information on the screen. Resolution divider of 2 allows us
-   * to have 4 time extra samples, so overall worst case timing is the same as the final resolution
-   * at one sample. */
-  return 4;
+  /* Schedule samples equal to the resolution divider up to a maximum of 4.
+   * The idea is to have enough information on the screen by increasing the sample count as the
+   * resolution is decreased. */
+  /* NOTE: Changing this formula will change the formula in
+   * `RenderScheduler::calculate_resolution_divider_for_time()`. */
+  return min(max(1, resolution_divider / pixel_size_), 4);
 }
 
 bool RenderScheduler::work_need_adaptive_filter() const
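A quick table of the new formula, assuming pixel_size_ = 1: dividers 1, 2, 4 and 8 schedule 1, 2, 4 and 4 samples respectively, with larger dividers staying clamped at 4. A self-contained restatement (std::min/std::max stand in for Cycles' own min/max):

#include <algorithm>

int num_samples_during_navigation(int resolution_divider, int pixel_size)
{
  /* Sample count grows with the divider and clamps at 4. */
  return std::min(std::max(1, resolution_divider / pixel_size), 4);
}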
@@ -1100,9 +1087,10 @@ void RenderScheduler::update_start_resolution_divider()
   /* TODO(sergey): Need to add hysteresis to avoid resolution divider bouncing around when actual
    * render time is somewhere on a boundary between two resolutions. */
 
-  /* Never increase resolution to higher than the pixel size (which is possible if the scene is
-   * simple and compute device is fast). */
-  start_resolution_divider_ = max(resolution_divider_for_update, pixel_size_);
+  /* Don't let resolution drop below the desired one. It's better to be slow than provide an
+   * unreadable viewport render. */
+  start_resolution_divider_ = min(resolution_divider_for_update,
+                                  default_start_resolution_divider_);
 
   VLOG_WORK << "Calculated resolution divider is " << start_resolution_divider_;
 }
@@ -1187,24 +1175,24 @@ void RenderScheduler::check_time_limit_reached()
 
 int RenderScheduler::calculate_resolution_divider_for_time(double desired_time, double actual_time)
 {
-  /* TODO(sergey): There should a non-iterative analytical formula here. */
-
-  int resolution_divider = 1;
-
-  /* This algorithm iterates through resolution dividers until a divider is found that achieves
-   * the desired render time. A limit of default_start_resolution_divider_ is put in place as the
-   * maximum resolution divider to avoid an unreadable viewport due to a low resolution.
-   * pre_resolution_division_samples and post_resolution_division_samples are used in this
-   * calculation to better predict the performance impact of changing resolution divisions as
-   * the sample count can also change between resolution divisions. */
-  while (actual_time > desired_time && resolution_divider < default_start_resolution_divider_) {
-    int pre_resolution_division_samples = get_num_samples_during_navigation(resolution_divider);
-    resolution_divider = resolution_divider * 2;
-    int post_resolution_division_samples = get_num_samples_during_navigation(resolution_divider);
-    actual_time /= 4.0 * pre_resolution_division_samples / post_resolution_division_samples;
-  }
-
-  return resolution_divider;
+  const double ratio_between_times = actual_time / desired_time;
+
+  /* We can pass `ratio_between_times` to `get_num_samples_during_navigation()` to get our
+   * navigation samples because the equation for calculating the resolution divider is as follows:
+   * `actual_time / desired_time = sqr(resolution_divider) / sample_count`.
+   * While `resolution_divider` is less than or equal to 4, `resolution_divider = sample_count`
+   * (This relationship is determined in `get_num_samples_during_navigation()`). With some
+   * substitution we end up with `actual_time / desired_time = resolution_divider` while the
+   * resolution divider is less than or equal to 4. Once the resolution divider increases above 4,
+   * the relationship of `actual_time / desired_time = resolution_divider` is no longer true,
+   * however the sample count retrieved from `get_num_samples_during_navigation()` is still
+   * accurate if we continue using this assumption. It should be noted that the interaction between
+   * `pixel_size`, sample count, and resolution divider are automatically accounted for and that's
+   * why `pixel_size` isn't included in any of the equations. */
+  const int navigation_samples = get_num_samples_during_navigation(
+      ceil_to_int(ratio_between_times));
+
+  return ceil_to_int(sqrt(navigation_samples * ratio_between_times));
 }
 
 int calculate_resolution_divider_for_resolution(int width, int height, int resolution)
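A numeric check of the closed form, using illustrative values with pixel_size = 1: if the last render took 12x the desired time, the ratio is 12, the navigation sample count clamps to 4, and the divider comes out as ceil(sqrt(4 * 12)) = 7. Indeed 7^2 / 4 = 12.25, close to the target ratio of 12:

#include <cmath>

int main()
{
  const double ratio = 12.0;        /* actual_time / desired_time */
  const int navigation_samples = 4; /* min(max(1, 12), 4) for pixel_size 1 */
  const int divider = (int)std::ceil(std::sqrt(navigation_samples * ratio));
  return divider; /* 7 */
}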
@@ -412,11 +412,12 @@ if(WITH_CYCLES_CUDA_BINARIES)
   # warn for other versions
   if((CUDA_VERSION STREQUAL "101") OR
      (CUDA_VERSION STREQUAL "102") OR
-     (CUDA_VERSION_MAJOR STREQUAL "11"))
+     (CUDA_VERSION_MAJOR STREQUAL "11") OR
+     (CUDA_VERSION_MAJOR STREQUAL "12"))
   else()
     message(WARNING
       "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
-      "build may succeed but only CUDA 11, 10.2 and 10.1 have been tested")
+      "build may succeed but only CUDA 12, 11, 10.2 and 10.1 have been tested")
   endif()
 
   # build for each arch
@@ -514,6 +515,16 @@ if(WITH_CYCLES_CUDA_BINARIES)
       else()
         message(STATUS "CUDA binaries for ${arch} require CUDA 10 or earlier, skipped.")
       endif()
+    elseif(${arch} MATCHES ".*_3.")
+      if(DEFINED CUDA11_NVCC_EXECUTABLE)
+        set(cuda_nvcc_executable ${CUDA11_NVCC_EXECUTABLE})
+        set(cuda_toolkit_root_dir ${CUDA11_TOOLKIT_ROOT_DIR})
+      elseif("${CUDA_VERSION}" LESS 120) # Support for sm_35, sm_37 was removed in CUDA 12
+        set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE})
+        set(cuda_toolkit_root_dir ${CUDA_TOOLKIT_ROOT_DIR})
+      else()
+        message(STATUS "CUDA binaries for ${arch} require CUDA 11 or earlier, skipped.")
+      endif()
     elseif(${arch} MATCHES ".*_7." AND "${CUDA_VERSION}" LESS 100)
       message(STATUS "CUDA binaries for ${arch} require CUDA 10.0+, skipped.")
     elseif(${arch} MATCHES ".*_8.")
@@ -661,7 +661,8 @@ ccl_device void bsdf_blur(KernelGlobals kg, ccl_private ShaderClosure *sc, float
 #endif
 }
 
-ccl_device_inline Spectrum bsdf_albedo(ccl_private const ShaderData *sd, ccl_private const ShaderClosure *sc)
+ccl_device_inline Spectrum bsdf_albedo(ccl_private const ShaderData *sd,
+                                       ccl_private const ShaderClosure *sc)
 {
   Spectrum albedo = sc->weight;
   /* Some closures include additional components such as Fresnel terms that cause their albedo to
@@ -685,7 +686,7 @@ ccl_device_inline Spectrum bsdf_albedo(ccl_private const ShaderData *sd, ccl_pri
       albedo *= ((ccl_private const PrincipledSheenBsdf *)sc)->avg_value;
       break;
     case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
-      albedo *= bsdf_principled_hair_albedo(sc);
+      albedo *= bsdf_principled_hair_albedo(sd, sc);
      break;
    default:
      break;
@@ -478,10 +478,18 @@ ccl_device_inline float bsdf_principled_hair_albedo_roughness_scale(
   return (((((0.245f * x) + 5.574f) * x - 10.73f) * x + 2.532f) * x - 0.215f) * x + 5.969f;
 }
 
-ccl_device Spectrum bsdf_principled_hair_albedo(ccl_private const ShaderClosure *sc)
+ccl_device Spectrum bsdf_principled_hair_albedo(ccl_private const ShaderData *sd,
+                                                ccl_private const ShaderClosure *sc)
 {
   ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)sc;
-  return exp(-sqrt(bsdf->sigma) * bsdf_principled_hair_albedo_roughness_scale(bsdf->v));
+
+  const float cos_theta_o = cos_from_sin(dot(sd->wi, safe_normalize(sd->dPdu)));
+  const float cos_gamma_o = cos_from_sin(bsdf->extra->geom.w);
+  const float f = fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta);
+
+  const float roughness_scale = bsdf_principled_hair_albedo_roughness_scale(bsdf->v);
+  /* TODO(lukas): Adding the Fresnel term here as a workaround until the proper refactor. */
+  return exp(-sqrt(bsdf->sigma) * roughness_scale) + make_spectrum(f);
 }
 
 ccl_device_inline Spectrum
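The new albedo code recovers cosines from stored sines. Assuming Cycles' cos_from_sin is the usual identity sqrt(1 - s^2) clamped at zero, a standalone sketch of that helper:

#include <algorithm>
#include <cmath>

float cos_from_sin(float s)
{
  /* cos(theta) from sin(theta), clamped so rounding never yields NaN. */
  return std::sqrt(std::max(1.0f - s * s, 0.0f));
}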
@@ -519,14 +519,6 @@ ccl_device int bsdf_microfacet_ggx_setup(ccl_private MicrofacetBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-/* Required to maintain OSL interface. */
-ccl_device int bsdf_microfacet_ggx_isotropic_setup(ccl_private MicrofacetBsdf *bsdf)
-{
-  bsdf->alpha_y = bsdf->alpha_x;
-
-  return bsdf_microfacet_ggx_setup(bsdf);
-}
-
 ccl_device int bsdf_microfacet_ggx_fresnel_setup(ccl_private MicrofacetBsdf *bsdf,
                                                  ccl_private const ShaderData *sd)
 {
@@ -613,14 +605,6 @@ ccl_device int bsdf_microfacet_beckmann_setup(ccl_private MicrofacetBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-/* Required to maintain OSL interface. */
-ccl_device int bsdf_microfacet_beckmann_isotropic_setup(ccl_private MicrofacetBsdf *bsdf)
-{
-  bsdf->alpha_y = bsdf->alpha_x;
-
-  return bsdf_microfacet_beckmann_setup(bsdf);
-}
-
 ccl_device int bsdf_microfacet_beckmann_refraction_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->alpha_x = saturatef(bsdf->alpha_x);
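The two removed *_isotropic_setup() wrappers only mirrored alpha_x into alpha_y before delegating to the anisotropic setup, so any remaining caller can inline that. A minimal sketch with a stub setup function (names mirror the diff, the body is illustrative):

struct MicrofacetBsdf {
  float alpha_x, alpha_y;
};

static int bsdf_microfacet_ggx_setup(MicrofacetBsdf *)
{
  return 0; /* stand-in for SD_BSDF | SD_BSDF_HAS_EVAL */
}

static int isotropic_setup(MicrofacetBsdf *bsdf)
{
  bsdf->alpha_y = bsdf->alpha_x; /* isotropic: equal roughness on both axes */
  return bsdf_microfacet_ggx_setup(bsdf);
}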
@@ -90,8 +90,10 @@ ccl_device float schlick_fresnel(float u)
 }
 
 /* Calculate the fresnel color, which is a blend between white and the F0 color */
-ccl_device_forceinline Spectrum
-interpolate_fresnel_color(float3 L, float3 H, float ior, Spectrum F0)
+ccl_device_forceinline Spectrum interpolate_fresnel_color(float3 L,
+                                                          float3 H,
+                                                          float ior,
+                                                          Spectrum F0)
 {
   /* Compute the real Fresnel term and remap it from real_F0..1 to F0..1.
    * The reason why we use this remapping instead of directly doing the
@@ -10,7 +10,7 @@
 #ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
 #  define KERNEL_STUB
 #else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
+/* SSE optimization disabled for now on 32 bit, see bug #36316. */
 #  if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
 #    define __KERNEL_SSE__
 #    define __KERNEL_SSE2__
@@ -10,7 +10,7 @@
 #ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
 #  define KERNEL_STUB
 #else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
+/* SSE optimization disabled for now on 32 bit, see bug #36316. */
 #  if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
 #    define __KERNEL_SSE2__
 #  endif
@@ -10,7 +10,7 @@
 #ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
 #  define KERNEL_STUB
 #else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
+/* SSE optimization disabled for now on 32 bit, see bug #36316. */
 #  if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
 #    define __KERNEL_SSE2__
 #    define __KERNEL_SSE3__
@@ -5,13 +5,14 @@
 
 CCL_NAMESPACE_BEGIN
 
-#ifdef WITH_NANOVDB
-#  define NDEBUG /* Disable "assert" in device code */
-#  define NANOVDB_USE_INTRINSICS
-#  include "nanovdb/NanoVDB.h"
-#  include "nanovdb/util/SampleFromVoxels.h"
+#if !defined __KERNEL_METAL__
+#  ifdef WITH_NANOVDB
+#    define NDEBUG /* Disable "assert" in device code */
+#    define NANOVDB_USE_INTRINSICS
+#    include "nanovdb/NanoVDB.h"
+#    include "nanovdb/util/SampleFromVoxels.h"
+#  endif
 #endif
 
 /* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */
 ccl_device float cubic_w0(float a)
 {
@@ -126,7 +127,7 @@ kernel_tex_image_interp_tricubic(ccl_global const TextureInfo &info, float x, fl
 #ifdef WITH_NANOVDB
 template<typename T, typename S>
 ccl_device typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_tricubic_nanovdb(
-    S &s, float x, float y, float z)
+    ccl_private S &s, float x, float y, float z)
 {
   float px = floorf(x);
   float py = floorf(y);
@@ -157,13 +158,19 @@ ccl_device typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_tric
                 g1y * (g0x * s(Vec3f(x0, y1, z1)) + g1x * s(Vec3f(x1, y1, z1))));
 }
 
+#  if defined(__KERNEL_METAL__)
+template<typename T>
+__attribute__((noinline)) typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_nanovdb(
+    ccl_global const TextureInfo &info, float x, float y, float z, uint interpolation)
+#  else
 template<typename T>
 ccl_device_noinline typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_nanovdb(
     ccl_global const TextureInfo &info, float x, float y, float z, uint interpolation)
+#  endif
 {
   using namespace nanovdb;
 
-  NanoGrid<T> *const grid = (NanoGrid<T> *)info.data;
+  ccl_global NanoGrid<T> *const grid = (ccl_global NanoGrid<T> *)info.data;
   typedef typename nanovdb::NanoGrid<T>::AccessorType AccessorType;
   AccessorType acc = grid->getAccessor();
 
@@ -401,6 +401,72 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
 }
 ccl_gpu_kernel_postfix
 
+ccl_gpu_kernel_threads(GPU_PARALLEL_SORT_BLOCK_SIZE)
+    ccl_gpu_kernel_signature(integrator_sort_bucket_pass,
+                             int num_states,
+                             int partition_size,
+                             int num_states_limit,
+                             ccl_global int *indices,
+                             int kernel_index)
+{
+#if defined(__KERNEL_LOCAL_ATOMIC_SORT__)
+  int max_shaders = context.launch_params_metal.data.max_shaders;
+  ccl_global ushort *d_queued_kernel = (ccl_global ushort *)
+                                           kernel_integrator_state.path.queued_kernel;
+  ccl_global uint *d_shader_sort_key = (ccl_global uint *)
+                                           kernel_integrator_state.path.shader_sort_key;
+  ccl_global int *key_offsets = (ccl_global int *)
+                                    kernel_integrator_state.sort_partition_key_offsets;
+
+  gpu_parallel_sort_bucket_pass(num_states,
+                                partition_size,
+                                max_shaders,
+                                kernel_index,
+                                d_queued_kernel,
+                                d_shader_sort_key,
+                                key_offsets,
+                                (threadgroup int *)threadgroup_array,
+                                metal_local_id,
+                                metal_local_size,
+                                metal_grid_id);
+#endif
+}
+ccl_gpu_kernel_postfix
+
+ccl_gpu_kernel_threads(GPU_PARALLEL_SORT_BLOCK_SIZE)
+    ccl_gpu_kernel_signature(integrator_sort_write_pass,
+                             int num_states,
+                             int partition_size,
+                             int num_states_limit,
+                             ccl_global int *indices,
+                             int kernel_index)
+{
+#if defined(__KERNEL_LOCAL_ATOMIC_SORT__)
+  int max_shaders = context.launch_params_metal.data.max_shaders;
+  ccl_global ushort *d_queued_kernel = (ccl_global ushort *)
+                                           kernel_integrator_state.path.queued_kernel;
+  ccl_global uint *d_shader_sort_key = (ccl_global uint *)
+                                           kernel_integrator_state.path.shader_sort_key;
+  ccl_global int *key_offsets = (ccl_global int *)
+                                    kernel_integrator_state.sort_partition_key_offsets;
+
+  gpu_parallel_sort_write_pass(num_states,
+                               partition_size,
+                               max_shaders,
+                               kernel_index,
+                               num_states_limit,
+                               indices,
+                               d_queued_kernel,
+                               d_shader_sort_key,
+                               key_offsets,
+                               (threadgroup int *)threadgroup_array,
+                               metal_local_id,
+                               metal_local_size,
+                               metal_grid_id);
+#endif
+}
+ccl_gpu_kernel_postfix
+
 ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
     ccl_gpu_kernel_signature(integrator_compact_paths_array,
                              int num_states,
@@ -579,7 +645,7 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb
                                                           const int y,
                                                           const half4 half_pixel)
 {
-  /* Work around HIP issue with half float display, see T92972. */
+  /* Work around HIP issue with half float display, see #92972. */
 #ifdef __KERNEL_HIP__
   ccl_global half *out = ((ccl_global half *)rgba) + (rgba_offset + y * rgba_stride + x) * 4;
   out[0] = half_pixel.x;
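The HIP workaround keeps its shape here: the half4 pixel is written through a scalar pointer one component at a time instead of as a single vector store. A plain C++ analogue (uint16_t stands in for a device half; illustrative only):

#include <cstdint>

struct half4 {
  uint16_t x, y, z, w;
};

void write_pixel(uint16_t *out, const half4 p)
{
  out[0] = p.x;
  out[1] = p.y;
  out[2] = p.z;
  out[3] = p.w;
}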
@@ -178,7 +178,7 @@ __device__
                                        simd_lane_index, \
                                        simd_group_index, \
                                        num_simd_groups, \
-                                       simdgroup_offset)
+                                       (threadgroup int *)threadgroup_array)
 #elif defined(__KERNEL_ONEAPI__)
 
 #  define gpu_parallel_active_index_array(num_states, indices, num_indices, is_active_op) \
@@ -19,6 +19,115 @@ CCL_NAMESPACE_BEGIN
 #  define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 512
 #endif
 #define GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY (~0)
+#define GPU_PARALLEL_SORT_BLOCK_SIZE 1024
+
+#if defined(__KERNEL_LOCAL_ATOMIC_SORT__)
+
+#  define atomic_store_local(p, x) \
+    atomic_store_explicit((threadgroup atomic_int *)p, x, memory_order_relaxed)
+#  define atomic_load_local(p) \
+    atomic_load_explicit((threadgroup atomic_int *)p, memory_order_relaxed)
+
+ccl_device_inline void gpu_parallel_sort_bucket_pass(const uint num_states,
+                                                     const uint partition_size,
+                                                     const uint max_shaders,
+                                                     const uint queued_kernel,
+                                                     ccl_global ushort *d_queued_kernel,
+                                                     ccl_global uint *d_shader_sort_key,
+                                                     ccl_global int *partition_key_offsets,
+                                                     ccl_gpu_shared int *buckets,
+                                                     const ushort local_id,
+                                                     const ushort local_size,
+                                                     const ushort grid_id)
+{
+  /* Zero the bucket sizes. */
+  if (local_id < max_shaders) {
+    atomic_store_local(&buckets[local_id], 0);
+  }
+
+  ccl_gpu_syncthreads();
+
+  /* Determine bucket sizes within the partitions. */
+
+  const uint partition_start = partition_size * uint(grid_id);
+  const uint partition_end = min(num_states, partition_start + partition_size);
+
+  for (int state_index = partition_start + uint(local_id); state_index < partition_end;
+       state_index += uint(local_size)) {
+    ushort kernel_index = d_queued_kernel[state_index];
+    if (kernel_index == queued_kernel) {
+      uint key = d_shader_sort_key[state_index] % max_shaders;
+      atomic_fetch_and_add_uint32(&buckets[key], 1);
+    }
+  }
+
+  ccl_gpu_syncthreads();
+
+  /* Calculate the partition's local offsets from the prefix sum of bucket sizes. */
+
+  if (local_id == 0) {
+    int offset = 0;
+    for (int i = 0; i < max_shaders; i++) {
+      partition_key_offsets[i + uint(grid_id) * (max_shaders + 1)] = offset;
+      offset = offset + atomic_load_local(&buckets[i]);
+    }
+
+    /* Store the number of active states in this partition. */
+    partition_key_offsets[max_shaders + uint(grid_id) * (max_shaders + 1)] = offset;
+  }
+}
+
+ccl_device_inline void gpu_parallel_sort_write_pass(const uint num_states,
+                                                    const uint partition_size,
+                                                    const uint max_shaders,
+                                                    const uint queued_kernel,
+                                                    const int num_states_limit,
+                                                    ccl_global int *indices,
+                                                    ccl_global ushort *d_queued_kernel,
+                                                    ccl_global uint *d_shader_sort_key,
+                                                    ccl_global int *partition_key_offsets,
+                                                    ccl_gpu_shared int *local_offset,
+                                                    const ushort local_id,
+                                                    const ushort local_size,
+                                                    const ushort grid_id)
+{
+  /* Calculate each partition's global offset from the prefix sum of the active state counts per
+   * partition. */
+
+  if (local_id < max_shaders) {
+    int partition_offset = 0;
+    for (int i = 0; i < uint(grid_id); i++) {
+      int partition_key_count = partition_key_offsets[max_shaders + uint(i) * (max_shaders + 1)];
+      partition_offset += partition_key_count;
+    }
+
+    ccl_global int *key_offsets = partition_key_offsets + (uint(grid_id) * (max_shaders + 1));
+    atomic_store_local(&local_offset[local_id], key_offsets[local_id] + partition_offset);
+  }
+
+  ccl_gpu_syncthreads();
+
+  /* Write the sorted active indices. */
+
+  const uint partition_start = partition_size * uint(grid_id);
+  const uint partition_end = min(num_states, partition_start + partition_size);
+
+  ccl_global int *key_offsets = partition_key_offsets + (uint(grid_id) * max_shaders);
+
+  for (int state_index = partition_start + uint(local_id); state_index < partition_end;
+       state_index += uint(local_size)) {
+    ushort kernel_index = d_queued_kernel[state_index];
+    if (kernel_index == queued_kernel) {
+      uint key = d_shader_sort_key[state_index] % max_shaders;
+      int index = atomic_fetch_and_add_uint32(&local_offset[key], 1);
+      if (index < num_states_limit) {
+        indices[index] = state_index;
+      }
+    }
+  }
+}
+
+#endif /* __KERNEL_LOCAL_ATOMIC_SORT__ */
+
 template<typename GetKeyOp>
 __device__ void gpu_parallel_sorted_index_array(const uint state_index,
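Stripped of the threadgroup parallelism and partitioning, the two passes above reduce to a counting sort. A single-threaded reference (illustrative, not part of the diff):

#include <cstdint>
#include <vector>

void sorted_indices_reference(const std::vector<uint16_t> &queued_kernel,
                              const std::vector<uint32_t> &shader_sort_key,
                              const uint16_t target_kernel,
                              const int max_shaders,
                              std::vector<int> &indices)
{
  /* Bucket pass: count active states per shader key. */
  std::vector<int> bucket_sizes(max_shaders, 0);
  for (size_t i = 0; i < queued_kernel.size(); i++) {
    if (queued_kernel[i] == target_kernel) {
      bucket_sizes[shader_sort_key[i] % max_shaders]++;
    }
  }

  /* Prefix sum of bucket sizes gives each key's first slot. */
  std::vector<int> offsets(max_shaders + 1, 0);
  for (int key = 0; key < max_shaders; key++) {
    offsets[key + 1] = offsets[key] + bucket_sizes[key];
  }

  /* Write pass: scatter state indices into their key's range. */
  indices.assign(offsets[max_shaders], -1);
  for (size_t i = 0; i < queued_kernel.size(); i++) {
    if (queued_kernel[i] == target_kernel) {
      indices[offsets[shader_sort_key[i] % max_shaders]++] = (int)i;
    }
  }
}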
@@ -172,17 +172,14 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
     kernel_assert(!"Invalid ift_local");
     return false;
   }
-#  endif
-
-  metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
-  metalrt_intersector_type metalrt_intersect;
-
-  metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
-
-  bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
-  if (triangle_only) {
-    metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
+  if (is_null_intersection_function_table(metal_ancillaries->ift_local_prim)) {
+    if (local_isect) {
+      local_isect->num_hits = 0;
+    }
+    kernel_assert(!"Invalid ift_local_prim");
+    return false;
   }
+#  endif
 
   MetalRTIntersectionLocalPayload payload;
   payload.self = ray->self;
@@ -195,14 +192,48 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
   }
   payload.result = false;
 
-  typename metalrt_intersector_type::result_type intersection;
+  metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
 
 #  if defined(__METALRT_MOTION__)
+  metalrt_intersector_type metalrt_intersect;
+  typename metalrt_intersector_type::result_type intersection;
+
+  metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
+  bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
+  if (triangle_only) {
+    metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
+  }
+
   intersection = metalrt_intersect.intersect(
       r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload);
 #  else
+
+  metalrt_blas_intersector_type metalrt_intersect;
+  typename metalrt_blas_intersector_type::result_type intersection;
+
+  metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
+  bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
+  if (triangle_only) {
+    metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
+  }
+
+  // if we know we are going to get max one hit, like for random-sss-walk we can
+  // optimize and accept the first hit
+  if (max_hits == 1) {
+    metalrt_intersect.accept_any_intersection(true);
+  }
+
+  int blas_index = metal_ancillaries->blas_userID_to_index_lookUp[local_object];
+  // transform the ray into object's local space
+  Transform itfm = kernel_data_fetch(objects, local_object).itfm;
+  r.origin = transform_point(&itfm, r.origin);
+  r.direction = transform_direction(&itfm, r.direction);
+
   intersection = metalrt_intersect.intersect(
-      r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload);
+      r,
+      metal_ancillaries->blas_accel_structs[blas_index].blas,
+      metal_ancillaries->ift_local_prim,
+      payload);
 #  endif
 
   if (lcg_state) {
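The BLAS path must intersect in object space, hence the itfm transform above. Assuming the same row-major 3x4 affine layout Cycles uses for Transform, points take the translation column while directions take only the linear part, mirroring transform_point() and transform_direction():

struct float3 {
  float x, y, z;
};

struct Transform {
  float m[3][4]; /* 3x4 affine: rotation/scale rows plus translation column */
};

float3 transform_point(const Transform *t, const float3 p)
{
  return {t->m[0][0] * p.x + t->m[0][1] * p.y + t->m[0][2] * p.z + t->m[0][3],
          t->m[1][0] * p.x + t->m[1][1] * p.y + t->m[1][2] * p.z + t->m[1][3],
          t->m[2][0] * p.x + t->m[2][1] * p.y + t->m[2][2] * p.z + t->m[2][3]};
}

float3 transform_direction(const Transform *t, const float3 d)
{
  return {t->m[0][0] * d.x + t->m[0][1] * d.y + t->m[0][2] * d.z,
          t->m[1][0] * d.x + t->m[1][1] * d.y + t->m[1][2] * d.z,
          t->m[2][0] * d.x + t->m[2][1] * d.y + t->m[2][2] * d.z};
}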
@@ -105,10 +105,11 @@ struct kernel_gpu_##name \
 { \
   PARAMS_MAKER(__VA_ARGS__)(__VA_ARGS__) \
   void run(thread MetalKernelContext& context, \
-           threadgroup int *simdgroup_offset, \
+           threadgroup atomic_int *threadgroup_array, \
            const uint metal_global_id, \
            const ushort metal_local_id, \
            const ushort metal_local_size, \
+           const ushort metal_grid_id, \
            uint simdgroup_size, \
            uint simd_lane_index, \
            uint simd_group_index, \
@@ -117,22 +118,24 @@ struct kernel_gpu_##name \
 kernel void cycles_metal_##name(device const kernel_gpu_##name *params_struct, \
                                 constant KernelParamsMetal &ccl_restrict _launch_params_metal, \
                                 constant MetalAncillaries *_metal_ancillaries, \
-                                threadgroup int *simdgroup_offset[[ threadgroup(0) ]], \
+                                threadgroup atomic_int *threadgroup_array[[ threadgroup(0) ]], \
                                 const uint metal_global_id [[thread_position_in_grid]], \
                                 const ushort metal_local_id [[thread_position_in_threadgroup]], \
                                 const ushort metal_local_size [[threads_per_threadgroup]], \
+                                const ushort metal_grid_id [[threadgroup_position_in_grid]], \
                                 uint simdgroup_size [[threads_per_simdgroup]], \
                                 uint simd_lane_index [[thread_index_in_simdgroup]], \
                                 uint simd_group_index [[simdgroup_index_in_threadgroup]], \
                                 uint num_simd_groups [[simdgroups_per_threadgroup]]) { \
   MetalKernelContext context(_launch_params_metal, _metal_ancillaries); \
-  params_struct->run(context, simdgroup_offset, metal_global_id, metal_local_id, metal_local_size, simdgroup_size, simd_lane_index, simd_group_index, num_simd_groups); \
+  params_struct->run(context, threadgroup_array, metal_global_id, metal_local_id, metal_local_size, metal_grid_id, simdgroup_size, simd_lane_index, simd_group_index, num_simd_groups); \
 } \
 void kernel_gpu_##name::run(thread MetalKernelContext& context, \
-                            threadgroup int *simdgroup_offset, \
+                            threadgroup atomic_int *threadgroup_array, \
                             const uint metal_global_id, \
                             const ushort metal_local_id, \
                             const ushort metal_local_size, \
+                            const ushort metal_grid_id, \
                             uint simdgroup_size, \
                             uint simd_lane_index, \
                             uint simd_group_index, \
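The threadgroup buffer changes type from int to atomic_int because the sort kernels issue relaxed atomic loads and stores on it (see the atomic_store_local/atomic_load_local macros earlier in this diff). The C++ equivalent of that access pattern, as a small sketch:

#include <atomic>

void zero_buckets(std::atomic<int> *buckets, const int count)
{
  for (int i = 0; i < count; i++) {
    buckets[i].store(0, std::memory_order_relaxed); /* cf. atomic_store_local() */
  }
}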
@@ -263,18 +266,34 @@ ccl_device_forceinline uchar4 make_uchar4(const uchar x,
 
 #  if defined(__METALRT_MOTION__)
 #    define METALRT_TAGS instancing, instance_motion, primitive_motion
+#    define METALRT_BLAS_TAGS , primitive_motion
 #  else
 #    define METALRT_TAGS instancing
+#    define METALRT_BLAS_TAGS
 #  endif /* __METALRT_MOTION__ */
 
 typedef acceleration_structure<METALRT_TAGS> metalrt_as_type;
 typedef intersection_function_table<triangle_data, METALRT_TAGS> metalrt_ift_type;
 typedef metal::raytracing::intersector<triangle_data, METALRT_TAGS> metalrt_intersector_type;
+#  if defined(__METALRT_MOTION__)
+typedef acceleration_structure<primitive_motion> metalrt_blas_as_type;
+typedef intersection_function_table<triangle_data, primitive_motion> metalrt_blas_ift_type;
+typedef metal::raytracing::intersector<triangle_data, primitive_motion>
+    metalrt_blas_intersector_type;
+#  else
+typedef acceleration_structure<> metalrt_blas_as_type;
+typedef intersection_function_table<triangle_data> metalrt_blas_ift_type;
+typedef metal::raytracing::intersector<triangle_data> metalrt_blas_intersector_type;
+#  endif
 
 #endif /* __METALRT__ */
 
 /* texture bindings and sampler setup */
 
+struct Buffer1DParamsMetal {
+  device float *buf;
+};
+
 struct Texture2DParamsMetal {
   texture2d<float, access::sample> tex;
 };
@@ -282,15 +301,25 @@ struct Texture3DParamsMetal {
   texture3d<float, access::sample> tex;
 };
 
+#ifdef __METALRT__
+struct MetalRTBlasWrapper {
+  metalrt_blas_as_type blas;
+};
+#endif
+
 struct MetalAncillaries {
   device Texture2DParamsMetal *textures_2d;
   device Texture3DParamsMetal *textures_3d;
+  device Buffer1DParamsMetal *buffers;
 
 #ifdef __METALRT__
   metalrt_as_type accel_struct;
   metalrt_ift_type ift_default;
   metalrt_ift_type ift_shadow;
   metalrt_ift_type ift_local;
+  metalrt_blas_ift_type ift_local_prim;
+  constant MetalRTBlasWrapper *blas_accel_structs;
+  constant int *blas_userID_to_index_lookUp;
 #endif
 };
 
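User IDs need not be contiguous, so the new blas_userID_to_index_lookUp table maps an object's user ID to its slot in blas_accel_structs. A host-side sketch with made-up data (names follow the diff, the types are stand-ins):

#include <vector>

struct Blas { /* stand-in for metalrt_blas_as_type */ };
struct BlasWrapper {
  Blas blas;
};

int main()
{
  std::vector<BlasWrapper> blas_accel_structs(3);
  const std::vector<int> blas_userID_to_index_lookUp = {2, 0, 1}; /* userID -> index */

  const int local_object = 1; /* example user ID */
  const Blas &blas = blas_accel_structs[blas_userID_to_index_lookUp[local_object]].blas;
  (void)blas;
  return 0;
}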
@@ -3,6 +3,13 @@
 
 // clang-format off
 
+#ifdef WITH_NANOVDB
+#  define NDEBUG /* Disable "assert" in device code */
+#  define NANOVDB_USE_INTRINSICS
+#  include "nanovdb/NanoVDB.h"
+#  include "nanovdb/util/SampleFromVoxels.h"
+#endif
+
 /* Open the Metal kernel context class
  * Necessary to access resource bindings */
 class MetalKernelContext {
@@ -139,6 +139,20 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
 #endif
 }
 
+[[intersection(triangle, triangle_data)]] TriangleIntersectionResult
+__anyhit__cycles_metalrt_local_hit_tri_prim(
+    constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+    ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]],
+    uint primitive_id [[primitive_id]],
+    float2 barycentrics [[barycentric_coord]],
+    float ray_tmax [[distance]])
+{
+  /* instance_id, a.k.a. the user_id, has been removed. Reaching this function means SSS
+   * traversal started from a primitive acceleration structure instead of the root of the
+   * global AS, so the correct object is always intersected and no user_id check is needed. */
+  return metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
+      launch_params_metal, payload, payload.local_object, primitive_id, barycentrics, ray_tmax);
+}
 [[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
 __anyhit__cycles_metalrt_local_hit_tri(
     constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
@@ -163,6 +177,17 @@ __anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]])
   return result;
 }
 
+[[intersection(bounding_box, triangle_data)]] BoundingBoxIntersectionResult
+__anyhit__cycles_metalrt_local_hit_box_prim(const float ray_tmax [[max_distance]])
+{
+  /* Unused function. */
+  BoundingBoxIntersectionResult result;
+  result.distance = ray_tmax;
+  result.accept = false;
+  result.continue_search = false;
+  return result;
+}
+
 template<uint intersection_type>
 bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
                             ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
@@ -372,6 +372,16 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
           kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_sorted_paths_array);
       break;
     }
+    case DEVICE_KERNEL_INTEGRATOR_SORT_BUCKET_PASS: {
+      oneapi_call(
+          kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_sort_bucket_pass);
+      break;
+    }
+    case DEVICE_KERNEL_INTEGRATOR_SORT_WRITE_PASS: {
+      oneapi_call(
+          kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_sort_write_pass);
+      break;
+    }
     case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY: {
       oneapi_call(kg,
                   cgh,
@@ -132,6 +132,9 @@ typedef struct IntegratorStateGPU {
   /* Index of main path which will be used by a next shadow catcher split. */
   ccl_global int *next_main_path_index;
 
+  /* Partition/key offsets used when writing sorted active indices. */
+  ccl_global int *sort_partition_key_offsets;
+
   /* Divisor used to partition active indices by locality when sorting by material. */
   uint sort_partition_divisor;
 } IntegratorStateGPU;