Compare commits
593 Commits
render-lay
...
datablock_
Author | SHA1 | Date | |
---|---|---|---|
78f1476d93 | |||
5937ebba44 | |||
9170e49250 | |||
92aeb84fde | |||
7b149bfde6 | |||
5ce95df2c6 | |||
e74254dd48 | |||
a63a31dd12 | |||
8a60d84327 | |||
ffac92e385 | |||
b93ddfd8ac | |||
ca5ccf5cd4 | |||
4f7eb3ad12 | |||
e131783384 | |||
6347b0b0aa | |||
728f75c6a7 | |||
e741804ce3 | |||
cc93a66e71 | |||
2aa0215ec5 | |||
d27ef3913a | |||
54a60eff24 | |||
ab347c8380 | |||
368b74315a | |||
f65d6ea954 | |||
9cf2a581ab | |||
3bf0026bec | |||
e1fb080743 | |||
d424c8041d | |||
3c76da79b4 | |||
4a3708b7ea | |||
6c42079b78 | |||
25ab3aac9d | |||
90df1142a3 | |||
27d20a04b5 | |||
26549b5ba2 | |||
ff693959d8 | |||
7f7c807a92 | |||
39e1121698 | |||
e5fa738ce9 | |||
aebd8a7328 | |||
8072d4bd66 | |||
952f31b0d8 | |||
4b7d95290f | |||
d097c2a1b3 | |||
8bd61ea54d | |||
14c2083460 | |||
01e0b38b66 | |||
314ccf6494 | |||
4cfac9edab | |||
abb876d84b | |||
b7464ec6a2 | |||
5b3b0b4778 | |||
843be91002 | |||
e444a41545 | |||
e446935265 | |||
a88801b99b | |||
ced8fff5de | |||
9b1564a862 | |||
31e6249256 | |||
66ef0b8834 | |||
171c39cc19 | |||
48fa2c83eb | |||
aae70f182b | |||
603aafc9dc | |||
be17445714 | |||
cc7386ec6b | |||
5af4e1ca15 | |||
270df9a60f | |||
30bed91b78 | |||
0579eaae1f | |||
61db9ee27a | |||
c2d3bb7090 | |||
dd662c74ae | |||
a7ca991841 | |||
cb6ec44fc7 | |||
6332e0e1f7 | |||
df7f6a3e2e | |||
b3f9ae0125 | |||
15ff75d06b | |||
ac43e5cc87 | |||
286adfde38 | |||
93543e6695 | |||
4c7f4e4662 | |||
4f69dca547 | |||
d808557d15 | |||
02b2094847 | |||
93426cb295 | |||
6ea54fe9ff | |||
f512c8c2a8 | |||
69aa6577b3 | |||
a5f316e999 | |||
59bb4ca1b0 | |||
3f61280327 | |||
e1909958d9 | |||
bed327f1ce | |||
39172c6f34 | |||
1b5acbb329 | |||
5ce120b865 | |||
6a5e92c022 | |||
7a65f9b171 | |||
0df33cc52d | |||
0cfc557c5d | |||
43b255a2a1 | |||
bd053ac7ba | |||
eef52b2818 | |||
60ac61b13f | |||
38589ce6ef | |||
95e0cb499b | |||
2a05292efa | |||
db0bada1c3 | |||
8d48ea0233 | |||
![]() |
e07ffcbd1c | ||
![]() |
8ada7f7397 | ||
d14e39622a | |||
5aaa643947 | |||
086320a62e | |||
356aacab6b | |||
505b3b7328 | |||
2830f687aa | |||
4bdb2d4885 | |||
fa63515c37 | |||
001fce167a | |||
ed072e1dcd | |||
15143a7464 | |||
8c0682a93c | |||
f8e02c75ba | |||
393efccb19 | |||
![]() |
fe3fb23697 | ||
86730f1f35 | |||
7cb2974182 | |||
a6f74453b6 | |||
f68145011f | |||
a7f16c17c2 | |||
dab3865c0b | |||
e9770adf63 | |||
8e58e197fd | |||
5b45715f8a | |||
6aa972ebd4 | |||
467d824f80 | |||
e32710d2d7 | |||
85a5fbf2ce | |||
a14fb77fee | |||
d52191616b | |||
178708f142 | |||
d23459f516 | |||
bc0b5d611c | |||
50f9fc7a53 | |||
0453c807e0 | |||
6a6566a7fc | |||
096602d3a8 | |||
05b7591c62 | |||
9af6f40e4d | |||
a96110e710 | |||
27248c8636 | |||
ba8c7d2ba1 | |||
a1348dde2e | |||
2a5d7b5b1e | |||
a5b6742ed2 | |||
f8a999c965 | |||
b797a5ff78 | |||
aa0602130b | |||
1c5cceb7af | |||
e8ff06186e | |||
5c06ff8bb9 | |||
e04970b392 | |||
2c78b0c71f | |||
b48ba0909a | |||
476f5c473a | |||
51d4743033 | |||
6cfa962986 | |||
7c094f6079 | |||
092d673689 | |||
339d0170d1 | |||
1729dd9998 | |||
33e32c341a | |||
ec3989441f | |||
1978ac65c4 | |||
89631485cc | |||
1600b93fb8 | |||
4f4a484b9b | |||
4d82d525f8 | |||
a63ba2739e | |||
12b62b58e1 | |||
762319e911 | |||
d8b34a17ac | |||
c7a4f96f88 | |||
2ba1868c3f | |||
387ba87ad3 | |||
dc5007648c | |||
![]() |
412220c8d3 | ||
797b1d5053 | |||
2b44db4cfc | |||
8563d3b254 | |||
d0253b2ea4 | |||
253281f9d6 | |||
a3f48d65df | |||
a0f16e12a0 | |||
7780a108b3 | |||
a41240439b | |||
fceb1d0781 | |||
eb1a57b12c | |||
8fff6cc2f5 | |||
13d8661503 | |||
3c4df13924 | |||
d544a61e8a | |||
a201b99c5a | |||
6b86b446d3 | |||
18bf900b31 | |||
06159e6a58 | |||
dbc8b81ecf | |||
eaf88f564c | |||
fa11d41113 | |||
df76616d74 | |||
19d493ee10 | |||
84935998a7 | |||
56d3cc9341 | |||
9bdda427e6 | |||
2fbc50e4c1 | |||
3ceb68c833 | |||
e392bb4937 | |||
d863b5182e | |||
9d873fc3de | |||
ea3d7a7f58 | |||
d6b4fb6429 | |||
502c4be56e | |||
a58350b07f | |||
98b81493f3 | |||
e361adbca2 | |||
439a277aa5 | |||
2cae58524c | |||
60a344b43d | |||
b27e224276 | |||
750c0dd4de | |||
d4d8da28fc | |||
b2d3956e7b | |||
db04980678 | |||
f7793bd53c | |||
1f65ab606b | |||
fa9bd04483 | |||
0434053f13 | |||
68e58f1991 | |||
6d8875bd61 | |||
c4e07dd29b | |||
aad9dd2f1b | |||
1cad64900e | |||
1ff753baa4 | |||
26620f3f87 | |||
4833a71621 | |||
d68a84d1d2 | |||
375ede0f3f | |||
68496c0b38 | |||
c832354e33 | |||
c44cdd5905 | |||
c5dba540d7 | |||
![]() |
81dc8dd42a | ||
![]() |
c6c85a8c6b | ||
af1d9ecd40 | |||
9ad252d157 | |||
6a5487e021 | |||
f13c729b26 | |||
647fb6ef1e | |||
4877c9362a | |||
76ec329dd1 | |||
1208792adb | |||
582f9ddeb7 | |||
5ba51de84a | |||
43f7d5643f | |||
![]() |
da6cd77628 | ||
f3ff03b3c1 | |||
284701e371 | |||
1410ea0478 | |||
521133682c | |||
8dd0355c21 | |||
0ee1cdab7e | |||
4c5374d46a | |||
810982a95c | |||
76acaefdd7 | |||
0c72008592 | |||
aa36c73c33 | |||
2b3cc24388 | |||
![]() |
f169ff8b88 | ||
8794a43b68 | |||
e8021f5e3b | |||
10404e3e56 | |||
b759d3c9c5 | |||
18ed060bc3 | |||
6521307dcd | |||
f28376d8d9 | |||
![]() |
8ca11f5b72 | ||
3f94836922 | |||
68ca973f7f | |||
76015f98ae | |||
bcc8c04db4 | |||
98045648ab | |||
304315d181 | |||
ce155ad2f6 | |||
96868a3941 | |||
5afe4c787f | |||
5be8adf8c0 | |||
f667593b6a | |||
2d3c44389a | |||
c374e9f1f5 | |||
b6713dcbe5 | |||
a81ea40836 | |||
103ae04fbc | |||
15eb83c8b3 | |||
9d6acc34a1 | |||
59fd21296a | |||
17b3097205 | |||
6038583909 | |||
3dbb560331 | |||
12e681909f | |||
4a2cde3f0e | |||
17689f8bb6 | |||
62cc226101 | |||
![]() |
9de9f25b24 | ||
![]() |
06c051363b | ||
![]() |
e8b5a5bf5b | ||
![]() |
57e26627c4 | ||
![]() |
6c942db30d | ||
88e8e7a074 | |||
4ab322fdd2 | |||
c837bd5ea5 | |||
45b764e95b | |||
817e975dee | |||
97c4c2689f | |||
05dfe9c318 | |||
c24d045a23 | |||
9af0c8b00a | |||
6f3f891c58 | |||
75cb4850f0 | |||
ecfbfe478b | |||
712f7c3640 | |||
ef7c36f5ed | |||
a095611eb8 | |||
3505be8361 | |||
64751552f7 | |||
fe7cc94dfa | |||
306034790f | |||
997e345bd2 | |||
18e50927f7 | |||
223f45818e | |||
cd7d5669d1 | |||
4cf501b835 | |||
5b8f1c8d34 | |||
b78e543af9 | |||
817873cc83 | |||
0892352bfe | |||
352ee7c3ef | |||
a87766416f | |||
365a4239c5 | |||
230c00d872 | |||
520b53364c | |||
dfd6055eb0 | |||
bc652766e8 | |||
0f56f7a811 | |||
9e566b06e3 | |||
347410a322 | |||
55c2cd85f0 | |||
2f722f1a49 | |||
a07ad02156 | |||
9522f8acf0 | |||
35d78121f0 | |||
![]() |
af076031d6 | ||
![]() |
ca796f872e | ||
15fa806160 | |||
f1c764fd8f | |||
0e995e0bfe | |||
b498db06eb | |||
3623f32b48 | |||
355ad008a2 | |||
![]() |
80444effc6 | ||
e72af060ab | |||
5f98cd6360 | |||
a461216885 | |||
3caeb51d7f | |||
f75b52eca1 | |||
4a4d71414e | |||
2089a17f7e | |||
6b9d73e8a7 | |||
7b92b64742 | |||
2e8398c095 | |||
18c2a44333 | |||
5f05dac28f | |||
17cf423f30 | |||
91ce13e90d | |||
c0d0ef142f | |||
25de610876 | |||
cdfae957f2 | |||
810d7d4694 | |||
df88d54284 | |||
42cb93205c | |||
![]() |
a78717a72d | ||
![]() |
e7dc46d278 | ||
a83a68b9b6 | |||
87f8bb8d1d | |||
499faa8b11 | |||
856077618a | |||
193827e59b | |||
49c99549eb | |||
278fce1170 | |||
7fcae7ba60 | |||
ecee40e919 | |||
714e85b534 | |||
32c5f3d772 | |||
f0cf15b5c6 | |||
351c9239ed | |||
c1012c6c3a | |||
87f236cd10 | |||
efe78d824e | |||
a581b65822 | |||
6d1ac79514 | |||
4fa4132e45 | |||
cd5c853307 | |||
2342cd0a0f | |||
691ffb60b9 | |||
bf7006c15a | |||
5acac13eb4 | |||
f1b21d5960 | |||
209a64111e | |||
00ceb6d2f4 | |||
f7d67835e9 | |||
cc78690be3 | |||
238db604c5 | |||
845ba1a6fb | |||
406398213c | |||
631ecbc4ca | |||
112e4de885 | |||
0561aa771b | |||
5c3216e233 | |||
d66d5790e9 | |||
94ca09e01c | |||
2c4564b044 | |||
a0b8a9fe68 | |||
8c5826f59a | |||
15f1072ee2 | |||
caaf5f0a09 | |||
9062c086b4 | |||
1e29286c8c | |||
f49e28bae7 | |||
4c164487bc | |||
29859d0d5e | |||
c067f1d0d4 | |||
c7ad27fc07 | |||
6a249bb000 | |||
50328b41a7 | |||
4e12113bea | |||
13e075600a | |||
9eb647f1c8 | |||
60592f6778 | |||
9dd194716b | |||
![]() |
7359cc1060 | ||
43299f9465 | |||
5e1d4714fe | |||
b637db2a7a | |||
99947e2943 | |||
75cc33fa20 | |||
36c4fc1ea9 | |||
2c30fd83f1 | |||
ae1c1cd8c0 | |||
3622074bf7 | |||
4e9b17da4c | |||
34a502c16a | |||
696836af1d | |||
75ce4ebc12 | |||
333dc8d60f | |||
9992e6a169 | |||
3f5b2e2682 | |||
6f1493f68f | |||
31123f09cd | |||
d41451a0ca | |||
6c59a3b37a | |||
7819d36d4e | |||
99a6bbf7dd | |||
21eae869ad | |||
306acb7dda | |||
26c8d559fe | |||
6468cb5f9c | |||
5cbaf56b26 | |||
fc185fb1d2 | |||
809ed38075 | |||
781507d2dd | |||
9b3d415f6a | |||
40e5bc15e9 | |||
41e0085fd3 | |||
e22d4699cb | |||
13d31b1604 | |||
3e628eefa9 | |||
efbe47f9cd | |||
fe47163a1e | |||
20283bfa0b | |||
dd79f907a7 | |||
8b8c0d0049 | |||
6cdc954e8c | |||
dc7bbd731a | |||
088c6a17ba | |||
5723aa8c02 | |||
b36e26bbce | |||
384b7e18f1 | |||
402b0aa59b | |||
af1e48e8ab | |||
737a3b8a0a | |||
324d057b25 | |||
4d325693e1 | |||
6c104f62b9 | |||
54102ab36e | |||
930186d3df | |||
21dbfb7828 | |||
581c819013 | |||
81eee0f536 | |||
37afa965a4 | |||
594015fb7e | |||
9148ce9f3c | |||
5552e83b53 | |||
e76364adcd | |||
1ac6e4c7a2 | |||
3ede515b5b | |||
9d8a9cacc3 | |||
e33e58bf23 | |||
e991af0934 | |||
cd4309ced0 | |||
0178915ce9 | |||
fd7e9f7974 | |||
d395d81bfc | |||
0b65b889ef | |||
b26da8b467 | |||
b929eef8c5 | |||
38155c7d3c | |||
bb1367cdaf | |||
e523cde574 | |||
d2f4900d1a | |||
351eb4fad1 | |||
316d23f2ba | |||
117d90b3da | |||
b16fd22018 | |||
da31a82832 | |||
04cf1538b5 | |||
31a025f51e | |||
dde40989f3 | |||
7447950bc3 | |||
9830eeb44b | |||
9c3d202e56 | |||
58a10122d0 | |||
98a1855803 | |||
8cda364d6f | |||
ac38d5652b | |||
95e7f93fa2 | |||
b320873382 | |||
ce9df09067 | |||
82df7100c8 | |||
69dbeeca48 | |||
b641d016e1 | |||
ce629c5dd9 | |||
e5bb005369 | |||
5d6177111d | |||
03be3102c7 | |||
03544eccb4 | |||
53896d4235 | |||
1158800d1b | |||
f7eaaf35b4 | |||
e217839fd3 | |||
dbdc346e9f | |||
0170c682fe | |||
e3f99329d8 | |||
ac8348d033 | |||
9e97b00873 | |||
c7f40caa2c | |||
c5cc9e046d | |||
d0015cba02 | |||
89f3837d68 | |||
385fe4f0ce | |||
223aff987a | |||
![]() |
351c409317 | ||
22156d951d | |||
da08aa4b96 | |||
75aa866211 | |||
47caf343c0 | |||
a2c469edc2 | |||
6663099810 | |||
c367e23d46 | |||
a0561a05ef | |||
21f3767809 | |||
0b4a9caf51 | |||
0e459ad1a3 | |||
52696a0d3f | |||
f3a7104adb | |||
a1820afa30 | |||
030e99588d | |||
aea17a612d | |||
dc1b45ff1a | |||
e1e85454ea | |||
103f2655ab | |||
ddf99214dc |
4
.gitmodules
vendored
4
.gitmodules
vendored
@@ -2,15 +2,19 @@
|
||||
path = release/scripts/addons
|
||||
url = ../blender-addons.git
|
||||
ignore = all
|
||||
branch = master
|
||||
[submodule "release/scripts/addons_contrib"]
|
||||
path = release/scripts/addons_contrib
|
||||
url = ../blender-addons-contrib.git
|
||||
ignore = all
|
||||
branch = master
|
||||
[submodule "release/datafiles/locale"]
|
||||
path = release/datafiles/locale
|
||||
url = ../blender-translations.git
|
||||
ignore = all
|
||||
branch = master
|
||||
[submodule "source/tools"]
|
||||
path = source/tools
|
||||
url = ../blender-dev-tools.git
|
||||
ignore = all
|
||||
branch = master
|
||||
|
@@ -445,6 +445,7 @@ option(WITH_BOOST "Enable features depending on boost" ON)
|
||||
|
||||
# Unit testing
|
||||
option(WITH_GTESTS "Enable GTest unit testing" OFF)
|
||||
option(WITH_OPENGL_TESTS "Enable OpenGL related unit testing (Experimental)" OFF)
|
||||
|
||||
|
||||
# Documentation
|
||||
@@ -518,18 +519,20 @@ endif()
|
||||
option(WITH_LEGACY_DEPSGRAPH "Build Blender with legacy dependency graph" ON)
|
||||
mark_as_advanced(WITH_LEGACY_DEPSGRAPH)
|
||||
|
||||
# Use hardcoded paths or find_package to find externals
|
||||
option(WITH_WINDOWS_FIND_MODULES "Use find_package to locate libraries" OFF)
|
||||
mark_as_advanced(WITH_WINDOWS_FIND_MODULES)
|
||||
if(WIN32)
|
||||
# Use hardcoded paths or find_package to find externals
|
||||
option(WITH_WINDOWS_FIND_MODULES "Use find_package to locate libraries" OFF)
|
||||
mark_as_advanced(WITH_WINDOWS_FIND_MODULES)
|
||||
|
||||
option(WITH_WINDOWS_CODESIGN "Use signtool to sign the final binary." OFF)
|
||||
mark_as_advanced(WITH_WINDOWS_CODESIGN)
|
||||
option(WITH_WINDOWS_CODESIGN "Use signtool to sign the final binary." OFF)
|
||||
mark_as_advanced(WITH_WINDOWS_CODESIGN)
|
||||
|
||||
set(WINDOWS_CODESIGN_PFX CACHE FILEPATH "Path to pfx file to use for codesigning.")
|
||||
mark_as_advanced(WINDOWS_CODESIGN_PFX)
|
||||
set(WINDOWS_CODESIGN_PFX CACHE FILEPATH "Path to pfx file to use for codesigning.")
|
||||
mark_as_advanced(WINDOWS_CODESIGN_PFX)
|
||||
|
||||
set(WINDOWS_CODESIGN_PFX_PASSWORD CACHE STRING "password for pfx file used for codesigning.")
|
||||
mark_as_advanced(WINDOWS_CODESIGN_PFX_PASSWORD)
|
||||
set(WINDOWS_CODESIGN_PFX_PASSWORD CACHE STRING "password for pfx file used for codesigning.")
|
||||
mark_as_advanced(WINDOWS_CODESIGN_PFX_PASSWORD)
|
||||
endif()
|
||||
|
||||
# avoid using again
|
||||
option_defaults_clear()
|
||||
@@ -924,7 +927,7 @@ if(WITH_X11)
|
||||
if(WITH_X11_ALPHA)
|
||||
find_library(X11_Xrender_LIB Xrender ${X11_LIB_SEARCH_PATH})
|
||||
mark_as_advanced(X11_Xrender_LIB)
|
||||
if (X11_Xrender_LIB)
|
||||
if(X11_Xrender_LIB)
|
||||
list(APPEND PLATFORM_LINKLIBS ${X11_Xrender_LIB})
|
||||
else()
|
||||
set(WITH_X11_ALPHA OFF)
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# -*- mode: gnumakefile; tab-width: 8; indent-tabs-mode: t; -*-
|
||||
# -*- mode: gnumakefile; tab-width: 4; indent-tabs-mode: t; -*-
|
||||
# vim: tabstop=4
|
||||
#
|
||||
# ##### BEGIN GPL LICENSE BLOCK #####
|
||||
@@ -113,7 +113,7 @@ CMAKE_CONFIG = cmake $(BUILD_CMAKE_ARGS) \
|
||||
# X11 specific
|
||||
ifdef DISPLAY
|
||||
CMAKE_CONFIG_TOOL = cmake-gui
|
||||
else
|
||||
else
|
||||
CMAKE_CONFIG_TOOL = ccmake
|
||||
endif
|
||||
|
||||
@@ -127,7 +127,7 @@ all: .FORCE
|
||||
# # if test ! -f $(BUILD_DIR)/CMakeCache.txt ; then \
|
||||
# # $(CMAKE_CONFIG); \
|
||||
# # fi
|
||||
|
||||
|
||||
# # do this always incase of failed initial build, could be smarter here...
|
||||
@$(CMAKE_CONFIG)
|
||||
|
||||
|
@@ -360,7 +360,7 @@ OPENVDB_FORCE_REBUILD=false
|
||||
OPENVDB_SKIP=false
|
||||
|
||||
# Alembic needs to be compiled for now
|
||||
ALEMBIC_VERSION="1.6.0"
|
||||
ALEMBIC_VERSION="1.7.1"
|
||||
ALEMBIC_VERSION_MIN=$ALEMBIC_VERSION
|
||||
ALEMBIC_FORCE_BUILD=false
|
||||
ALEMBIC_FORCE_REBUILD=false
|
||||
@@ -2236,9 +2236,6 @@ compile_ALEMBIC() {
|
||||
return
|
||||
fi
|
||||
|
||||
compile_HDF5
|
||||
PRINT ""
|
||||
|
||||
# To be changed each time we make edits that would modify the compiled result!
|
||||
alembic_magic=2
|
||||
_init_alembic
|
||||
@@ -2266,6 +2263,12 @@ compile_ALEMBIC() {
|
||||
|
||||
cmake_d="-D CMAKE_INSTALL_PREFIX=$_inst"
|
||||
|
||||
# Without Boost or TR1, Alembic requires C++11.
|
||||
if [ "$USE_CXX11" != true ]; then
|
||||
cmake_d="$cmake_d -D ALEMBIC_LIB_USES_BOOST=ON"
|
||||
cmake_d="$cmake_d -D ALEMBIC_LIB_USES_TR1=OFF"
|
||||
fi
|
||||
|
||||
if [ -d $INST/boost ]; then
|
||||
cmake_d="$cmake_d -D BOOST_ROOT=$INST/boost"
|
||||
cmake_d="$cmake_d -D USE_STATIC_BOOST=ON"
|
||||
@@ -2285,8 +2288,6 @@ compile_ALEMBIC() {
|
||||
cmake_d="$cmake_d -D USE_STATIC_HDF5=OFF"
|
||||
cmake_d="$cmake_d -D ALEMBIC_ILMBASE_LINK_STATIC=OFF"
|
||||
cmake_d="$cmake_d -D ALEMBIC_SHARED_LIBS=OFF"
|
||||
cmake_d="$cmake_d -D ALEMBIC_LIB_USES_BOOST=ON"
|
||||
cmake_d="$cmake_d -D ALEMBIC_LIB_USES_TR1=OFF"
|
||||
INFO "ILMBASE_ROOT=$INST/openexr"
|
||||
fi
|
||||
|
||||
@@ -4252,7 +4253,7 @@ print_info() {
|
||||
PRINT " $_3"
|
||||
_buildargs="$_buildargs $_1 $_2 $_3"
|
||||
if [ -d $INST/osl ]; then
|
||||
_1="-D CYCLES_OSL=$INST/osl"
|
||||
_1="-D OSL_ROOT_DIR=$INST/osl"
|
||||
PRINT " $_1"
|
||||
_buildargs="$_buildargs $_1"
|
||||
fi
|
||||
|
@@ -4,10 +4,10 @@
|
||||
# <pep8 compliant>
|
||||
|
||||
# List of the branches being built automatically overnight
|
||||
NIGHT_SCHEDULE_BRANCHES = [None]
|
||||
NIGHT_SCHEDULE_BRANCHES = [None, "blender2.8"]
|
||||
|
||||
# List of the branches available for force build
|
||||
FORCE_SCHEDULE_BRANCHES = ["master", "gooseberry", "experimental-build"]
|
||||
FORCE_SCHEDULE_BRANCHES = ["master", "blender2.8", "experimental-build"]
|
||||
|
||||
"""
|
||||
Stock Twisted directory lister doesn't provide any information about last file
|
||||
@@ -127,7 +127,14 @@ def schedule_force_build(name):
|
||||
project=forcesched.FixedParameter(name="project", default="", hide=True)),
|
||||
# For now, hide other codebases.
|
||||
forcesched.CodebaseParameter(hide=True, codebase="blender-translations"),
|
||||
forcesched.CodebaseParameter(hide=True, codebase="blender-addons"),
|
||||
forcesched.CodebaseParameter(
|
||||
codebase="blender-addons",
|
||||
branch=forcesched.ChoiceStringParameter(
|
||||
name="branch", choices=["master", "blender2.8"], default="master"),
|
||||
repository=forcesched.FixedParameter(name="repository", default="", hide=True),
|
||||
project=forcesched.FixedParameter(name="project", default="", hide=True),
|
||||
revision=forcesched.FixedParameter(name="revision", default="", hide=True),
|
||||
),
|
||||
forcesched.CodebaseParameter(hide=True, codebase="blender-addons-contrib"),
|
||||
forcesched.CodebaseParameter(hide=True, codebase="blender-dev-tools"),
|
||||
forcesched.CodebaseParameter(hide=True, codebase="lib svn")],
|
||||
@@ -139,11 +146,15 @@ def schedule_build(name, hour, minute=0):
|
||||
scheduler_name = "nightly " + name
|
||||
if current_branch:
|
||||
scheduler_name += ' ' + current_branch
|
||||
# Use special addons submodule branch when building blender2.8 branch.
|
||||
addons_branch = "master"
|
||||
if current_branch == "blender2.8":
|
||||
addons_branch = "blender2.8"
|
||||
c['schedulers'].append(timed.Nightly(name=scheduler_name,
|
||||
codebases={
|
||||
"blender": {"repository": ""},
|
||||
"blender-translations": {"repository": "", "branch": "master"},
|
||||
"blender-addons": {"repository": "", "branch": "master"},
|
||||
"blender-addons": {"repository": "", "branch": addons_branch},
|
||||
"blender-addons-contrib": {"repository": "", "branch": "master"},
|
||||
"blender-dev-tools": {"repository": "", "branch": "master"},
|
||||
"lib svn": {"repository": "", "branch": "trunk"}},
|
||||
@@ -225,8 +236,7 @@ def git_step(branch=''):
|
||||
|
||||
|
||||
def git_submodules_update():
|
||||
command = ['git', 'submodule', 'foreach', '--recursive',
|
||||
'git', 'pull', 'origin', 'master']
|
||||
command = ['git', 'submodule', 'update', '--remote']
|
||||
return ShellCommand(name='Submodules Update',
|
||||
command=command,
|
||||
description='updating',
|
||||
@@ -235,7 +245,10 @@ def git_submodules_update():
|
||||
|
||||
|
||||
def lib_svn_step(dir):
|
||||
return SVN(name='lib svn',
|
||||
name = "lib svn"
|
||||
if dir == "darwin":
|
||||
name = "C++11 lib svn"
|
||||
return SVN(name=name,
|
||||
baseURL='https://svn.blender.org/svnroot/bf-blender/%%BRANCH%%/lib/' + dir,
|
||||
codebase='lib svn',
|
||||
mode='update',
|
||||
@@ -264,6 +277,9 @@ def generic_builder(id, libdir='', branch='', rsync=False):
|
||||
f = BuildFactory()
|
||||
if libdir != '':
|
||||
f.addStep(lib_svn_step(libdir))
|
||||
# Special trick to make sure we always have all the libs.
|
||||
if libdir.startswith("darwin"):
|
||||
f.addStep(lib_svn_step("darwin"))
|
||||
|
||||
for submodule in ('blender-translations',
|
||||
'blender-addons',
|
||||
@@ -286,7 +302,7 @@ def generic_builder(id, libdir='', branch='', rsync=False):
|
||||
f.addStep(FileUpload(name='upload',
|
||||
slavesrc='buildbot_upload.zip',
|
||||
masterdest=filename,
|
||||
maxsize=150 * 1024 * 1024,
|
||||
maxsize=180 * 1024 * 1024,
|
||||
workdir='install'))
|
||||
f.addStep(MasterShellCommand(name='unpack',
|
||||
command=['python2.7', unpack_script, filename],
|
||||
|
@@ -67,6 +67,9 @@ def get_platform(filename):
|
||||
|
||||
|
||||
def get_branch(filename):
|
||||
if filename.startswith("blender-2.8"):
|
||||
return "blender2.8"
|
||||
|
||||
tokens = filename.split("-")
|
||||
branch = ""
|
||||
|
||||
|
@@ -72,10 +72,8 @@ if 'cmake' in builder:
|
||||
# Set up OSX architecture
|
||||
if builder.endswith('x86_64_10_6_cmake'):
|
||||
cmake_extra_options.append('-DCMAKE_OSX_ARCHITECTURES:STRING=x86_64')
|
||||
cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE=/usr/local/cuda8-hack/bin/nvcc')
|
||||
cmake_extra_options.append('-DWITH_CODEC_QUICKTIME=OFF')
|
||||
cmake_extra_options.append('-DCMAKE_OSX_DEPLOYMENT_TARGET=10.6')
|
||||
build_cubins = False
|
||||
|
||||
|
||||
elif builder.startswith('win'):
|
||||
@@ -93,7 +91,6 @@ if 'cmake' in builder:
|
||||
elif builder.startswith('win32'):
|
||||
bits = 32
|
||||
cmake_options.extend(['-G', 'Visual Studio 12 2013'])
|
||||
cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/bin/nvcc.exe')
|
||||
|
||||
elif builder.startswith('linux'):
|
||||
tokens = builder.split("_")
|
||||
@@ -113,8 +110,6 @@ if 'cmake' in builder:
|
||||
cuda_chroot_name = 'buildbot_' + deb_name + '_x86_64'
|
||||
targets = ['player', 'blender', 'cuda']
|
||||
|
||||
cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-8.0/bin/nvcc')
|
||||
|
||||
cmake_options.append("-C" + os.path.join(blender_dir, cmake_config_file))
|
||||
|
||||
# Prepare CMake options needed to configure cuda binaries compilation.
|
||||
|
@@ -111,7 +111,8 @@ if builder.find('cmake') != -1:
|
||||
if builder.endswith('vc2015'):
|
||||
platform += "-vc14"
|
||||
builderified_name = 'blender-{}-{}-{}'.format(blender_full_version, git_hash, platform)
|
||||
if branch != '':
|
||||
# NOTE: Blender 2.8 is already respected by blender_full_version.
|
||||
if branch != '' and branch != 'blender2.8':
|
||||
builderified_name = branch + "-" + builderified_name
|
||||
|
||||
os.rename(result_file, "{}.zip".format(builderified_name))
|
||||
@@ -177,7 +178,8 @@ if builder.find('cmake') != -1:
|
||||
blender_hash,
|
||||
blender_glibc,
|
||||
blender_arch)
|
||||
if branch != '':
|
||||
# NOTE: Blender 2.8 is already respected by blender_full_version.
|
||||
if branch != '' and branch != 'blender2.8':
|
||||
package_name = branch + "-" + package_name
|
||||
|
||||
upload_filename = package_name + ".tar.bz2"
|
||||
|
@@ -56,7 +56,7 @@ if(EXISTS ${SOURCE_DIR}/.git)
|
||||
string(REGEX REPLACE "[\r\n]+" ";" _git_contains_branches "${_git_contains_branches}")
|
||||
string(REGEX REPLACE ";[ \t]+" ";" _git_contains_branches "${_git_contains_branches}")
|
||||
foreach(_branch ${_git_contains_branches})
|
||||
if (NOT "${_branch}" MATCHES "\\(HEAD.*")
|
||||
if(NOT "${_branch}" MATCHES "\\(HEAD.*")
|
||||
set(MY_WC_BRANCH "${_branch}")
|
||||
break()
|
||||
endif()
|
||||
|
@@ -1574,24 +1574,24 @@ macro(openmp_delayload
|
||||
endmacro()
|
||||
|
||||
MACRO(WINDOWS_SIGN_TARGET target)
|
||||
if (WITH_WINDOWS_CODESIGN)
|
||||
if (!SIGNTOOL_EXE)
|
||||
if(WITH_WINDOWS_CODESIGN)
|
||||
if(!SIGNTOOL_EXE)
|
||||
error("Codesigning is enabled, but signtool is not found")
|
||||
else()
|
||||
if (WINDOWS_CODESIGN_PFX_PASSWORD)
|
||||
if(WINDOWS_CODESIGN_PFX_PASSWORD)
|
||||
set(CODESIGNPASSWORD /p ${WINDOWS_CODESIGN_PFX_PASSWORD})
|
||||
else()
|
||||
if ($ENV{PFXPASSWORD})
|
||||
if($ENV{PFXPASSWORD})
|
||||
set(CODESIGNPASSWORD /p $ENV{PFXPASSWORD})
|
||||
else()
|
||||
message( FATAL_ERROR "WITH_WINDOWS_CODESIGN is on but WINDOWS_CODESIGN_PFX_PASSWORD not set, and environment variable PFXPASSWORD not found, unable to sign code.")
|
||||
message(FATAL_ERROR "WITH_WINDOWS_CODESIGN is on but WINDOWS_CODESIGN_PFX_PASSWORD not set, and environment variable PFXPASSWORD not found, unable to sign code.")
|
||||
endif()
|
||||
endif()
|
||||
add_custom_command(TARGET ${target}
|
||||
POST_BUILD
|
||||
COMMAND ${SIGNTOOL_EXE} sign /f ${WINDOWS_CODESIGN_PFX} ${CODESIGNPASSWORD} $<TARGET_FILE:${target}>
|
||||
VERBATIM
|
||||
)
|
||||
POST_BUILD
|
||||
COMMAND ${SIGNTOOL_EXE} sign /f ${WINDOWS_CODESIGN_PFX} ${CODESIGNPASSWORD} $<TARGET_FILE:${target}>
|
||||
VERBATIM
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
ENDMACRO()
|
||||
|
@@ -1,5 +1,7 @@
|
||||
set(PROJECT_DESCRIPTION "Blender is a very fast and versatile 3D modeller/renderer.")
|
||||
set(PROJECT_COPYRIGHT "Copyright (C) 2001-2012 Blender Foundation")
|
||||
string(TIMESTAMP CURRENT_YEAR "%Y")
|
||||
|
||||
set(PROJECT_DESCRIPTION "Blender is the free and open source 3D creation suite software.")
|
||||
set(PROJECT_COPYRIGHT "Copyright (C) 2001-${CURRENT_YEAR} Blender Foundation")
|
||||
set(PROJECT_CONTACT "foundation@blender.org")
|
||||
set(PROJECT_VENDOR "Blender Foundation")
|
||||
|
||||
@@ -38,8 +40,8 @@ unset(MY_WC_HASH)
|
||||
# Force Package Name
|
||||
execute_process(COMMAND date "+%Y%m%d" OUTPUT_VARIABLE CPACK_DATE OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
string(TOLOWER ${PROJECT_NAME} PROJECT_NAME_LOWER)
|
||||
if (MSVC)
|
||||
if ("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
|
||||
if(MSVC)
|
||||
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
|
||||
set(PACKAGE_ARCH windows64)
|
||||
else()
|
||||
set(PACKAGE_ARCH windows32)
|
||||
@@ -48,7 +50,7 @@ else(MSVC)
|
||||
set(PACKAGE_ARCH ${CMAKE_SYSTEM_PROCESSOR})
|
||||
endif()
|
||||
|
||||
if (CPACK_OVERRIDE_PACKAGENAME)
|
||||
if(CPACK_OVERRIDE_PACKAGENAME)
|
||||
set(CPACK_PACKAGE_FILE_NAME ${CPACK_OVERRIDE_PACKAGENAME}-${PACKAGE_ARCH})
|
||||
else()
|
||||
set(CPACK_PACKAGE_FILE_NAME ${PROJECT_NAME_LOWER}-${MAJOR_VERSION}.${MINOR_VERSION}.${PATCH_VERSION}-git${CPACK_DATE}.${BUILD_REV}-${PACKAGE_ARCH})
|
||||
@@ -135,4 +137,3 @@ unset(MINOR_VERSION)
|
||||
unset(PATCH_VERSION)
|
||||
|
||||
unset(BUILD_REV)
|
||||
|
||||
|
@@ -33,7 +33,7 @@ endmacro()
|
||||
macro(windows_find_package package_name
|
||||
)
|
||||
if(WITH_WINDOWS_FIND_MODULES)
|
||||
find_package( ${package_name})
|
||||
find_package(${package_name})
|
||||
endif(WITH_WINDOWS_FIND_MODULES)
|
||||
endmacro()
|
||||
|
||||
|
@@ -681,7 +681,7 @@ Image classes
|
||||
|
||||
.. attribute:: zbuff
|
||||
|
||||
Use depth component of render as grey scale color - suitable for texture source.
|
||||
Use depth component of render as grayscale color - suitable for texture source.
|
||||
|
||||
:type: bool
|
||||
|
||||
@@ -817,7 +817,7 @@ Image classes
|
||||
|
||||
.. attribute:: zbuff
|
||||
|
||||
Use depth component of viewport as grey scale color - suitable for texture source.
|
||||
Use depth component of viewport as grayscale color - suitable for texture source.
|
||||
|
||||
:type: bool
|
||||
|
||||
@@ -1260,8 +1260,8 @@ Filter classes
|
||||
|
||||
.. class:: FilterGray
|
||||
|
||||
Filter for gray scale effect.
|
||||
Proportions of R, G and B contributions in the output gray scale are 28:151:77.
|
||||
Filter for grayscale effect.
|
||||
Proportions of R, G and B contributions in the output grayscale are 28:151:77.
|
||||
|
||||
.. attribute:: previous
|
||||
|
||||
|
@@ -427,9 +427,9 @@ if BLENDER_REVISION != "Unknown":
|
||||
BLENDER_VERSION_DOTS += " " + BLENDER_REVISION # '2.62.1 SHA1'
|
||||
|
||||
BLENDER_VERSION_PATH = "_".join(blender_version_strings) # '2_62_1'
|
||||
if bpy.app.version_cycle == "release":
|
||||
BLENDER_VERSION_PATH = "%s%s_release" % ("_".join(blender_version_strings[:2]),
|
||||
bpy.app.version_char) # '2_62_release'
|
||||
if bpy.app.version_cycle in {"rc", "release"}:
|
||||
# '2_62a_release'
|
||||
BLENDER_VERSION_PATH = "%s%s_release" % ("_".join(blender_version_strings[:2]), bpy.app.version_char)
|
||||
|
||||
# --------------------------DOWNLOADABLE FILES----------------------------------
|
||||
|
||||
|
@@ -96,6 +96,11 @@ def main():
|
||||
|
||||
rsync_base = "rsync://%s@%s:%s" % (args.user, args.rsync_server, args.rsync_root)
|
||||
|
||||
blenver = blenver_zip = ""
|
||||
api_name = ""
|
||||
branch = ""
|
||||
is_release = False
|
||||
|
||||
# I) Update local mirror using rsync.
|
||||
rsync_mirror_cmd = ("rsync", "--delete-after", "-avzz", rsync_base, args.mirror_dir)
|
||||
subprocess.run(rsync_mirror_cmd, env=dict(os.environ, RSYNC_PASSWORD=args.password))
|
||||
@@ -108,19 +113,24 @@ def main():
|
||||
subprocess.run(doc_gen_cmd)
|
||||
|
||||
# III) Get Blender version info.
|
||||
blenver = blenver_zip = ""
|
||||
getver_file = os.path.join(tmp_dir, "blendver.txt")
|
||||
getver_script = (""
|
||||
"import sys, bpy\n"
|
||||
"with open(sys.argv[-1], 'w') as f:\n"
|
||||
" f.write('%d_%d%s_release\\n' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n"
|
||||
" if bpy.app.version_cycle in {'rc', 'release'} else '%d_%d_%d\\n' % bpy.app.version)\n"
|
||||
" f.write('%d_%d_%d' % bpy.app.version)\n")
|
||||
" is_release = bpy.app.version_cycle in {'rc', 'release'}\n"
|
||||
" branch = bpy.app.build_branch.split()[0].decode()\n"
|
||||
" f.write('%d\\n' % is_release)\n"
|
||||
" f.write('%s\\n' % branch)\n"
|
||||
" f.write('%d.%d%s\\n' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n"
|
||||
" if is_release else '%s\\n' % branch)\n"
|
||||
" f.write('%d_%d%s_release' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n"
|
||||
" if is_release else '%d_%d_%d' % bpy.app.version)\n")
|
||||
get_ver_cmd = (args.blender, "--background", "-noaudio", "--factory-startup", "--python-exit-code", "1",
|
||||
"--python-expr", getver_script, "--", getver_file)
|
||||
subprocess.run(get_ver_cmd)
|
||||
with open(getver_file) as f:
|
||||
blenver, blenver_zip = f.read().split("\n")
|
||||
is_release, branch, blenver, blenver_zip = f.read().split("\n")
|
||||
is_release = bool(int(is_release))
|
||||
os.remove(getver_file)
|
||||
|
||||
# IV) Build doc.
|
||||
@@ -132,7 +142,7 @@ def main():
|
||||
os.chdir(curr_dir)
|
||||
|
||||
# V) Cleanup existing matching dir in server mirror (if any), and copy new doc.
|
||||
api_name = "blender_python_api_%s" % blenver
|
||||
api_name = blenver
|
||||
api_dir = os.path.join(args.mirror_dir, api_name)
|
||||
if os.path.exists(api_dir):
|
||||
shutil.rmtree(api_dir)
|
||||
@@ -150,19 +160,15 @@ def main():
|
||||
os.rename(zip_path, os.path.join(api_dir, "%s.zip" % zip_name))
|
||||
|
||||
# VII) Create symlinks and html redirects.
|
||||
#~ os.symlink(os.path.join(DEFAULT_SYMLINK_ROOT, api_name, "contents.html"), os.path.join(api_dir, "index.html"))
|
||||
os.symlink("./contents.html", os.path.join(api_dir, "index.html"))
|
||||
if blenver.endswith("release"):
|
||||
symlink = os.path.join(args.mirror_dir, "blender_python_api_current")
|
||||
if is_release:
|
||||
symlink = os.path.join(args.mirror_dir, "current")
|
||||
os.remove(symlink)
|
||||
os.symlink("./%s" % api_name, symlink)
|
||||
with open(os.path.join(args.mirror_dir, "250PythonDoc/index.html"), 'w') as f:
|
||||
f.write("<html><head><title>Redirecting...</title><meta http-equiv=\"REFRESH\""
|
||||
"content=\"0;url=../%s/\"></head><body>Redirecting...</body></html>" % api_name)
|
||||
else:
|
||||
symlink = os.path.join(args.mirror_dir, "blender_python_api_master")
|
||||
os.remove(symlink)
|
||||
os.symlink("./%s" % api_name, symlink)
|
||||
elif branch == "master":
|
||||
with open(os.path.join(args.mirror_dir, "blender_python_api/index.html"), 'w') as f:
|
||||
f.write("<html><head><title>Redirecting...</title><meta http-equiv=\"REFRESH\""
|
||||
"content=\"0;url=../%s/\"></head><body>Redirecting...</body></html>" % api_name)
|
||||
|
2
extern/clew/README.blender
vendored
2
extern/clew/README.blender
vendored
@@ -1,5 +1,5 @@
|
||||
Project: OpenCL Wrangler
|
||||
URL: https://github.com/OpenCLWrangler/clew
|
||||
License: Apache 2.0
|
||||
Upstream version: 309a653
|
||||
Upstream version: 27a6867
|
||||
Local modifications: None
|
||||
|
31
extern/clew/include/clew.h
vendored
31
extern/clew/include/clew.h
vendored
@@ -369,7 +369,7 @@ typedef unsigned int cl_GLenum;
|
||||
#endif
|
||||
|
||||
/* Define basic vector types */
|
||||
/* WOrkaround for ppc64el platform: conflicts with bool from C++. */
|
||||
/* Workaround for ppc64el platform: conflicts with bool from C++. */
|
||||
#if defined( __VEC__ ) && !(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
|
||||
#include <altivec.h> /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */
|
||||
typedef vector unsigned char __cl_uchar16;
|
||||
@@ -2765,11 +2765,40 @@ CLEW_FUN_EXPORT PFNCLGETGLCONTEXTINFOKHR __clewGetGLContextInfoKH
|
||||
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
|
||||
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
|
||||
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
|
||||
#define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV 0x4007
|
||||
#define CL_DEVICE_PCI_BUS_ID_NV 0x4008
|
||||
#define CL_DEVICE_PCI_SLOT_ID_NV 0x4009
|
||||
|
||||
/*********************************
|
||||
* cl_amd_device_attribute_query *
|
||||
*********************************/
|
||||
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
|
||||
#define CL_DEVICE_TOPOLOGY_AMD 0x4037
|
||||
#define CL_DEVICE_BOARD_NAME_AMD 0x4038
|
||||
#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039
|
||||
#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040
|
||||
#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041
|
||||
#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042
|
||||
#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043
|
||||
#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044
|
||||
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045
|
||||
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046
|
||||
#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047
|
||||
#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048
|
||||
#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049
|
||||
#define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A
|
||||
#define CL_DEVICE_GFXIP_MINOR_AMD 0x404B
|
||||
#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C
|
||||
|
||||
#ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD
|
||||
#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1
|
||||
|
||||
typedef union
|
||||
{
|
||||
struct { cl_uint type; cl_uint data[5]; } raw;
|
||||
struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
|
||||
} cl_device_topology_amd;
|
||||
#endif
|
||||
|
||||
/*********************************
|
||||
* cl_arm_printf extension
|
||||
|
4
extern/clew/src/clew.c
vendored
4
extern/clew/src/clew.c
vendored
@@ -15,7 +15,7 @@
|
||||
|
||||
typedef HMODULE CLEW_DYNLIB_HANDLE;
|
||||
|
||||
#define CLEW_DYNLIB_OPEN LoadLibrary
|
||||
#define CLEW_DYNLIB_OPEN LoadLibraryA
|
||||
#define CLEW_DYNLIB_CLOSE FreeLibrary
|
||||
#define CLEW_DYNLIB_IMPORT GetProcAddress
|
||||
#else
|
||||
@@ -223,7 +223,7 @@ int clewInit()
|
||||
__clewSetCommandQueueProperty = (PFNCLSETCOMMANDQUEUEPROPERTY )CLEW_DYNLIB_IMPORT(module, "clSetCommandQueueProperty");
|
||||
#endif
|
||||
__clewCreateBuffer = (PFNCLCREATEBUFFER )CLEW_DYNLIB_IMPORT(module, "clCreateBuffer");
|
||||
__clewCreateSubBuffer = (PFNCLCREATESUBBUFFER )CLEW_DYNLIB_IMPORT(module, "clCreateBuffer");
|
||||
__clewCreateSubBuffer = (PFNCLCREATESUBBUFFER )CLEW_DYNLIB_IMPORT(module, "clCreateSubBuffer");
|
||||
__clewCreateImage = (PFNCLCREATEIMAGE )CLEW_DYNLIB_IMPORT(module, "clCreateImage");
|
||||
__clewRetainMemObject = (PFNCLRETAINMEMOBJECT )CLEW_DYNLIB_IMPORT(module, "clRetainMemObject");
|
||||
__clewReleaseMemObject = (PFNCLRELEASEMEMOBJECT )CLEW_DYNLIB_IMPORT(module, "clReleaseMemObject");
|
||||
|
2
extern/cuew/include/cuew.h
vendored
2
extern/cuew/include/cuew.h
vendored
@@ -114,7 +114,7 @@ extern "C" {
|
||||
#define cuGLGetDevices cuGLGetDevices_v2
|
||||
|
||||
/* Types. */
|
||||
#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
|
||||
#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined (__aarch64__)
|
||||
typedef unsigned long long CUdeviceptr;
|
||||
#else
|
||||
typedef unsigned int CUdeviceptr;
|
||||
|
@@ -34,7 +34,7 @@ add_subdirectory(mikktspace)
|
||||
add_subdirectory(glew-mx)
|
||||
add_subdirectory(eigen)
|
||||
|
||||
if (WITH_GAMEENGINE_DECKLINK)
|
||||
if(WITH_GAMEENGINE_DECKLINK)
|
||||
add_subdirectory(decklink)
|
||||
endif()
|
||||
|
||||
@@ -62,7 +62,7 @@ if(WITH_IK_ITASC)
|
||||
add_subdirectory(itasc)
|
||||
endif()
|
||||
|
||||
if(WITH_IK_SOLVER OR WITH_GAMEENGINE OR WITH_MOD_BOOLEAN)
|
||||
if(WITH_GAMEENGINE)
|
||||
add_subdirectory(moto)
|
||||
endif()
|
||||
|
||||
|
@@ -101,11 +101,11 @@ ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x);
|
||||
ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x);
|
||||
ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new);
|
||||
|
||||
ATOMIC_INLINE unsigned atomic_add_and_fetch_u(unsigned *p, unsigned x);
|
||||
ATOMIC_INLINE unsigned atomic_sub_and_fetch_u(unsigned *p, unsigned x);
|
||||
ATOMIC_INLINE unsigned atomic_fetch_and_add_u(unsigned *p, unsigned x);
|
||||
ATOMIC_INLINE unsigned atomic_fetch_and_sub_u(unsigned *p, unsigned x);
|
||||
ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new);
|
||||
ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x);
|
||||
ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x);
|
||||
ATOMIC_INLINE unsigned int atomic_fetch_and_add_u(unsigned int *p, unsigned int x);
|
||||
ATOMIC_INLINE unsigned int atomic_fetch_and_sub_u(unsigned int *p, unsigned int x);
|
||||
ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsigned int _new);
|
||||
|
||||
/* WARNING! Float 'atomics' are really faked ones, those are actually closer to some kind of spinlock-sync'ed operation,
|
||||
* which means they are only efficient if collisions are highly unlikely (i.e. if probability of two threads
|
||||
|
@@ -113,58 +113,58 @@ ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new)
|
||||
|
||||
/******************************************************************************/
|
||||
/* unsigned operations. */
|
||||
ATOMIC_INLINE unsigned atomic_add_and_fetch_u(unsigned *p, unsigned x)
|
||||
ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x)
|
||||
{
|
||||
assert(sizeof(unsigned) == LG_SIZEOF_INT);
|
||||
assert(sizeof(unsigned int) == LG_SIZEOF_INT);
|
||||
|
||||
#if (LG_SIZEOF_INT == 8)
|
||||
return (unsigned)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x);
|
||||
return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x);
|
||||
#elif (LG_SIZEOF_INT == 4)
|
||||
return (unsigned)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x);
|
||||
return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x);
|
||||
#endif
|
||||
}
|
||||
|
||||
ATOMIC_INLINE unsigned atomic_sub_and_fetch_u(unsigned *p, unsigned x)
|
||||
ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x)
|
||||
{
|
||||
assert(sizeof(unsigned) == LG_SIZEOF_INT);
|
||||
assert(sizeof(unsigned int) == LG_SIZEOF_INT);
|
||||
|
||||
#if (LG_SIZEOF_INT == 8)
|
||||
return (unsigned)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
|
||||
return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
|
||||
#elif (LG_SIZEOF_INT == 4)
|
||||
return (unsigned)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
|
||||
return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
|
||||
#endif
|
||||
}
|
||||
|
||||
ATOMIC_INLINE unsigned atomic_fetch_and_add_u(unsigned *p, unsigned x)
|
||||
ATOMIC_INLINE unsigned int atomic_fetch_and_add_u(unsigned int *p, unsigned int x)
|
||||
{
|
||||
assert(sizeof(unsigned) == LG_SIZEOF_INT);
|
||||
assert(sizeof(unsigned int) == LG_SIZEOF_INT);
|
||||
|
||||
#if (LG_SIZEOF_INT == 8)
|
||||
return (unsigned)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x);
|
||||
return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x);
|
||||
#elif (LG_SIZEOF_INT == 4)
|
||||
return (unsigned)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x);
|
||||
return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x);
|
||||
#endif
|
||||
}
|
||||
|
||||
ATOMIC_INLINE unsigned atomic_fetch_and_sub_u(unsigned *p, unsigned x)
|
||||
ATOMIC_INLINE unsigned int atomic_fetch_and_sub_u(unsigned int *p, unsigned int x)
|
||||
{
|
||||
assert(sizeof(unsigned) == LG_SIZEOF_INT);
|
||||
assert(sizeof(unsigned int) == LG_SIZEOF_INT);
|
||||
|
||||
#if (LG_SIZEOF_INT == 8)
|
||||
return (unsigned)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
|
||||
return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
|
||||
#elif (LG_SIZEOF_INT == 4)
|
||||
return (unsigned)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
|
||||
return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
|
||||
#endif
|
||||
}
|
||||
|
||||
ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new)
|
||||
ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsigned int _new)
|
||||
{
|
||||
assert(sizeof(unsigned) == LG_SIZEOF_INT);
|
||||
assert(sizeof(unsigned int) == LG_SIZEOF_INT);
|
||||
|
||||
#if (LG_SIZEOF_INT == 8)
|
||||
return (unsigned)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new);
|
||||
return (unsigned int)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new);
|
||||
#elif (LG_SIZEOF_INT == 4)
|
||||
return (unsigned)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new);
|
||||
return (unsigned int)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@@ -365,6 +365,7 @@ bool AUD_SoftwareDevice::AUD_SoftwareHandle::seek(float position)
|
||||
if(!m_status)
|
||||
return false;
|
||||
|
||||
m_pitch->setPitch(m_user_pitch);
|
||||
m_reader->seek((int)(position * m_reader->getSpecs().rate));
|
||||
|
||||
if(m_status == AUD_STATUS_STOPPED)
|
||||
|
@@ -22,6 +22,7 @@ if(WITH_CYCLES_NATIVE_ONLY)
|
||||
-DWITH_KERNEL_NATIVE
|
||||
)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
|
||||
set(CYCLES_KERNEL_FLAGS "-march=native")
|
||||
elseif(NOT WITH_CPU_SSE)
|
||||
set(CXX_HAS_SSE FALSE)
|
||||
set(CXX_HAS_AVX FALSE)
|
||||
@@ -59,10 +60,13 @@ elseif(WIN32 AND MSVC)
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox")
|
||||
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Ox")
|
||||
set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /Ox")
|
||||
|
||||
set(CYCLES_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-")
|
||||
elseif(CMAKE_COMPILER_IS_GNUCC)
|
||||
check_cxx_compiler_flag(-msse CXX_HAS_SSE)
|
||||
check_cxx_compiler_flag(-mavx CXX_HAS_AVX)
|
||||
check_cxx_compiler_flag(-mavx2 CXX_HAS_AVX2)
|
||||
set(CYCLES_KERNEL_FLAGS "-ffast-math")
|
||||
if(CXX_HAS_SSE)
|
||||
set(CYCLES_SSE2_KERNEL_FLAGS "-ffast-math -msse -msse2 -mfpmath=sse")
|
||||
set(CYCLES_SSE3_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -mfpmath=sse")
|
||||
@@ -74,10 +78,12 @@ elseif(CMAKE_COMPILER_IS_GNUCC)
|
||||
if(CXX_HAS_AVX2)
|
||||
set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c -mfpmath=sse")
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math -fno-finite-math-only")
|
||||
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
check_cxx_compiler_flag(-msse CXX_HAS_SSE)
|
||||
check_cxx_compiler_flag(-mavx CXX_HAS_AVX)
|
||||
check_cxx_compiler_flag(-mavx2 CXX_HAS_AVX2)
|
||||
set(CYCLES_KERNEL_FLAGS "-ffast-math")
|
||||
if(CXX_HAS_SSE)
|
||||
set(CYCLES_SSE2_KERNEL_FLAGS "-ffast-math -msse -msse2")
|
||||
set(CYCLES_SSE3_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3")
|
||||
@@ -89,6 +95,7 @@ elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
if(CXX_HAS_AVX2)
|
||||
set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c")
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math -fno-finite-math-only")
|
||||
endif()
|
||||
|
||||
if(CXX_HAS_SSE)
|
||||
|
@@ -1,14 +1,6 @@
|
||||
|
||||
set(INC
|
||||
.
|
||||
../bvh
|
||||
../device
|
||||
../graph
|
||||
../kernel
|
||||
../kernel/svm
|
||||
../render
|
||||
../subd
|
||||
../util
|
||||
..
|
||||
)
|
||||
set(INC_SYS
|
||||
)
|
||||
|
@@ -16,15 +16,15 @@
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "device.h"
|
||||
#include "device/device.h"
|
||||
|
||||
#include "util_args.h"
|
||||
#include "util_foreach.h"
|
||||
#include "util_path.h"
|
||||
#include "util_stats.h"
|
||||
#include "util_string.h"
|
||||
#include "util_task.h"
|
||||
#include "util_logging.h"
|
||||
#include "util/util_args.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_path.h"
|
||||
#include "util/util_stats.h"
|
||||
#include "util/util_string.h"
|
||||
#include "util/util_task.h"
|
||||
#include "util/util_logging.h"
|
||||
|
||||
using namespace ccl;
|
||||
|
||||
|
@@ -16,29 +16,29 @@
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "buffers.h"
|
||||
#include "camera.h"
|
||||
#include "device.h"
|
||||
#include "scene.h"
|
||||
#include "session.h"
|
||||
#include "integrator.h"
|
||||
#include "render/buffers.h"
|
||||
#include "render/camera.h"
|
||||
#include "device/device.h"
|
||||
#include "render/scene.h"
|
||||
#include "render/session.h"
|
||||
#include "render/integrator.h"
|
||||
|
||||
#include "util_args.h"
|
||||
#include "util_foreach.h"
|
||||
#include "util_function.h"
|
||||
#include "util_logging.h"
|
||||
#include "util_path.h"
|
||||
#include "util_progress.h"
|
||||
#include "util_string.h"
|
||||
#include "util_time.h"
|
||||
#include "util_transform.h"
|
||||
#include "util_version.h"
|
||||
#include "util/util_args.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_function.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_path.h"
|
||||
#include "util/util_progress.h"
|
||||
#include "util/util_string.h"
|
||||
#include "util/util_time.h"
|
||||
#include "util/util_transform.h"
|
||||
#include "util/util_version.h"
|
||||
|
||||
#ifdef WITH_CYCLES_STANDALONE_GUI
|
||||
#include "util_view.h"
|
||||
#include "util/util_view.h"
|
||||
#endif
|
||||
|
||||
#include "cycles_xml.h"
|
||||
#include "app/cycles_xml.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -20,31 +20,31 @@
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
|
||||
#include "node_xml.h"
|
||||
#include "graph/node_xml.h"
|
||||
|
||||
#include "background.h"
|
||||
#include "camera.h"
|
||||
#include "film.h"
|
||||
#include "graph.h"
|
||||
#include "integrator.h"
|
||||
#include "light.h"
|
||||
#include "mesh.h"
|
||||
#include "nodes.h"
|
||||
#include "object.h"
|
||||
#include "osl.h"
|
||||
#include "shader.h"
|
||||
#include "scene.h"
|
||||
#include "render/background.h"
|
||||
#include "render/camera.h"
|
||||
#include "render/film.h"
|
||||
#include "render/graph.h"
|
||||
#include "render/integrator.h"
|
||||
#include "render/light.h"
|
||||
#include "render/mesh.h"
|
||||
#include "render/nodes.h"
|
||||
#include "render/object.h"
|
||||
#include "render/osl.h"
|
||||
#include "render/shader.h"
|
||||
#include "render/scene.h"
|
||||
|
||||
#include "subd_patch.h"
|
||||
#include "subd_split.h"
|
||||
#include "subd/subd_patch.h"
|
||||
#include "subd/subd_split.h"
|
||||
|
||||
#include "util_debug.h"
|
||||
#include "util_foreach.h"
|
||||
#include "util_path.h"
|
||||
#include "util_transform.h"
|
||||
#include "util_xml.h"
|
||||
#include "util/util_debug.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_path.h"
|
||||
#include "util/util_transform.h"
|
||||
#include "util/util_xml.h"
|
||||
|
||||
#include "cycles_xml.h"
|
||||
#include "app/cycles_xml.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -1,12 +1,6 @@
|
||||
|
||||
set(INC
|
||||
../graph
|
||||
../render
|
||||
../device
|
||||
../kernel
|
||||
../kernel/svm
|
||||
../util
|
||||
../subd
|
||||
..
|
||||
../../glew-mx
|
||||
../../guardedalloc
|
||||
../../mikktspace
|
||||
|
@@ -107,7 +107,13 @@ def engine_exit():
|
||||
engine.exit()
|
||||
|
||||
|
||||
classes = (
|
||||
CyclesRender,
|
||||
)
|
||||
|
||||
|
||||
def register():
|
||||
from bpy.utils import register_class
|
||||
from . import ui
|
||||
from . import properties
|
||||
from . import presets
|
||||
@@ -122,12 +128,15 @@ def register():
|
||||
properties.register()
|
||||
ui.register()
|
||||
presets.register()
|
||||
bpy.utils.register_module(__name__)
|
||||
|
||||
for cls in classes:
|
||||
register_class(cls)
|
||||
|
||||
bpy.app.handlers.version_update.append(version_update.do_versions)
|
||||
|
||||
|
||||
def unregister():
|
||||
from bpy.utils import unregister_class
|
||||
from . import ui
|
||||
from . import properties
|
||||
from . import presets
|
||||
@@ -138,4 +147,6 @@ def unregister():
|
||||
ui.unregister()
|
||||
properties.unregister()
|
||||
presets.unregister()
|
||||
bpy.utils.unregister_module(__name__)
|
||||
|
||||
for cls in classes:
|
||||
unregister_class(cls)
|
||||
|
@@ -50,6 +50,24 @@ def _workaround_buggy_drivers():
|
||||
_cycles.opencl_disable()
|
||||
|
||||
|
||||
def _configure_argument_parser():
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser(description="Cycles Addon argument parser")
|
||||
parser.add_argument("--cycles-resumable-num-chunks",
|
||||
help="Number of chunks to split sample range into",
|
||||
default=None)
|
||||
parser.add_argument("--cycles-resumable-current-chunk",
|
||||
help="Current chunk of samples range to render",
|
||||
default=None)
|
||||
parser.add_argument("--cycles-resumable-start-chunk",
|
||||
help="Start chunk to render",
|
||||
default=None)
|
||||
parser.add_argument("--cycles-resumable-end-chunk",
|
||||
help="End chunk to render",
|
||||
default=None)
|
||||
return parser
|
||||
|
||||
|
||||
def _parse_command_line():
|
||||
import sys
|
||||
|
||||
@@ -57,25 +75,22 @@ def _parse_command_line():
|
||||
if "--" not in argv:
|
||||
return
|
||||
|
||||
argv = argv[argv.index("--") + 1:]
|
||||
parser = _configure_argument_parser()
|
||||
args, unknown = parser.parse_known_args(argv[argv.index("--") + 1:])
|
||||
|
||||
num_resumable_chunks = None
|
||||
current_resumable_chunk = None
|
||||
|
||||
# TODO(sergey): Add some nice error prints if argument is not used properly.
|
||||
idx = 0
|
||||
while idx < len(argv) - 1:
|
||||
arg = argv[idx]
|
||||
if arg == '--cycles-resumable-num-chunks':
|
||||
num_resumable_chunks = int(argv[idx + 1])
|
||||
elif arg == '--cycles-resumable-current-chunk':
|
||||
current_resumable_chunk = int(argv[idx + 1])
|
||||
idx += 1
|
||||
|
||||
if num_resumable_chunks is not None and current_resumable_chunk is not None:
|
||||
import _cycles
|
||||
_cycles.set_resumable_chunks(num_resumable_chunks,
|
||||
current_resumable_chunk)
|
||||
if args.cycles_resumable_num_chunks is not None:
|
||||
if args.cycles_resumable_current_chunk is not None:
|
||||
import _cycles
|
||||
_cycles.set_resumable_chunk(
|
||||
int(args.cycles_resumable_num_chunks),
|
||||
int(args.cycles_resumable_current_chunk))
|
||||
elif args.cycles_resumable_start_chunk is not None and \
|
||||
args.cycles_resumable_end_chunk:
|
||||
import _cycles
|
||||
_cycles.set_resumable_chunk_range(
|
||||
int(args.cycles_resumable_num_chunks),
|
||||
int(args.cycles_resumable_start_chunk),
|
||||
int(args.cycles_resumable_end_chunk))
|
||||
|
||||
|
||||
def init():
|
||||
|
@@ -82,12 +82,23 @@ class AddPresetSampling(AddPresetBase, Operator):
|
||||
preset_subdir = "cycles/sampling"
|
||||
|
||||
|
||||
classes = (
|
||||
AddPresetIntegrator,
|
||||
AddPresetSampling,
|
||||
)
|
||||
|
||||
|
||||
def register():
|
||||
pass
|
||||
from bpy.utils import register_class
|
||||
for cls in classes:
|
||||
register_class(cls)
|
||||
|
||||
|
||||
def unregister():
|
||||
pass
|
||||
from bpy.utils import unregister_class
|
||||
for cls in classes:
|
||||
unregister_class(cls)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
register()
|
||||
|
@@ -665,8 +665,10 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
cls.debug_use_cpu_sse3 = BoolProperty(name="SSE3", default=True)
|
||||
cls.debug_use_cpu_sse2 = BoolProperty(name="SSE2", default=True)
|
||||
cls.debug_use_qbvh = BoolProperty(name="QBVH", default=True)
|
||||
cls.debug_use_cpu_split_kernel = BoolProperty(name="Split Kernel", default=False)
|
||||
|
||||
cls.debug_use_cuda_adaptive_compile = BoolProperty(name="Adaptive Compile", default=False)
|
||||
cls.debug_use_cuda_split_kernel = BoolProperty(name="Split Kernel", default=False)
|
||||
|
||||
cls.debug_opencl_kernel_type = EnumProperty(
|
||||
name="OpenCL Kernel Type",
|
||||
@@ -693,6 +695,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
update=devices_update_callback
|
||||
)
|
||||
|
||||
cls.debug_opencl_kernel_single_program = BoolProperty(name="Single Program", default=False, update=devices_update_callback);
|
||||
|
||||
cls.debug_use_opencl_debug = BoolProperty(name="Debug OpenCL", default=False)
|
||||
|
||||
@classmethod
|
||||
@@ -1092,6 +1096,12 @@ class CyclesObjectSettings(bpy.types.PropertyGroup):
|
||||
default=1.0,
|
||||
)
|
||||
|
||||
cls.is_shadow_catcher = BoolProperty(
|
||||
name="Shadow Catcher",
|
||||
description="Only render shadows on this object, for compositing renders into real footage",
|
||||
default=False,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def unregister(cls):
|
||||
del bpy.types.Object.cycles
|
||||
|
@@ -86,12 +86,10 @@ def use_sample_all_lights(context):
|
||||
|
||||
return cscene.sample_all_lights_direct or cscene.sample_all_lights_indirect
|
||||
|
||||
def show_device_selection(context):
|
||||
type = get_device_type(context)
|
||||
if type == 'NETWORK':
|
||||
def show_device_active(context):
|
||||
cscene = context.scene.cycles
|
||||
if cscene.device != 'GPU':
|
||||
return True
|
||||
if not type in {'CUDA', 'OPENCL'}:
|
||||
return False
|
||||
return context.user_preferences.addons[__package__].preferences.has_active_device()
|
||||
|
||||
|
||||
@@ -186,9 +184,6 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
|
||||
sub.label(text="AA Samples:")
|
||||
sub.prop(cscene, "aa_samples", text="Render")
|
||||
sub.prop(cscene, "preview_aa_samples", text="Preview")
|
||||
sub.separator()
|
||||
sub.prop(cscene, "sample_all_lights_direct")
|
||||
sub.prop(cscene, "sample_all_lights_indirect")
|
||||
|
||||
col = split.column()
|
||||
sub = col.column(align=True)
|
||||
@@ -205,6 +200,10 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
|
||||
sub.prop(cscene, "subsurface_samples", text="Subsurface")
|
||||
sub.prop(cscene, "volume_samples", text="Volume")
|
||||
|
||||
col = layout.column(align=True)
|
||||
col.prop(cscene, "sample_all_lights_direct")
|
||||
col.prop(cscene, "sample_all_lights_indirect")
|
||||
|
||||
if not (use_opencl(context) and cscene.feature_set != 'EXPERIMENTAL'):
|
||||
layout.row().prop(cscene, "sampling_pattern", text="Pattern")
|
||||
|
||||
@@ -270,7 +269,7 @@ class CyclesRender_PT_geometry(CyclesButtonsPanel, Panel):
|
||||
|
||||
row = col.row()
|
||||
row.prop(ccscene, "minimum_width", text="Min Pixels")
|
||||
row.prop(ccscene, "maximum_width", text="Max Ext.")
|
||||
row.prop(ccscene, "maximum_width", text="Max Extension")
|
||||
|
||||
|
||||
class CyclesRender_PT_light_paths(CyclesButtonsPanel, Panel):
|
||||
@@ -788,6 +787,8 @@ class CyclesObject_PT_cycles_settings(CyclesButtonsPanel, Panel):
|
||||
if ob.type != 'LAMP':
|
||||
flow.prop(visibility, "shadow")
|
||||
|
||||
layout.prop(cob, "is_shadow_catcher")
|
||||
|
||||
col = layout.column()
|
||||
col.label(text="Performance:")
|
||||
row = col.row()
|
||||
@@ -1518,15 +1519,18 @@ class CyclesRender_PT_debug(CyclesButtonsPanel, Panel):
|
||||
row.prop(cscene, "debug_use_cpu_avx", toggle=True)
|
||||
row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
|
||||
col.prop(cscene, "debug_use_qbvh")
|
||||
col.prop(cscene, "debug_use_cpu_split_kernel")
|
||||
|
||||
col = layout.column()
|
||||
col.label('CUDA Flags:')
|
||||
col.prop(cscene, "debug_use_cuda_adaptive_compile")
|
||||
col.prop(cscene, "debug_use_cuda_split_kernel")
|
||||
|
||||
col = layout.column()
|
||||
col.label('OpenCL Flags:')
|
||||
col.prop(cscene, "debug_opencl_kernel_type", text="Kernel")
|
||||
col.prop(cscene, "debug_opencl_device_type", text="Device")
|
||||
col.prop(cscene, "debug_opencl_kernel_single_program", text="Single Program")
|
||||
col.prop(cscene, "debug_use_opencl_debug", text="Debug")
|
||||
|
||||
|
||||
@@ -1633,7 +1637,7 @@ def draw_device(self, context):
|
||||
split = layout.split(percentage=1/3)
|
||||
split.label("Device:")
|
||||
row = split.row()
|
||||
row.active = show_device_selection(context)
|
||||
row.active = show_device_active(context)
|
||||
row.prop(cscene, "device", text="")
|
||||
|
||||
if engine.with_osl() and use_cpu(context):
|
||||
@@ -1712,17 +1716,75 @@ def get_panels():
|
||||
|
||||
return panels
|
||||
|
||||
|
||||
classes = (
|
||||
CYCLES_MT_sampling_presets,
|
||||
CYCLES_MT_integrator_presets,
|
||||
CyclesRender_PT_sampling,
|
||||
CyclesRender_PT_geometry,
|
||||
CyclesRender_PT_light_paths,
|
||||
CyclesRender_PT_motion_blur,
|
||||
CyclesRender_PT_film,
|
||||
CyclesRender_PT_performance,
|
||||
CyclesRender_PT_layer_options,
|
||||
CyclesRender_PT_layer_passes,
|
||||
CyclesRender_PT_views,
|
||||
Cycles_PT_post_processing,
|
||||
CyclesCamera_PT_dof,
|
||||
Cycles_PT_context_material,
|
||||
CyclesObject_PT_motion_blur,
|
||||
CyclesObject_PT_cycles_settings,
|
||||
CYCLES_OT_use_shading_nodes,
|
||||
CyclesLamp_PT_preview,
|
||||
CyclesLamp_PT_lamp,
|
||||
CyclesLamp_PT_nodes,
|
||||
CyclesLamp_PT_spot,
|
||||
CyclesWorld_PT_preview,
|
||||
CyclesWorld_PT_surface,
|
||||
CyclesWorld_PT_volume,
|
||||
CyclesWorld_PT_ambient_occlusion,
|
||||
CyclesWorld_PT_mist,
|
||||
CyclesWorld_PT_ray_visibility,
|
||||
CyclesWorld_PT_settings,
|
||||
CyclesMaterial_PT_preview,
|
||||
CyclesMaterial_PT_surface,
|
||||
CyclesMaterial_PT_volume,
|
||||
CyclesMaterial_PT_displacement,
|
||||
CyclesMaterial_PT_settings,
|
||||
CyclesTexture_PT_context,
|
||||
CyclesTexture_PT_node,
|
||||
CyclesTexture_PT_mapping,
|
||||
CyclesTexture_PT_colors,
|
||||
CyclesParticle_PT_textures,
|
||||
CyclesRender_PT_bake,
|
||||
CyclesRender_PT_debug,
|
||||
CyclesParticle_PT_CurveSettings,
|
||||
CyclesScene_PT_simplify,
|
||||
)
|
||||
|
||||
|
||||
def register():
|
||||
from bpy.utils import register_class
|
||||
|
||||
bpy.types.RENDER_PT_render.append(draw_device)
|
||||
bpy.types.VIEW3D_HT_header.append(draw_pause)
|
||||
|
||||
for panel in get_panels():
|
||||
panel.COMPAT_ENGINES.add('CYCLES')
|
||||
|
||||
for cls in classes:
|
||||
register_class(cls)
|
||||
|
||||
|
||||
def unregister():
|
||||
from bpy.utils import unregister_class
|
||||
|
||||
bpy.types.RENDER_PT_render.remove(draw_device)
|
||||
bpy.types.VIEW3D_HT_header.remove(draw_pause)
|
||||
|
||||
for panel in get_panels():
|
||||
if 'CYCLES' in panel.COMPAT_ENGINES:
|
||||
panel.COMPAT_ENGINES.remove('CYCLES')
|
||||
|
||||
for cls in classes:
|
||||
unregister_class(cls)
|
||||
|
@@ -14,13 +14,13 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "camera.h"
|
||||
#include "scene.h"
|
||||
#include "render/camera.h"
|
||||
#include "render/scene.h"
|
||||
|
||||
#include "blender_sync.h"
|
||||
#include "blender_util.h"
|
||||
#include "blender/blender_sync.h"
|
||||
#include "blender/blender_util.h"
|
||||
|
||||
#include "util_logging.h"
|
||||
#include "util/util_logging.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -14,18 +14,18 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "attribute.h"
|
||||
#include "camera.h"
|
||||
#include "curves.h"
|
||||
#include "mesh.h"
|
||||
#include "object.h"
|
||||
#include "scene.h"
|
||||
#include "render/attribute.h"
|
||||
#include "render/camera.h"
|
||||
#include "render/curves.h"
|
||||
#include "render/mesh.h"
|
||||
#include "render/object.h"
|
||||
#include "render/scene.h"
|
||||
|
||||
#include "blender_sync.h"
|
||||
#include "blender_util.h"
|
||||
#include "blender/blender_sync.h"
|
||||
#include "blender/blender_util.h"
|
||||
|
||||
#include "util_foreach.h"
|
||||
#include "util_logging.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_logging.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -411,6 +411,7 @@ static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
|
||||
}
|
||||
}
|
||||
|
||||
mesh->resize_mesh(mesh->verts.size(), mesh->triangles.size());
|
||||
mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL);
|
||||
mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
|
||||
mesh->add_face_normals();
|
||||
@@ -434,8 +435,8 @@ static void ExportCurveTriangleGeometry(Mesh *mesh,
|
||||
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
|
||||
continue;
|
||||
|
||||
numverts += (CData->curve_keynum[curve] - 2)*2*resolution + resolution;
|
||||
numtris += (CData->curve_keynum[curve] - 2)*resolution;
|
||||
numverts += (CData->curve_keynum[curve] - 1)*resolution + resolution;
|
||||
numtris += (CData->curve_keynum[curve] - 1)*2*resolution;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -545,6 +546,7 @@ static void ExportCurveTriangleGeometry(Mesh *mesh,
|
||||
}
|
||||
}
|
||||
|
||||
mesh->resize_mesh(mesh->verts.size(), mesh->triangles.size());
|
||||
mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL);
|
||||
mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
|
||||
mesh->add_face_normals();
|
||||
@@ -890,7 +892,7 @@ void BlenderSync::sync_curves(Mesh *mesh,
|
||||
}
|
||||
|
||||
/* obtain general settings */
|
||||
bool use_curves = scene->curve_system_manager->use_curves;
|
||||
const bool use_curves = scene->curve_system_manager->use_curves;
|
||||
|
||||
if(!(use_curves && b_ob.mode() != b_ob.mode_PARTICLE_EDIT)) {
|
||||
if(!motion)
|
||||
@@ -898,11 +900,11 @@ void BlenderSync::sync_curves(Mesh *mesh,
|
||||
return;
|
||||
}
|
||||
|
||||
int primitive = scene->curve_system_manager->primitive;
|
||||
int triangle_method = scene->curve_system_manager->triangle_method;
|
||||
int resolution = scene->curve_system_manager->resolution;
|
||||
size_t vert_num = mesh->verts.size();
|
||||
size_t tri_num = mesh->num_triangles();
|
||||
const int primitive = scene->curve_system_manager->primitive;
|
||||
const int triangle_method = scene->curve_system_manager->triangle_method;
|
||||
const int resolution = scene->curve_system_manager->resolution;
|
||||
const size_t vert_num = mesh->verts.size();
|
||||
const size_t tri_num = mesh->num_triangles();
|
||||
int used_res = 1;
|
||||
|
||||
/* extract particle hair data - should be combined with connecting to mesh later*/
|
||||
|
@@ -14,8 +14,8 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "CCL_api.h"
|
||||
#include "util_logging.h"
|
||||
#include "blender/CCL_api.h"
|
||||
#include "util/util_logging.h"
|
||||
|
||||
void CCL_init_logging(const char *argv0)
|
||||
{
|
||||
|
@@ -15,21 +15,22 @@
|
||||
*/
|
||||
|
||||
|
||||
#include "mesh.h"
|
||||
#include "object.h"
|
||||
#include "scene.h"
|
||||
#include "camera.h"
|
||||
#include "render/mesh.h"
|
||||
#include "render/object.h"
|
||||
#include "render/scene.h"
|
||||
#include "render/camera.h"
|
||||
|
||||
#include "blender_sync.h"
|
||||
#include "blender_session.h"
|
||||
#include "blender_util.h"
|
||||
#include "blender/blender_sync.h"
|
||||
#include "blender/blender_session.h"
|
||||
#include "blender/blender_util.h"
|
||||
|
||||
#include "subd_patch.h"
|
||||
#include "subd_split.h"
|
||||
#include "subd/subd_patch.h"
|
||||
#include "subd/subd_split.h"
|
||||
|
||||
#include "util_foreach.h"
|
||||
#include "util_logging.h"
|
||||
#include "util_math.h"
|
||||
#include "util/util_algorithm.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_math.h"
|
||||
|
||||
#include "mikktspace.h"
|
||||
|
||||
@@ -525,69 +526,177 @@ static void attr_create_uv_map(Scene *scene,
|
||||
}
|
||||
|
||||
/* Create vertex pointiness attributes. */
|
||||
|
||||
/* Compare vertices by sum of their coordinates. */
|
||||
class VertexAverageComparator {
|
||||
public:
|
||||
VertexAverageComparator(const array<float3>& verts)
|
||||
: verts_(verts) {
|
||||
}
|
||||
|
||||
bool operator()(const int& vert_idx_a, const int& vert_idx_b)
|
||||
{
|
||||
const float3 &vert_a = verts_[vert_idx_a];
|
||||
const float3 &vert_b = verts_[vert_idx_b];
|
||||
if(vert_a == vert_b) {
|
||||
/* Special case for doubles, so we ensure ordering. */
|
||||
return vert_idx_a > vert_idx_b;
|
||||
}
|
||||
const float x1 = vert_a.x + vert_a.y + vert_a.z;
|
||||
const float x2 = vert_b.x + vert_b.y + vert_b.z;
|
||||
return x1 < x2;
|
||||
}
|
||||
|
||||
protected:
|
||||
const array<float3>& verts_;
|
||||
};
|
||||
|
||||
static void attr_create_pointiness(Scene *scene,
|
||||
Mesh *mesh,
|
||||
BL::Mesh& b_mesh,
|
||||
bool subdivision)
|
||||
{
|
||||
if(mesh->need_attribute(scene, ATTR_STD_POINTINESS)) {
|
||||
const int numverts = b_mesh.vertices.length();
|
||||
AttributeSet& attributes = (subdivision)? mesh->subd_attributes: mesh->attributes;
|
||||
Attribute *attr = attributes.add(ATTR_STD_POINTINESS);
|
||||
float *data = attr->data_float();
|
||||
int *counter = new int[numverts];
|
||||
float *raw_data = new float[numverts];
|
||||
float3 *edge_accum = new float3[numverts];
|
||||
|
||||
/* Calculate pointiness using single ring neighborhood. */
|
||||
memset(counter, 0, sizeof(int) * numverts);
|
||||
memset(raw_data, 0, sizeof(float) * numverts);
|
||||
memset(edge_accum, 0, sizeof(float3) * numverts);
|
||||
BL::Mesh::edges_iterator e;
|
||||
int i = 0;
|
||||
for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++i) {
|
||||
int v0 = b_mesh.edges[i].vertices()[0],
|
||||
v1 = b_mesh.edges[i].vertices()[1];
|
||||
float3 co0 = get_float3(b_mesh.vertices[v0].co()),
|
||||
co1 = get_float3(b_mesh.vertices[v1].co());
|
||||
float3 edge = normalize(co1 - co0);
|
||||
edge_accum[v0] += edge;
|
||||
edge_accum[v1] += -edge;
|
||||
++counter[v0];
|
||||
++counter[v1];
|
||||
}
|
||||
i = 0;
|
||||
BL::Mesh::vertices_iterator v;
|
||||
for(b_mesh.vertices.begin(v); v != b_mesh.vertices.end(); ++v, ++i) {
|
||||
if(counter[i] > 0) {
|
||||
float3 normal = get_float3(b_mesh.vertices[i].normal());
|
||||
float angle = safe_acosf(dot(normal, edge_accum[i] / counter[i]));
|
||||
raw_data[i] = angle * M_1_PI_F;
|
||||
if(!mesh->need_attribute(scene, ATTR_STD_POINTINESS)) {
|
||||
return;
|
||||
}
|
||||
const int num_verts = b_mesh.vertices.length();
|
||||
/* STEP 1: Find out duplicated vertices and point duplicates to a single
|
||||
* original vertex.
|
||||
*/
|
||||
vector<int> sorted_vert_indeices(num_verts);
|
||||
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
|
||||
sorted_vert_indeices[vert_index] = vert_index;
|
||||
}
|
||||
VertexAverageComparator compare(mesh->verts);
|
||||
sort(sorted_vert_indeices.begin(), sorted_vert_indeices.end(), compare);
|
||||
/* This array stores index of the original vertex for the given vertex
|
||||
* index.
|
||||
*/
|
||||
vector<int> vert_orig_index(num_verts);
|
||||
for(int sorted_vert_index = 0;
|
||||
sorted_vert_index < num_verts;
|
||||
++sorted_vert_index)
|
||||
{
|
||||
const int vert_index = sorted_vert_indeices[sorted_vert_index];
|
||||
const float3 &vert_co = mesh->verts[vert_index];
|
||||
bool found = false;
|
||||
for(int other_sorted_vert_index = sorted_vert_index + 1;
|
||||
other_sorted_vert_index < num_verts;
|
||||
++other_sorted_vert_index)
|
||||
{
|
||||
const int other_vert_index =
|
||||
sorted_vert_indeices[other_sorted_vert_index];
|
||||
const float3 &other_vert_co = mesh->verts[other_vert_index];
|
||||
/* We are too far away now, we wouldn't have duplicate. */
|
||||
if((other_vert_co.x + other_vert_co.y + other_vert_co.z) -
|
||||
(vert_co.x + vert_co.y + vert_co.z) > 3 * FLT_EPSILON)
|
||||
{
|
||||
break;
|
||||
}
|
||||
else {
|
||||
raw_data[i] = 0.0f;
|
||||
/* Found duplicate. */
|
||||
if(len_squared(other_vert_co - vert_co) < FLT_EPSILON) {
|
||||
found = true;
|
||||
vert_orig_index[vert_index] = other_vert_index;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Blur vertices to approximate 2 ring neighborhood. */
|
||||
memset(counter, 0, sizeof(int) * numverts);
|
||||
memcpy(data, raw_data, sizeof(float) * numverts);
|
||||
i = 0;
|
||||
for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++i) {
|
||||
int v0 = b_mesh.edges[i].vertices()[0],
|
||||
v1 = b_mesh.edges[i].vertices()[1];
|
||||
data[v0] += raw_data[v1];
|
||||
data[v1] += raw_data[v0];
|
||||
++counter[v0];
|
||||
++counter[v1];
|
||||
if(!found) {
|
||||
vert_orig_index[vert_index] = vert_index;
|
||||
}
|
||||
for(i = 0; i < numverts; ++i) {
|
||||
data[i] /= counter[i] + 1;
|
||||
}
|
||||
/* Make sure we always point to the very first orig vertex. */
|
||||
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
|
||||
int orig_index = vert_orig_index[vert_index];
|
||||
while(orig_index != vert_orig_index[orig_index]) {
|
||||
orig_index = vert_orig_index[orig_index];
|
||||
}
|
||||
|
||||
delete [] counter;
|
||||
delete [] raw_data;
|
||||
delete [] edge_accum;
|
||||
vert_orig_index[vert_index] = orig_index;
|
||||
}
|
||||
sorted_vert_indeices.free_memory();
|
||||
/* STEP 2: Calculate vertex normals taking into account their possible
|
||||
* duplicates which gets "welded" together.
|
||||
*/
|
||||
vector<float3> vert_normal(num_verts, make_float3(0.0f, 0.0f, 0.0f));
|
||||
/* First we accumulate all vertex normals in the original index. */
|
||||
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
|
||||
const float3 normal = get_float3(b_mesh.vertices[vert_index].normal());
|
||||
const int orig_index = vert_orig_index[vert_index];
|
||||
vert_normal[orig_index] += normal;
|
||||
}
|
||||
/* Then we normalize the accumulated result and flush it to all duplicates
|
||||
* as well.
|
||||
*/
|
||||
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
|
||||
const int orig_index = vert_orig_index[vert_index];
|
||||
vert_normal[vert_index] = normalize(vert_normal[orig_index]);
|
||||
}
|
||||
/* STEP 3: Calculate pointiness using single ring neighborhood. */
|
||||
vector<int> counter(num_verts, 0);
|
||||
vector<float> raw_data(num_verts, 0.0f);
|
||||
vector<float3> edge_accum(num_verts, make_float3(0.0f, 0.0f, 0.0f));
|
||||
BL::Mesh::edges_iterator e;
|
||||
EdgeMap visited_edges;
|
||||
int edge_index = 0;
|
||||
memset(&counter[0], 0, sizeof(int) * counter.size());
|
||||
for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++edge_index) {
|
||||
const int v0 = vert_orig_index[b_mesh.edges[edge_index].vertices()[0]],
|
||||
v1 = vert_orig_index[b_mesh.edges[edge_index].vertices()[1]];
|
||||
if(visited_edges.exists(v0, v1)) {
|
||||
continue;
|
||||
}
|
||||
visited_edges.insert(v0, v1);
|
||||
float3 co0 = get_float3(b_mesh.vertices[v0].co()),
|
||||
co1 = get_float3(b_mesh.vertices[v1].co());
|
||||
float3 edge = normalize(co1 - co0);
|
||||
edge_accum[v0] += edge;
|
||||
edge_accum[v1] += -edge;
|
||||
++counter[v0];
|
||||
++counter[v1];
|
||||
}
|
||||
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
|
||||
const int orig_index = vert_orig_index[vert_index];
|
||||
if(orig_index != vert_index) {
|
||||
/* Skip duplicates, they'll be overwritten later on. */
|
||||
continue;
|
||||
}
|
||||
if(counter[vert_index] > 0) {
|
||||
const float3 normal = vert_normal[vert_index];
|
||||
const float angle =
|
||||
safe_acosf(dot(normal,
|
||||
edge_accum[vert_index] / counter[vert_index]));
|
||||
raw_data[vert_index] = angle * M_1_PI_F;
|
||||
}
|
||||
else {
|
||||
raw_data[vert_index] = 0.0f;
|
||||
}
|
||||
}
|
||||
/* STEP 3: Blur vertices to approximate 2 ring neighborhood. */
|
||||
AttributeSet& attributes = (subdivision)? mesh->subd_attributes: mesh->attributes;
|
||||
Attribute *attr = attributes.add(ATTR_STD_POINTINESS);
|
||||
float *data = attr->data_float();
|
||||
memcpy(data, &raw_data[0], sizeof(float) * raw_data.size());
|
||||
memset(&counter[0], 0, sizeof(int) * counter.size());
|
||||
edge_index = 0;
|
||||
visited_edges.clear();
|
||||
for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++edge_index) {
|
||||
const int v0 = vert_orig_index[b_mesh.edges[edge_index].vertices()[0]],
|
||||
v1 = vert_orig_index[b_mesh.edges[edge_index].vertices()[1]];
|
||||
if(visited_edges.exists(v0, v1)) {
|
||||
continue;
|
||||
}
|
||||
visited_edges.insert(v0, v1);
|
||||
data[v0] += raw_data[v1];
|
||||
data[v1] += raw_data[v0];
|
||||
++counter[v0];
|
||||
++counter[v1];
|
||||
}
|
||||
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
|
||||
data[vert_index] /= counter[vert_index] + 1;
|
||||
}
|
||||
/* STEP 4: Copy attribute to the duplicated vertices. */
|
||||
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
|
||||
const int orig_index = vert_orig_index[vert_index];
|
||||
data[vert_index] = data[orig_index];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -656,9 +765,6 @@ static void create_mesh(Scene *scene,
|
||||
generated[i++] = get_float3(v->undeformed_co())*size - loc;
|
||||
}
|
||||
|
||||
/* Create needed vertex attributes. */
|
||||
attr_create_pointiness(scene, mesh, b_mesh, subdivision);
|
||||
|
||||
/* create faces */
|
||||
vector<int> nverts(numfaces);
|
||||
vector<int> face_flags(numfaces, FACE_FLAG_NONE);
|
||||
@@ -671,6 +777,15 @@ static void create_mesh(Scene *scene,
|
||||
int shader = clamp(f->material_index(), 0, used_shaders.size()-1);
|
||||
bool smooth = f->use_smooth() || use_loop_normals;
|
||||
|
||||
if(use_loop_normals) {
|
||||
BL::Array<float, 12> loop_normals = f->split_normals();
|
||||
for(int i = 0; i < n; i++) {
|
||||
N[vi[i]] = make_float3(loop_normals[i * 3],
|
||||
loop_normals[i * 3 + 1],
|
||||
loop_normals[i * 3 + 2]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Create triangles.
|
||||
*
|
||||
* NOTE: Autosmooth is already taken care of.
|
||||
@@ -704,7 +819,7 @@ static void create_mesh(Scene *scene,
|
||||
int shader = clamp(p->material_index(), 0, used_shaders.size()-1);
|
||||
bool smooth = p->use_smooth() || use_loop_normals;
|
||||
|
||||
vi.reserve(n);
|
||||
vi.resize(n);
|
||||
for(int i = 0; i < n; i++) {
|
||||
/* NOTE: Autosmooth is already taken care of. */
|
||||
vi[i] = b_mesh.loops[p->loop_start() + i].vertex_index();
|
||||
@@ -718,6 +833,7 @@ static void create_mesh(Scene *scene,
|
||||
/* Create all needed attributes.
|
||||
* The calculate functions will check whether they're needed or not.
|
||||
*/
|
||||
attr_create_pointiness(scene, mesh, b_mesh, subdivision);
|
||||
attr_create_vertex_color(scene, mesh, b_mesh, nverts, face_flags, subdivision);
|
||||
attr_create_uv_map(scene, mesh, b_mesh, nverts, face_flags, subdivision, subdivide_uvs);
|
||||
|
||||
@@ -1178,4 +1294,3 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob,
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
|
@@ -14,24 +14,24 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "camera.h"
|
||||
#include "integrator.h"
|
||||
#include "graph.h"
|
||||
#include "light.h"
|
||||
#include "mesh.h"
|
||||
#include "object.h"
|
||||
#include "scene.h"
|
||||
#include "nodes.h"
|
||||
#include "particles.h"
|
||||
#include "shader.h"
|
||||
#include "render/camera.h"
|
||||
#include "render/integrator.h"
|
||||
#include "render/graph.h"
|
||||
#include "render/light.h"
|
||||
#include "render/mesh.h"
|
||||
#include "render/object.h"
|
||||
#include "render/scene.h"
|
||||
#include "render/nodes.h"
|
||||
#include "render/particles.h"
|
||||
#include "render/shader.h"
|
||||
|
||||
#include "blender_object_cull.h"
|
||||
#include "blender_sync.h"
|
||||
#include "blender_util.h"
|
||||
#include "blender/blender_object_cull.h"
|
||||
#include "blender/blender_sync.h"
|
||||
#include "blender/blender_util.h"
|
||||
|
||||
#include "util_foreach.h"
|
||||
#include "util_hash.h"
|
||||
#include "util_logging.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_hash.h"
|
||||
#include "util/util_logging.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -343,6 +343,13 @@ Object *BlenderSync::sync_object(BL::Object& b_parent,
|
||||
object_updated = true;
|
||||
}
|
||||
|
||||
PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
|
||||
bool is_shadow_catcher = get_boolean(cobject, "is_shadow_catcher");
|
||||
if(is_shadow_catcher != object->is_shadow_catcher) {
|
||||
object->is_shadow_catcher = is_shadow_catcher;
|
||||
object_updated = true;
|
||||
}
|
||||
|
||||
/* object sync
|
||||
* transform comparison should not be needed, but duplis don't work perfect
|
||||
* in the depsgraph and may not signal changes, so this is a workaround */
|
||||
|
@@ -16,9 +16,9 @@
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "camera.h"
|
||||
#include "render/camera.h"
|
||||
|
||||
#include "blender_object_cull.h"
|
||||
#include "blender/blender_object_cull.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -17,8 +17,8 @@
|
||||
#ifndef __BLENDER_OBJECT_CULL_H__
|
||||
#define __BLENDER_OBJECT_CULL_H__
|
||||
|
||||
#include "blender_sync.h"
|
||||
#include "util_types.h"
|
||||
#include "blender/blender_sync.h"
|
||||
#include "util/util_types.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -14,14 +14,14 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "mesh.h"
|
||||
#include "object.h"
|
||||
#include "particles.h"
|
||||
#include "render/mesh.h"
|
||||
#include "render/object.h"
|
||||
#include "render/particles.h"
|
||||
|
||||
#include "blender_sync.h"
|
||||
#include "blender_util.h"
|
||||
#include "blender/blender_sync.h"
|
||||
#include "blender/blender_util.h"
|
||||
|
||||
#include "util_foreach.h"
|
||||
#include "util/util_foreach.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -16,21 +16,21 @@
|
||||
|
||||
#include <Python.h>
|
||||
|
||||
#include "CCL_api.h"
|
||||
#include "blender/CCL_api.h"
|
||||
|
||||
#include "blender_sync.h"
|
||||
#include "blender_session.h"
|
||||
#include "blender/blender_sync.h"
|
||||
#include "blender/blender_session.h"
|
||||
|
||||
#include "util_foreach.h"
|
||||
#include "util_logging.h"
|
||||
#include "util_md5.h"
|
||||
#include "util_opengl.h"
|
||||
#include "util_path.h"
|
||||
#include "util_string.h"
|
||||
#include "util_types.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_md5.h"
|
||||
#include "util/util_opengl.h"
|
||||
#include "util/util_path.h"
|
||||
#include "util/util_string.h"
|
||||
#include "util/util_types.h"
|
||||
|
||||
#ifdef WITH_OSL
|
||||
#include "osl.h"
|
||||
#include "render/osl.h"
|
||||
|
||||
#include <OSL/oslquery.h>
|
||||
#include <OSL/oslconfig.h>
|
||||
@@ -67,8 +67,10 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
|
||||
flags.cpu.sse3 = get_boolean(cscene, "debug_use_cpu_sse3");
|
||||
flags.cpu.sse2 = get_boolean(cscene, "debug_use_cpu_sse2");
|
||||
flags.cpu.qbvh = get_boolean(cscene, "debug_use_qbvh");
|
||||
flags.cpu.split_kernel = get_boolean(cscene, "debug_use_cpu_split_kernel");
|
||||
/* Synchronize CUDA flags. */
|
||||
flags.cuda.adaptive_compile = get_boolean(cscene, "debug_use_cuda_adaptive_compile");
|
||||
flags.cuda.split_kernel = get_boolean(cscene, "debug_use_cuda_split_kernel");
|
||||
/* Synchronize OpenCL kernel type. */
|
||||
switch(get_enum(cscene, "debug_opencl_kernel_type")) {
|
||||
case 0:
|
||||
@@ -104,6 +106,7 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
|
||||
}
|
||||
/* Synchronize other OpenCL flags. */
|
||||
flags.opencl.debug = get_boolean(cscene, "debug_use_opencl_debug");
|
||||
flags.opencl.single_program = get_boolean(cscene, "debug_opencl_kernel_single_program");
|
||||
return flags.opencl.device_type != opencl_device_type ||
|
||||
flags.opencl.kernel_type != opencl_kernel_type;
|
||||
}
|
||||
@@ -641,7 +644,7 @@ static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
static PyObject *set_resumable_chunks_func(PyObject * /*self*/, PyObject *args)
|
||||
static PyObject *set_resumable_chunk_func(PyObject * /*self*/, PyObject *args)
|
||||
{
|
||||
int num_resumable_chunks, current_resumable_chunk;
|
||||
if(!PyArg_ParseTuple(args, "ii",
|
||||
@@ -676,6 +679,53 @@ static PyObject *set_resumable_chunks_func(PyObject * /*self*/, PyObject *args)
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
/* Python entry point: configure resumable rendering of a range of chunks.
 *
 * Expects three integers in args: the total number of chunks, the first chunk
 * to render and the last chunk to render. Chunk numbers start at 1 and the
 * range is inclusive on both ends.
 *
 * Invalid values print a message to stderr and abort the process. Valid
 * values are stored in the static BlenderSession resumable-chunk fields.
 * Always returns None.
 */
static PyObject *set_resumable_chunk_range_func(PyObject * /*self*/, PyObject *args)
{
	int num_chunks, start_chunk, end_chunk;
	if(!PyArg_ParseTuple(args, "iii",
	                     &num_chunks,
	                     &start_chunk,
	                     &end_chunk)) {
		/* NOTE(review): PyArg_ParseTuple has set a Python exception here;
		 * the CPython convention is to return NULL in that case. Kept as is
		 * to preserve the existing behavior -- confirm against the other
		 * functions of this module.
		 */
		Py_RETURN_NONE;
	}

	/* abort() does not return, so no return statement is needed after it. */
	if(num_chunks <= 0) {
		fprintf(stderr, "Cycles: Bad value for number of resumable chunks.\n");
		abort();
	}
	if(start_chunk < 1 || start_chunk > num_chunks) {
		fprintf(stderr, "Cycles: Bad value for start chunk number.\n");
		abort();
	}
	if(end_chunk < 1 || end_chunk > num_chunks) {
		fprintf(stderr, "Cycles: Bad value for end chunk number.\n");
		abort();
	}
	if(start_chunk > end_chunk) {
		fprintf(stderr, "Cycles: End chunk should be higher than start one.\n");
		abort();
	}

	VLOG(1) << "Initialized resumable render: "
	        << "num_resumable_chunks=" << num_chunks << ", "
	        << "start_resumable_chunk=" << start_chunk << ", "
	        << "end_resumable_chunk=" << end_chunk;
	BlenderSession::num_resumable_chunks = num_chunks;
	BlenderSession::start_resumable_chunk = start_chunk;
	BlenderSession::end_resumable_chunk = end_chunk;

	printf("Cycles: Will render chunks %d to %d of %d\n",
	       start_chunk,
	       end_chunk,
	       num_chunks);

	Py_RETURN_NONE;
}
|
||||
|
||||
static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
|
||||
{
|
||||
vector<DeviceInfo>& devices = Device::available_devices();
|
||||
@@ -715,7 +765,8 @@ static PyMethodDef methods[] = {
|
||||
{"debug_flags_reset", debug_flags_reset_func, METH_NOARGS, ""},
|
||||
|
||||
/* Resumable render */
|
||||
{"set_resumable_chunks", set_resumable_chunks_func, METH_VARARGS, ""},
|
||||
{"set_resumable_chunk", set_resumable_chunk_func, METH_VARARGS, ""},
|
||||
{"set_resumable_chunk_range", set_resumable_chunk_range_func, METH_VARARGS, ""},
|
||||
|
||||
/* Compute Device selection */
|
||||
{"get_device_types", get_device_types_func, METH_VARARGS, ""},
|
||||
|
@@ -16,36 +16,38 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "background.h"
|
||||
#include "buffers.h"
|
||||
#include "camera.h"
|
||||
#include "device.h"
|
||||
#include "integrator.h"
|
||||
#include "film.h"
|
||||
#include "light.h"
|
||||
#include "mesh.h"
|
||||
#include "object.h"
|
||||
#include "scene.h"
|
||||
#include "session.h"
|
||||
#include "shader.h"
|
||||
#include "render/background.h"
|
||||
#include "render/buffers.h"
|
||||
#include "render/camera.h"
|
||||
#include "device/device.h"
|
||||
#include "render/integrator.h"
|
||||
#include "render/film.h"
|
||||
#include "render/light.h"
|
||||
#include "render/mesh.h"
|
||||
#include "render/object.h"
|
||||
#include "render/scene.h"
|
||||
#include "render/session.h"
|
||||
#include "render/shader.h"
|
||||
|
||||
#include "util_color.h"
|
||||
#include "util_foreach.h"
|
||||
#include "util_function.h"
|
||||
#include "util_hash.h"
|
||||
#include "util_logging.h"
|
||||
#include "util_progress.h"
|
||||
#include "util_time.h"
|
||||
#include "util/util_color.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_function.h"
|
||||
#include "util/util_hash.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_progress.h"
|
||||
#include "util/util_time.h"
|
||||
|
||||
#include "blender_sync.h"
|
||||
#include "blender_session.h"
|
||||
#include "blender_util.h"
|
||||
#include "blender/blender_sync.h"
|
||||
#include "blender/blender_session.h"
|
||||
#include "blender/blender_util.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
bool BlenderSession::headless = false;
|
||||
int BlenderSession::num_resumable_chunks = 0;
|
||||
int BlenderSession::current_resumable_chunk = 0;
|
||||
int BlenderSession::start_resumable_chunk = 0;
|
||||
int BlenderSession::end_resumable_chunk = 0;
|
||||
|
||||
BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
|
||||
BL::UserPreferences& b_userpref,
|
||||
@@ -68,6 +70,7 @@ BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
|
||||
background = true;
|
||||
last_redraw_time = 0.0;
|
||||
start_resize_time = 0.0;
|
||||
last_status_time = 0.0;
|
||||
}
|
||||
|
||||
BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
|
||||
@@ -93,6 +96,7 @@ BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
|
||||
background = false;
|
||||
last_redraw_time = 0.0;
|
||||
start_resize_time = 0.0;
|
||||
last_status_time = 0.0;
|
||||
}
|
||||
|
||||
BlenderSession::~BlenderSession()
|
||||
@@ -989,10 +993,14 @@ void BlenderSession::update_status_progress()
|
||||
if(substatus.size() > 0)
|
||||
status += " | " + substatus;
|
||||
|
||||
if(status != last_status) {
|
||||
double current_time = time_dt();
|
||||
/* When rendering in a window, redraw the status at least once per second to keep the elapsed and remaining time up-to-date.
|
||||
* For headless rendering, only report when something significant changes to keep the console output readable. */
|
||||
if(status != last_status || (!headless && (current_time - last_status_time) > 1.0)) {
|
||||
b_engine.update_stats("", (timestatus + scene + status).c_str());
|
||||
b_engine.update_memory_stats(mem_used, mem_peak);
|
||||
last_status = status;
|
||||
last_status_time = current_time;
|
||||
}
|
||||
if(progress != last_progress) {
|
||||
b_engine.update_progress(progress);
|
||||
@@ -1342,9 +1350,21 @@ void BlenderSession::update_resumable_tile_manager(int num_samples)
|
||||
return;
|
||||
}
|
||||
|
||||
int num_samples_per_chunk = (int)ceilf((float)num_samples / num_resumable_chunks);
|
||||
int range_start_sample = num_samples_per_chunk * (current_resumable_chunk - 1);
|
||||
int range_num_samples = num_samples_per_chunk;
|
||||
const int num_samples_per_chunk = (int)ceilf((float)num_samples / num_resumable_chunks);
|
||||
|
||||
int range_start_sample, range_num_samples;
|
||||
if(current_resumable_chunk != 0) {
|
||||
/* Single chunk rendering. */
|
||||
range_start_sample = num_samples_per_chunk * (current_resumable_chunk - 1);
|
||||
range_num_samples = num_samples_per_chunk;
|
||||
}
|
||||
else {
|
||||
/* Ranged-chunks. */
|
||||
const int num_chunks = end_resumable_chunk - start_resumable_chunk + 1;
|
||||
range_start_sample = num_samples_per_chunk * (start_resumable_chunk - 1);
|
||||
range_num_samples = num_chunks * num_samples_per_chunk;
|
||||
}
|
||||
/* Make sure we don't overshoot. */
|
||||
if(range_start_sample + range_num_samples > num_samples) {
|
||||
range_num_samples = num_samples - range_num_samples;
|
||||
}
|
||||
|
@@ -17,12 +17,12 @@
|
||||
#ifndef __BLENDER_SESSION_H__
|
||||
#define __BLENDER_SESSION_H__
|
||||
|
||||
#include "device.h"
|
||||
#include "scene.h"
|
||||
#include "session.h"
|
||||
#include "bake.h"
|
||||
#include "device/device.h"
|
||||
#include "render/scene.h"
|
||||
#include "render/session.h"
|
||||
#include "render/bake.h"
|
||||
|
||||
#include "util_vector.h"
|
||||
#include "util/util_vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -113,6 +113,7 @@ public:
|
||||
string last_status;
|
||||
string last_error;
|
||||
float last_progress;
|
||||
double last_status_time;
|
||||
|
||||
int width, height;
|
||||
double start_resize_time;
|
||||
@@ -137,6 +138,10 @@ public:
|
||||
/* Current resumable chunk index to render. */
|
||||
static int current_resumable_chunk;
|
||||
|
||||
/* Alternative to single-chunk rendering to render a range of chunks. */
|
||||
static int start_resumable_chunk;
|
||||
static int end_resumable_chunk;
|
||||
|
||||
protected:
|
||||
void do_write_update_render_result(BL::RenderResult& b_rr,
|
||||
BL::RenderLayer& b_rlay,
|
||||
|
@@ -14,20 +14,23 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "background.h"
|
||||
#include "graph.h"
|
||||
#include "light.h"
|
||||
#include "nodes.h"
|
||||
#include "osl.h"
|
||||
#include "scene.h"
|
||||
#include "shader.h"
|
||||
#include "render/background.h"
|
||||
#include "render/graph.h"
|
||||
#include "render/light.h"
|
||||
#include "render/nodes.h"
|
||||
#include "render/osl.h"
|
||||
#include "render/scene.h"
|
||||
#include "render/shader.h"
|
||||
|
||||
#include "blender_texture.h"
|
||||
#include "blender_sync.h"
|
||||
#include "blender_util.h"
|
||||
#include "blender/blender_texture.h"
|
||||
#include "blender/blender_sync.h"
|
||||
#include "blender/blender_util.h"
|
||||
|
||||
#include "util_debug.h"
|
||||
#include "util_string.h"
|
||||
#include "util/util_debug.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_string.h"
|
||||
#include "util/util_set.h"
|
||||
#include "util/util_task.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -609,7 +612,8 @@ static ShaderNode *add_node(Scene *scene,
|
||||
bool is_builtin = b_image.packed_file() ||
|
||||
b_image.source() == BL::Image::source_GENERATED ||
|
||||
b_image.source() == BL::Image::source_MOVIE ||
|
||||
b_engine.is_preview();
|
||||
(b_engine.is_preview() &&
|
||||
b_image.source() != BL::Image::source_SEQUENCE);
|
||||
|
||||
if(is_builtin) {
|
||||
/* for builtin images we're using image datablock name to find an image to
|
||||
@@ -662,7 +666,8 @@ static ShaderNode *add_node(Scene *scene,
|
||||
bool is_builtin = b_image.packed_file() ||
|
||||
b_image.source() == BL::Image::source_GENERATED ||
|
||||
b_image.source() == BL::Image::source_MOVIE ||
|
||||
b_engine.is_preview();
|
||||
(b_engine.is_preview() &&
|
||||
b_image.source() != BL::Image::source_SEQUENCE);
|
||||
|
||||
if(is_builtin) {
|
||||
int scene_frame = b_scene.frame_current();
|
||||
@@ -1162,6 +1167,9 @@ void BlenderSync::sync_materials(bool update_all)
|
||||
/* material loop */
|
||||
BL::BlendData::materials_iterator b_mat;
|
||||
|
||||
TaskPool pool;
|
||||
set<Shader*> updated_shaders;
|
||||
|
||||
for(b_data.materials.begin(b_mat); b_mat != b_data.materials.end(); ++b_mat) {
|
||||
Shader *shader;
|
||||
|
||||
@@ -1197,9 +1205,37 @@ void BlenderSync::sync_materials(bool update_all)
|
||||
shader->displacement_method = (experimental) ? get_displacement_method(cmat) : DISPLACE_BUMP;
|
||||
|
||||
shader->set_graph(graph);
|
||||
shader->tag_update(scene);
|
||||
|
||||
/* By simplifying the shader graph as soon as possible, some
|
||||
* redundant shader nodes might be removed which prevents loading
|
||||
* unnecessary attributes later.
|
||||
*
|
||||
* However, since graph simplification also accounts for e.g. mix
|
||||
* weight, this would cause frequent expensive resyncs in interactive
|
||||
* sessions, so for those sessions optimization is only performed
|
||||
* right before compiling.
|
||||
*/
|
||||
if(!preview) {
|
||||
pool.push(function_bind(&ShaderGraph::simplify, graph, scene));
|
||||
/* NOTE: Update shaders out of the threads since those routines
|
||||
* are accessing and writing to a global context.
|
||||
*/
|
||||
updated_shaders.insert(shader);
|
||||
}
|
||||
else {
|
||||
/* NOTE: Update tagging can access links which are being
|
||||
* optimized out.
|
||||
*/
|
||||
shader->tag_update(scene);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pool.wait_work();
|
||||
|
||||
foreach(Shader *shader, updated_shaders) {
|
||||
shader->tag_update(scene);
|
||||
}
|
||||
}
|
||||
|
||||
/* Sync World */
|
||||
|
@@ -14,29 +14,29 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "background.h"
|
||||
#include "camera.h"
|
||||
#include "film.h"
|
||||
#include "graph.h"
|
||||
#include "integrator.h"
|
||||
#include "light.h"
|
||||
#include "mesh.h"
|
||||
#include "nodes.h"
|
||||
#include "object.h"
|
||||
#include "scene.h"
|
||||
#include "shader.h"
|
||||
#include "curves.h"
|
||||
#include "render/background.h"
|
||||
#include "render/camera.h"
|
||||
#include "render/film.h"
|
||||
#include "render/graph.h"
|
||||
#include "render/integrator.h"
|
||||
#include "render/light.h"
|
||||
#include "render/mesh.h"
|
||||
#include "render/nodes.h"
|
||||
#include "render/object.h"
|
||||
#include "render/scene.h"
|
||||
#include "render/shader.h"
|
||||
#include "render/curves.h"
|
||||
|
||||
#include "device.h"
|
||||
#include "device/device.h"
|
||||
|
||||
#include "blender_sync.h"
|
||||
#include "blender_session.h"
|
||||
#include "blender_util.h"
|
||||
#include "blender/blender_sync.h"
|
||||
#include "blender/blender_session.h"
|
||||
#include "blender/blender_util.h"
|
||||
|
||||
#include "util_debug.h"
|
||||
#include "util_foreach.h"
|
||||
#include "util_opengl.h"
|
||||
#include "util_hash.h"
|
||||
#include "util/util_debug.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_opengl.h"
|
||||
#include "util/util_hash.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -22,15 +22,15 @@
|
||||
#include "RNA_access.h"
|
||||
#include "RNA_blender_cpp.h"
|
||||
|
||||
#include "blender_util.h"
|
||||
#include "blender/blender_util.h"
|
||||
|
||||
#include "scene.h"
|
||||
#include "session.h"
|
||||
#include "render/scene.h"
|
||||
#include "render/session.h"
|
||||
|
||||
#include "util_map.h"
|
||||
#include "util_set.h"
|
||||
#include "util_transform.h"
|
||||
#include "util_vector.h"
|
||||
#include "util/util_map.h"
|
||||
#include "util/util_set.h"
|
||||
#include "util/util_transform.h"
|
||||
#include "util/util_vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -14,7 +14,7 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "blender_texture.h"
|
||||
#include "blender/blender_texture.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -18,7 +18,7 @@
|
||||
#define __BLENDER_TEXTURE_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "blender_sync.h"
|
||||
#include "blender/blender_sync.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -17,14 +17,15 @@
|
||||
#ifndef __BLENDER_UTIL_H__
|
||||
#define __BLENDER_UTIL_H__
|
||||
|
||||
#include "mesh.h"
|
||||
#include "render/mesh.h"
|
||||
|
||||
#include "util_map.h"
|
||||
#include "util_path.h"
|
||||
#include "util_set.h"
|
||||
#include "util_transform.h"
|
||||
#include "util_types.h"
|
||||
#include "util_vector.h"
|
||||
#include "util/util_algorithm.h"
|
||||
#include "util/util_map.h"
|
||||
#include "util/util_path.h"
|
||||
#include "util/util_set.h"
|
||||
#include "util/util_transform.h"
|
||||
#include "util/util_types.h"
|
||||
#include "util/util_vector.h"
|
||||
|
||||
/* Hacks to hook into Blender API
|
||||
* todo: clean this up ... */
|
||||
@@ -78,7 +79,7 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
|
||||
me.calc_normals_split();
|
||||
}
|
||||
else {
|
||||
me.split_faces();
|
||||
me.split_faces(false);
|
||||
}
|
||||
}
|
||||
if(subdivision_type == Mesh::SUBDIVISION_NONE) {
|
||||
@@ -173,22 +174,19 @@ static inline void curvemapping_color_to_array(BL::CurveMapping& cumap,
|
||||
|
||||
if(rgb_curve) {
|
||||
BL::CurveMap mapI = cumap.curves[3];
|
||||
|
||||
for(int i = 0; i < size; i++) {
|
||||
float t = min_x + (float)i/(float)(size-1) * range_x;
|
||||
|
||||
data[i][0] = mapR.evaluate(mapI.evaluate(t));
|
||||
data[i][1] = mapG.evaluate(mapI.evaluate(t));
|
||||
data[i][2] = mapB.evaluate(mapI.evaluate(t));
|
||||
const float t = min_x + (float)i/(float)(size-1) * range_x;
|
||||
data[i] = make_float3(mapR.evaluate(mapI.evaluate(t)),
|
||||
mapG.evaluate(mapI.evaluate(t)),
|
||||
mapB.evaluate(mapI.evaluate(t)));
|
||||
}
|
||||
}
|
||||
else {
|
||||
for(int i = 0; i < size; i++) {
|
||||
float t = min_x + (float)i/(float)(size-1) * range_x;
|
||||
|
||||
data[i][0] = mapR.evaluate(t);
|
||||
data[i][1] = mapG.evaluate(t);
|
||||
data[i][2] = mapB.evaluate(t);
|
||||
data[i] = make_float3(mapR.evaluate(t),
|
||||
mapG.evaluate(t),
|
||||
mapB.evaluate(t));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -786,6 +784,35 @@ struct ParticleSystemKey {
|
||||
}
|
||||
};
|
||||
|
||||
/* Set of undirected edges, each identified by a pair of vertex indices.
 *
 * Vertex order is irrelevant: the pair is sorted before it is stored or
 * looked up, so (v0, v1) and (v1, v0) denote the same edge. */
class EdgeMap {
public:
	EdgeMap() {
	}

	/* Remove all stored edges. */
	void clear() {
		edges_.clear();
	}

	/* Store the edge between v0 and v1. Edges are kept in a set, so
	 * inserting the same edge again has no effect. */
	void insert(int v0, int v1) {
		get_sorted_verts(v0, v1);
		edges_.insert(std::pair<int, int>(v0, v1));
	}

	/* Return true when the edge between v0 and v1 has been inserted.
	 * This is a pure query, hence const: it can be used through a
	 * const reference to the map. */
	bool exists(int v0, int v1) const {
		get_sorted_verts(v0, v1);
		return edges_.find(std::pair<int, int>(v0, v1)) != edges_.end();
	}

protected:
	/* Swap the two indices in place when needed so that v0 <= v1.
	 * Static because it does not read or write any member state. */
	static void get_sorted_verts(int& v0, int& v1) {
		if(v0 > v1) {
			swap(v0, v1);
		}
	}

	set< std::pair<int, int> > edges_;
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __BLENDER_UTIL_H__ */
|
||||
|
@@ -1,12 +1,6 @@
|
||||
|
||||
set(INC
|
||||
.
|
||||
../graph
|
||||
../kernel
|
||||
../kernel/svm
|
||||
../render
|
||||
../util
|
||||
../device
|
||||
..
|
||||
)
|
||||
|
||||
set(INC_SYS
|
||||
|
@@ -15,25 +15,25 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "mesh.h"
|
||||
#include "object.h"
|
||||
#include "scene.h"
|
||||
#include "curves.h"
|
||||
#include "render/mesh.h"
|
||||
#include "render/object.h"
|
||||
#include "render/scene.h"
|
||||
#include "render/curves.h"
|
||||
|
||||
#include "bvh.h"
|
||||
#include "bvh_build.h"
|
||||
#include "bvh_node.h"
|
||||
#include "bvh_params.h"
|
||||
#include "bvh_unaligned.h"
|
||||
#include "bvh/bvh.h"
|
||||
#include "bvh/bvh_build.h"
|
||||
#include "bvh/bvh_node.h"
|
||||
#include "bvh/bvh_params.h"
|
||||
#include "bvh/bvh_unaligned.h"
|
||||
|
||||
#include "util_debug.h"
|
||||
#include "util_foreach.h"
|
||||
#include "util_logging.h"
|
||||
#include "util_map.h"
|
||||
#include "util_progress.h"
|
||||
#include "util_system.h"
|
||||
#include "util_types.h"
|
||||
#include "util_math.h"
|
||||
#include "util/util_debug.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_map.h"
|
||||
#include "util/util_progress.h"
|
||||
#include "util/util_system.h"
|
||||
#include "util/util_types.h"
|
||||
#include "util/util_math.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -67,7 +67,7 @@ BVH *BVH::create(const BVHParams& params, const vector<Object*>& objects)
|
||||
if(params.use_qbvh)
|
||||
return new QBVH(params, objects);
|
||||
else
|
||||
return new RegularBVH(params, objects);
|
||||
return new BinaryBVH(params, objects);
|
||||
}
|
||||
|
||||
/* Building */
|
||||
@@ -81,6 +81,7 @@ void BVH::build(Progress& progress)
|
||||
pack.prim_type,
|
||||
pack.prim_index,
|
||||
pack.prim_object,
|
||||
pack.prim_time,
|
||||
params,
|
||||
progress);
|
||||
BVHNode *root = bvh_build.run();
|
||||
@@ -256,6 +257,10 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
pack.leaf_nodes.resize(leaf_nodes_size);
|
||||
pack.object_node.resize(objects.size());
|
||||
|
||||
if(params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0) {
|
||||
pack.prim_time.resize(prim_index_size);
|
||||
}
|
||||
|
||||
int *pack_prim_index = (pack.prim_index.size())? &pack.prim_index[0]: NULL;
|
||||
int *pack_prim_type = (pack.prim_type.size())? &pack.prim_type[0]: NULL;
|
||||
int *pack_prim_object = (pack.prim_object.size())? &pack.prim_object[0]: NULL;
|
||||
@@ -264,6 +269,7 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
uint *pack_prim_tri_index = (pack.prim_tri_index.size())? &pack.prim_tri_index[0]: NULL;
|
||||
int4 *pack_nodes = (pack.nodes.size())? &pack.nodes[0]: NULL;
|
||||
int4 *pack_leaf_nodes = (pack.leaf_nodes.size())? &pack.leaf_nodes[0]: NULL;
|
||||
float2 *pack_prim_time = (pack.prim_time.size())? &pack.prim_time[0]: NULL;
|
||||
|
||||
/* merge */
|
||||
foreach(Object *ob, objects) {
|
||||
@@ -309,6 +315,7 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
int *bvh_prim_type = &bvh->pack.prim_type[0];
|
||||
uint *bvh_prim_visibility = &bvh->pack.prim_visibility[0];
|
||||
uint *bvh_prim_tri_index = &bvh->pack.prim_tri_index[0];
|
||||
float2 *bvh_prim_time = bvh->pack.prim_time.size()? &bvh->pack.prim_time[0]: NULL;
|
||||
|
||||
for(size_t i = 0; i < bvh_prim_index_size; i++) {
|
||||
if(bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
|
||||
@@ -324,6 +331,9 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
|
||||
pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i];
|
||||
pack_prim_object[pack_prim_index_offset] = 0; // unused for instances
|
||||
if(bvh_prim_time != NULL) {
|
||||
pack_prim_time[pack_prim_index_offset] = bvh_prim_time[i];
|
||||
}
|
||||
pack_prim_index_offset++;
|
||||
}
|
||||
}
|
||||
@@ -414,64 +424,64 @@ static bool node_bvh_is_unaligned(const BVHNode *node)
|
||||
{
|
||||
const BVHNode *node0 = node->get_child(0),
|
||||
*node1 = node->get_child(1);
|
||||
return node0->is_unaligned() || node1->is_unaligned();
|
||||
return node0->is_unaligned || node1->is_unaligned;
|
||||
}
|
||||
|
||||
RegularBVH::RegularBVH(const BVHParams& params_, const vector<Object*>& objects_)
|
||||
BinaryBVH::BinaryBVH(const BVHParams& params_, const vector<Object*>& objects_)
|
||||
: BVH(params_, objects_)
|
||||
{
|
||||
}
|
||||
|
||||
void RegularBVH::pack_leaf(const BVHStackEntry& e,
|
||||
const LeafNode *leaf)
|
||||
void BinaryBVH::pack_leaf(const BVHStackEntry& e,
|
||||
const LeafNode *leaf)
|
||||
{
|
||||
assert(e.idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size());
|
||||
float4 data[BVH_NODE_LEAF_SIZE];
|
||||
memset(data, 0, sizeof(data));
|
||||
if(leaf->num_triangles() == 1 && pack.prim_index[leaf->m_lo] == -1) {
|
||||
if(leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
|
||||
/* object */
|
||||
data[0].x = __int_as_float(~(leaf->m_lo));
|
||||
data[0].x = __int_as_float(~(leaf->lo));
|
||||
data[0].y = __int_as_float(0);
|
||||
}
|
||||
else {
|
||||
/* triangle */
|
||||
data[0].x = __int_as_float(leaf->m_lo);
|
||||
data[0].y = __int_as_float(leaf->m_hi);
|
||||
data[0].x = __int_as_float(leaf->lo);
|
||||
data[0].y = __int_as_float(leaf->hi);
|
||||
}
|
||||
data[0].z = __uint_as_float(leaf->m_visibility);
|
||||
data[0].z = __uint_as_float(leaf->visibility);
|
||||
if(leaf->num_triangles() != 0) {
|
||||
data[0].w = __uint_as_float(pack.prim_type[leaf->m_lo]);
|
||||
data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
|
||||
}
|
||||
|
||||
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_NODE_LEAF_SIZE);
|
||||
}
|
||||
|
||||
void RegularBVH::pack_inner(const BVHStackEntry& e,
|
||||
const BVHStackEntry& e0,
|
||||
const BVHStackEntry& e1)
|
||||
void BinaryBVH::pack_inner(const BVHStackEntry& e,
|
||||
const BVHStackEntry& e0,
|
||||
const BVHStackEntry& e1)
|
||||
{
|
||||
if(e0.node->is_unaligned() || e1.node->is_unaligned()) {
|
||||
if(e0.node->is_unaligned || e1.node->is_unaligned) {
|
||||
pack_unaligned_inner(e, e0, e1);
|
||||
} else {
|
||||
pack_aligned_inner(e, e0, e1);
|
||||
}
|
||||
}
|
||||
|
||||
void RegularBVH::pack_aligned_inner(const BVHStackEntry& e,
|
||||
const BVHStackEntry& e0,
|
||||
const BVHStackEntry& e1)
|
||||
void BinaryBVH::pack_aligned_inner(const BVHStackEntry& e,
|
||||
const BVHStackEntry& e0,
|
||||
const BVHStackEntry& e1)
|
||||
{
|
||||
pack_aligned_node(e.idx,
|
||||
e0.node->m_bounds, e1.node->m_bounds,
|
||||
e0.node->bounds, e1.node->bounds,
|
||||
e0.encodeIdx(), e1.encodeIdx(),
|
||||
e0.node->m_visibility, e1.node->m_visibility);
|
||||
e0.node->visibility, e1.node->visibility);
|
||||
}
|
||||
|
||||
void RegularBVH::pack_aligned_node(int idx,
|
||||
const BoundBox& b0,
|
||||
const BoundBox& b1,
|
||||
int c0, int c1,
|
||||
uint visibility0, uint visibility1)
|
||||
void BinaryBVH::pack_aligned_node(int idx,
|
||||
const BoundBox& b0,
|
||||
const BoundBox& b1,
|
||||
int c0, int c1,
|
||||
uint visibility0, uint visibility1)
|
||||
{
|
||||
assert(idx + BVH_NODE_SIZE <= pack.nodes.size());
|
||||
assert(c0 < 0 || c0 < pack.nodes.size());
|
||||
@@ -498,26 +508,26 @@ void RegularBVH::pack_aligned_node(int idx,
|
||||
memcpy(&pack.nodes[idx], data, sizeof(int4)*BVH_NODE_SIZE);
|
||||
}
|
||||
|
||||
void RegularBVH::pack_unaligned_inner(const BVHStackEntry& e,
|
||||
const BVHStackEntry& e0,
|
||||
const BVHStackEntry& e1)
|
||||
void BinaryBVH::pack_unaligned_inner(const BVHStackEntry& e,
|
||||
const BVHStackEntry& e0,
|
||||
const BVHStackEntry& e1)
|
||||
{
|
||||
pack_unaligned_node(e.idx,
|
||||
e0.node->get_aligned_space(),
|
||||
e1.node->get_aligned_space(),
|
||||
e0.node->m_bounds,
|
||||
e1.node->m_bounds,
|
||||
e0.node->bounds,
|
||||
e1.node->bounds,
|
||||
e0.encodeIdx(), e1.encodeIdx(),
|
||||
e0.node->m_visibility, e1.node->m_visibility);
|
||||
e0.node->visibility, e1.node->visibility);
|
||||
}
|
||||
|
||||
void RegularBVH::pack_unaligned_node(int idx,
|
||||
const Transform& aligned_space0,
|
||||
const Transform& aligned_space1,
|
||||
const BoundBox& bounds0,
|
||||
const BoundBox& bounds1,
|
||||
int c0, int c1,
|
||||
uint visibility0, uint visibility1)
|
||||
void BinaryBVH::pack_unaligned_node(int idx,
|
||||
const Transform& aligned_space0,
|
||||
const Transform& aligned_space1,
|
||||
const BoundBox& bounds0,
|
||||
const BoundBox& bounds1,
|
||||
int c0, int c1,
|
||||
uint visibility0, uint visibility1)
|
||||
{
|
||||
assert(idx + BVH_UNALIGNED_NODE_SIZE <= pack.nodes.size());
|
||||
assert(c0 < 0 || c0 < pack.nodes.size());
|
||||
@@ -543,7 +553,7 @@ void RegularBVH::pack_unaligned_node(int idx,
|
||||
memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_NODE_SIZE);
|
||||
}
|
||||
|
||||
void RegularBVH::pack_nodes(const BVHNode *root)
|
||||
void BinaryBVH::pack_nodes(const BVHNode *root)
|
||||
{
|
||||
const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
|
||||
const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
|
||||
@@ -620,7 +630,7 @@ void RegularBVH::pack_nodes(const BVHNode *root)
|
||||
pack.root_index = (root->is_leaf())? -1: 0;
|
||||
}
|
||||
|
||||
void RegularBVH::refit_nodes()
|
||||
void BinaryBVH::refit_nodes()
|
||||
{
|
||||
assert(!params.top_level);
|
||||
|
||||
@@ -629,7 +639,7 @@ void RegularBVH::refit_nodes()
|
||||
refit_node(0, (pack.root_index == -1)? true: false, bbox, visibility);
|
||||
}
|
||||
|
||||
void RegularBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
|
||||
void BinaryBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
|
||||
{
|
||||
if(leaf) {
|
||||
assert(idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size());
|
||||
@@ -759,18 +769,18 @@ static bool node_qbvh_is_unaligned(const BVHNode *node)
|
||||
*node1 = node->get_child(1);
|
||||
bool has_unaligned = false;
|
||||
if(node0->is_leaf()) {
|
||||
has_unaligned |= node0->is_unaligned();
|
||||
has_unaligned |= node0->is_unaligned;
|
||||
}
|
||||
else {
|
||||
has_unaligned |= node0->get_child(0)->is_unaligned();
|
||||
has_unaligned |= node0->get_child(1)->is_unaligned();
|
||||
has_unaligned |= node0->get_child(0)->is_unaligned;
|
||||
has_unaligned |= node0->get_child(1)->is_unaligned;
|
||||
}
|
||||
if(node1->is_leaf()) {
|
||||
has_unaligned |= node1->is_unaligned();
|
||||
has_unaligned |= node1->is_unaligned;
|
||||
}
|
||||
else {
|
||||
has_unaligned |= node1->get_child(0)->is_unaligned();
|
||||
has_unaligned |= node1->get_child(1)->is_unaligned();
|
||||
has_unaligned |= node1->get_child(0)->is_unaligned;
|
||||
has_unaligned |= node1->get_child(1)->is_unaligned;
|
||||
}
|
||||
return has_unaligned;
|
||||
}
|
||||
@@ -785,19 +795,19 @@ void QBVH::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf)
|
||||
{
|
||||
float4 data[BVH_QNODE_LEAF_SIZE];
|
||||
memset(data, 0, sizeof(data));
|
||||
if(leaf->num_triangles() == 1 && pack.prim_index[leaf->m_lo] == -1) {
|
||||
if(leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
|
||||
/* object */
|
||||
data[0].x = __int_as_float(~(leaf->m_lo));
|
||||
data[0].x = __int_as_float(~(leaf->lo));
|
||||
data[0].y = __int_as_float(0);
|
||||
}
|
||||
else {
|
||||
/* triangle */
|
||||
data[0].x = __int_as_float(leaf->m_lo);
|
||||
data[0].y = __int_as_float(leaf->m_hi);
|
||||
data[0].x = __int_as_float(leaf->lo);
|
||||
data[0].y = __int_as_float(leaf->hi);
|
||||
}
|
||||
data[0].z = __uint_as_float(leaf->m_visibility);
|
||||
data[0].z = __uint_as_float(leaf->visibility);
|
||||
if(leaf->num_triangles() != 0) {
|
||||
data[0].w = __uint_as_float(pack.prim_type[leaf->m_lo]);
|
||||
data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
|
||||
}
|
||||
|
||||
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_QNODE_LEAF_SIZE);
|
||||
@@ -813,7 +823,7 @@ void QBVH::pack_inner(const BVHStackEntry& e,
|
||||
*/
|
||||
if(params.use_unaligned_nodes) {
|
||||
for(int i = 0; i < num; i++) {
|
||||
if(en[i].node->is_unaligned()) {
|
||||
if(en[i].node->is_unaligned) {
|
||||
has_unaligned = true;
|
||||
break;
|
||||
}
|
||||
@@ -838,15 +848,15 @@ void QBVH::pack_aligned_inner(const BVHStackEntry& e,
|
||||
BoundBox bounds[4];
|
||||
int child[4];
|
||||
for(int i = 0; i < num; ++i) {
|
||||
bounds[i] = en[i].node->m_bounds;
|
||||
bounds[i] = en[i].node->bounds;
|
||||
child[i] = en[i].encodeIdx();
|
||||
}
|
||||
pack_aligned_node(e.idx,
|
||||
bounds,
|
||||
child,
|
||||
e.node->m_visibility,
|
||||
e.node->m_time_from,
|
||||
e.node->m_time_to,
|
||||
e.node->visibility,
|
||||
e.node->time_from,
|
||||
e.node->time_to,
|
||||
num);
|
||||
}
|
||||
|
||||
@@ -907,16 +917,16 @@ void QBVH::pack_unaligned_inner(const BVHStackEntry& e,
|
||||
int child[4];
|
||||
for(int i = 0; i < num; ++i) {
|
||||
aligned_space[i] = en[i].node->get_aligned_space();
|
||||
bounds[i] = en[i].node->m_bounds;
|
||||
bounds[i] = en[i].node->bounds;
|
||||
child[i] = en[i].encodeIdx();
|
||||
}
|
||||
pack_unaligned_node(e.idx,
|
||||
aligned_space,
|
||||
bounds,
|
||||
child,
|
||||
e.node->m_visibility,
|
||||
e.node->m_time_from,
|
||||
e.node->m_time_to,
|
||||
e.node->visibility,
|
||||
e.node->time_from,
|
||||
e.node->time_to,
|
||||
num);
|
||||
}
|
||||
|
||||
|
@@ -18,10 +18,10 @@
|
||||
#ifndef __BVH_H__
|
||||
#define __BVH_H__
|
||||
|
||||
#include "bvh_params.h"
|
||||
#include "bvh/bvh_params.h"
|
||||
|
||||
#include "util_types.h"
|
||||
#include "util_vector.h"
|
||||
#include "util/util_types.h"
|
||||
#include "util/util_vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -68,6 +68,8 @@ struct PackedBVH {
|
||||
array<int> prim_index;
|
||||
/* mapping from BVH primitive index, to the object id of that primitive. */
|
||||
array<int> prim_object;
|
||||
/* Time range of BVH primitive. */
|
||||
array<float2> prim_time;
|
||||
|
||||
/* index of the root node. */
|
||||
int root_index;
|
||||
@@ -108,15 +110,15 @@ protected:
|
||||
virtual void refit_nodes() = 0;
|
||||
};
|
||||
|
||||
/* Regular BVH
|
||||
/* Binary BVH
|
||||
*
|
||||
* Typical BVH with each node having two children. */
|
||||
|
||||
class RegularBVH : public BVH {
|
||||
class BinaryBVH : public BVH {
|
||||
protected:
|
||||
/* constructor */
|
||||
friend class BVH;
|
||||
RegularBVH(const BVHParams& params, const vector<Object*>& objects);
|
||||
BinaryBVH(const BVHParams& params, const vector<Object*>& objects);
|
||||
|
||||
/* pack */
|
||||
void pack_nodes(const BVHNode *root);
|
||||
|
@@ -19,11 +19,11 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "bvh_binning.h"
|
||||
#include "bvh/bvh_binning.h"
|
||||
|
||||
#include "util_algorithm.h"
|
||||
#include "util_boundbox.h"
|
||||
#include "util_types.h"
|
||||
#include "util/util_algorithm.h"
|
||||
#include "util/util_boundbox.h"
|
||||
#include "util/util_types.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -18,10 +18,10 @@
|
||||
#ifndef __BVH_BINNING_H__
|
||||
#define __BVH_BINNING_H__
|
||||
|
||||
#include "bvh_params.h"
|
||||
#include "bvh_unaligned.h"
|
||||
#include "bvh/bvh_params.h"
|
||||
#include "bvh/bvh_unaligned.h"
|
||||
|
||||
#include "util_types.h"
|
||||
#include "util/util_types.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -15,26 +15,26 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "bvh_binning.h"
|
||||
#include "bvh_build.h"
|
||||
#include "bvh_node.h"
|
||||
#include "bvh_params.h"
|
||||
#include "bvh/bvh_binning.h"
|
||||
#include "bvh/bvh_build.h"
|
||||
#include "bvh/bvh_node.h"
|
||||
#include "bvh/bvh_params.h"
|
||||
#include "bvh_split.h"
|
||||
|
||||
#include "mesh.h"
|
||||
#include "object.h"
|
||||
#include "scene.h"
|
||||
#include "curves.h"
|
||||
#include "render/mesh.h"
|
||||
#include "render/object.h"
|
||||
#include "render/scene.h"
|
||||
#include "render/curves.h"
|
||||
|
||||
#include "util_algorithm.h"
|
||||
#include "util_debug.h"
|
||||
#include "util_foreach.h"
|
||||
#include "util_logging.h"
|
||||
#include "util_progress.h"
|
||||
#include "util_stack_allocator.h"
|
||||
#include "util_simd.h"
|
||||
#include "util_time.h"
|
||||
#include "util_queue.h"
|
||||
#include "util/util_algorithm.h"
|
||||
#include "util/util_debug.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_progress.h"
|
||||
#include "util/util_stack_allocator.h"
|
||||
#include "util/util_simd.h"
|
||||
#include "util/util_time.h"
|
||||
#include "util/util_queue.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -93,12 +93,14 @@ BVHBuild::BVHBuild(const vector<Object*>& objects_,
|
||||
array<int>& prim_type_,
|
||||
array<int>& prim_index_,
|
||||
array<int>& prim_object_,
|
||||
array<float2>& prim_time_,
|
||||
const BVHParams& params_,
|
||||
Progress& progress_)
|
||||
: objects(objects_),
|
||||
prim_type(prim_type_),
|
||||
prim_index(prim_index_),
|
||||
prim_object(prim_object_),
|
||||
prim_time(prim_time_),
|
||||
params(params_),
|
||||
progress(progress_),
|
||||
progress_start_time(0.0),
|
||||
@@ -465,6 +467,9 @@ BVHNode* BVHBuild::run()
|
||||
}
|
||||
spatial_free_index = 0;
|
||||
|
||||
need_prim_time = params.num_motion_curve_steps > 0 ||
|
||||
params.num_motion_triangle_steps > 0;
|
||||
|
||||
/* init progress updates */
|
||||
double build_start_time;
|
||||
build_start_time = progress_start_time = time_dt();
|
||||
@@ -475,6 +480,12 @@ BVHNode* BVHBuild::run()
|
||||
prim_type.resize(references.size());
|
||||
prim_index.resize(references.size());
|
||||
prim_object.resize(references.size());
|
||||
if(need_prim_time) {
|
||||
prim_time.resize(references.size());
|
||||
}
|
||||
else {
|
||||
prim_time.resize(0);
|
||||
}
|
||||
|
||||
/* build recursively */
|
||||
BVHNode *rootnode;
|
||||
@@ -849,11 +860,14 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start,
|
||||
prim_type[start] = ref->prim_type();
|
||||
prim_index[start] = ref->prim_index();
|
||||
prim_object[start] = ref->prim_object();
|
||||
if(need_prim_time) {
|
||||
prim_time[start] = make_float2(ref->time_from(), ref->time_to());
|
||||
}
|
||||
|
||||
uint visibility = objects[ref->prim_object()]->visibility;
|
||||
BVHNode *leaf_node = new LeafNode(ref->bounds(), visibility, start, start+1);
|
||||
leaf_node->m_time_from = ref->time_from();
|
||||
leaf_node->m_time_to = ref->time_to();
|
||||
leaf_node->time_from = ref->time_from();
|
||||
leaf_node->time_to = ref->time_to();
|
||||
return leaf_node;
|
||||
}
|
||||
else {
|
||||
@@ -862,12 +876,12 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start,
|
||||
BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, start+mid, num-mid);
|
||||
|
||||
BoundBox bounds = BoundBox::empty;
|
||||
bounds.grow(leaf0->m_bounds);
|
||||
bounds.grow(leaf1->m_bounds);
|
||||
bounds.grow(leaf0->bounds);
|
||||
bounds.grow(leaf1->bounds);
|
||||
|
||||
BVHNode *inner_node = new InnerNode(bounds, leaf0, leaf1);
|
||||
inner_node->m_time_from = min(leaf0->m_time_from, leaf1->m_time_from);
|
||||
inner_node->m_time_to = max(leaf0->m_time_to, leaf1->m_time_to);
|
||||
inner_node->time_from = min(leaf0->time_from, leaf1->time_from);
|
||||
inner_node->time_to = max(leaf0->time_to, leaf1->time_to);
|
||||
return inner_node;
|
||||
}
|
||||
}
|
||||
@@ -891,11 +905,13 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
* can not control.
|
||||
*/
|
||||
typedef StackAllocator<256, int> LeafStackAllocator;
|
||||
typedef StackAllocator<256, float2> LeafTimeStackAllocator;
|
||||
typedef StackAllocator<256, BVHReference> LeafReferenceStackAllocator;
|
||||
|
||||
vector<int, LeafStackAllocator> p_type[PRIMITIVE_NUM_TOTAL];
|
||||
vector<int, LeafStackAllocator> p_index[PRIMITIVE_NUM_TOTAL];
|
||||
vector<int, LeafStackAllocator> p_object[PRIMITIVE_NUM_TOTAL];
|
||||
vector<float2, LeafTimeStackAllocator> p_time[PRIMITIVE_NUM_TOTAL];
|
||||
vector<BVHReference, LeafReferenceStackAllocator> p_ref[PRIMITIVE_NUM_TOTAL];
|
||||
|
||||
/* TODO(sergey): In theory we should be able to store references. */
|
||||
@@ -918,6 +934,8 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
p_type[type_index].push_back(ref.prim_type());
|
||||
p_index[type_index].push_back(ref.prim_index());
|
||||
p_object[type_index].push_back(ref.prim_object());
|
||||
p_time[type_index].push_back(make_float2(ref.time_from(),
|
||||
ref.time_to()));
|
||||
|
||||
bounds[type_index].grow(ref.bounds());
|
||||
visibility[type_index] |= objects[ref.prim_object()]->visibility;
|
||||
@@ -947,9 +965,13 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
vector<int, LeafStackAllocator> local_prim_type,
|
||||
local_prim_index,
|
||||
local_prim_object;
|
||||
vector<float2, LeafTimeStackAllocator> local_prim_time;
|
||||
local_prim_type.resize(num_new_prims);
|
||||
local_prim_index.resize(num_new_prims);
|
||||
local_prim_object.resize(num_new_prims);
|
||||
if(need_prim_time) {
|
||||
local_prim_time.resize(num_new_prims);
|
||||
}
|
||||
for(int i = 0; i < PRIMITIVE_NUM_TOTAL; ++i) {
|
||||
int num = (int)p_type[i].size();
|
||||
if(num != 0) {
|
||||
@@ -962,6 +984,9 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
local_prim_type[index] = p_type[i][j];
|
||||
local_prim_index[index] = p_index[i][j];
|
||||
local_prim_object[index] = p_object[i][j];
|
||||
if(need_prim_time) {
|
||||
local_prim_time[index] = p_time[i][j];
|
||||
}
|
||||
if(params.use_unaligned_nodes && !alignment_found) {
|
||||
alignment_found =
|
||||
unaligned_heuristic.compute_aligned_space(p_ref[i][j],
|
||||
@@ -979,19 +1004,19 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
time_from = min(time_from, ref.time_from());
|
||||
time_to = max(time_to, ref.time_to());
|
||||
}
|
||||
leaf_node->m_time_from = time_from;
|
||||
leaf_node->m_time_to = time_to;
|
||||
leaf_node->time_from = time_from;
|
||||
leaf_node->time_to = time_to;
|
||||
}
|
||||
if(alignment_found) {
|
||||
/* Need to recalculate leaf bounds with new alignment. */
|
||||
leaf_node->m_bounds = BoundBox::empty;
|
||||
leaf_node->bounds = BoundBox::empty;
|
||||
for(int j = 0; j < num; ++j) {
|
||||
const BVHReference &ref = p_ref[i][j];
|
||||
BoundBox ref_bounds =
|
||||
unaligned_heuristic.compute_aligned_prim_boundbox(
|
||||
ref,
|
||||
aligned_space);
|
||||
leaf_node->m_bounds.grow(ref_bounds);
|
||||
leaf_node->bounds.grow(ref_bounds);
|
||||
}
|
||||
/* Set alignment space. */
|
||||
leaf_node->set_aligned_space(aligned_space);
|
||||
@@ -1028,11 +1053,17 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
prim_type.reserve(reserve);
|
||||
prim_index.reserve(reserve);
|
||||
prim_object.reserve(reserve);
|
||||
if(need_prim_time) {
|
||||
prim_time.reserve(reserve);
|
||||
}
|
||||
}
|
||||
|
||||
prim_type.resize(range_end);
|
||||
prim_index.resize(range_end);
|
||||
prim_object.resize(range_end);
|
||||
if(need_prim_time) {
|
||||
prim_time.resize(range_end);
|
||||
}
|
||||
}
|
||||
spatial_spin_lock.unlock();
|
||||
|
||||
@@ -1041,6 +1072,9 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
|
||||
memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
|
||||
memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
|
||||
if(need_prim_time) {
|
||||
memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2)*num_new_leaf_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
@@ -1053,6 +1087,9 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
|
||||
memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
|
||||
memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
|
||||
if(need_prim_time) {
|
||||
memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2)*num_new_leaf_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1062,8 +1099,8 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
*/
|
||||
for(int i = 0; i < num_leaves; ++i) {
|
||||
LeafNode *leaf = (LeafNode *)leaves[i];
|
||||
leaf->m_lo += start_index;
|
||||
leaf->m_hi += start_index;
|
||||
leaf->lo += start_index;
|
||||
leaf->hi += start_index;
|
||||
}
|
||||
|
||||
/* Create leaf node for object. */
|
||||
@@ -1092,17 +1129,17 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
return new InnerNode(range.bounds(), leaves[0], leaves[1]);
|
||||
}
|
||||
else if(num_leaves == 3) {
|
||||
BoundBox inner_bounds = merge(leaves[1]->m_bounds, leaves[2]->m_bounds);
|
||||
BoundBox inner_bounds = merge(leaves[1]->bounds, leaves[2]->bounds);
|
||||
BVHNode *inner = new InnerNode(inner_bounds, leaves[1], leaves[2]);
|
||||
return new InnerNode(range.bounds(), leaves[0], inner);
|
||||
} else {
|
||||
/* Should be doing more branches if more primitive types added. */
|
||||
assert(num_leaves <= 5);
|
||||
BoundBox inner_bounds_a = merge(leaves[0]->m_bounds, leaves[1]->m_bounds);
|
||||
BoundBox inner_bounds_b = merge(leaves[2]->m_bounds, leaves[3]->m_bounds);
|
||||
BoundBox inner_bounds_a = merge(leaves[0]->bounds, leaves[1]->bounds);
|
||||
BoundBox inner_bounds_b = merge(leaves[2]->bounds, leaves[3]->bounds);
|
||||
BVHNode *inner_a = new InnerNode(inner_bounds_a, leaves[0], leaves[1]);
|
||||
BVHNode *inner_b = new InnerNode(inner_bounds_b, leaves[2], leaves[3]);
|
||||
BoundBox inner_bounds_c = merge(inner_a->m_bounds, inner_b->m_bounds);
|
||||
BoundBox inner_bounds_c = merge(inner_a->bounds, inner_b->bounds);
|
||||
BVHNode *inner_c = new InnerNode(inner_bounds_c, inner_a, inner_b);
|
||||
if(num_leaves == 5) {
|
||||
return new InnerNode(range.bounds(), inner_c, leaves[4]);
|
||||
@@ -1137,8 +1174,8 @@ void BVHBuild::rotate(BVHNode *node, int max_depth)
|
||||
rotate(parent->children[c], max_depth-1);
|
||||
|
||||
/* compute current area of all children */
|
||||
BoundBox bounds0 = parent->children[0]->m_bounds;
|
||||
BoundBox bounds1 = parent->children[1]->m_bounds;
|
||||
BoundBox bounds0 = parent->children[0]->bounds;
|
||||
BoundBox bounds1 = parent->children[1]->bounds;
|
||||
|
||||
float area0 = bounds0.half_area();
|
||||
float area1 = bounds1.half_area();
|
||||
@@ -1158,8 +1195,8 @@ void BVHBuild::rotate(BVHNode *node, int max_depth)
|
||||
BoundBox& other = (c == 0)? bounds1: bounds0;
|
||||
|
||||
/* transpose child bounds */
|
||||
BoundBox target0 = child->children[0]->m_bounds;
|
||||
BoundBox target1 = child->children[1]->m_bounds;
|
||||
BoundBox target0 = child->children[0]->bounds;
|
||||
BoundBox target1 = child->children[1]->bounds;
|
||||
|
||||
/* compute cost for both possible swaps */
|
||||
float cost0 = merge(other, target1).half_area() - child_area[c];
|
||||
@@ -1191,7 +1228,7 @@ void BVHBuild::rotate(BVHNode *node, int max_depth)
|
||||
InnerNode *child = (InnerNode*)parent->children[best_child];
|
||||
|
||||
swap(parent->children[best_other], child->children[best_target]);
|
||||
child->m_bounds = merge(child->children[0]->m_bounds, child->children[1]->m_bounds);
|
||||
child->bounds = merge(child->children[0]->bounds, child->children[1]->bounds);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -20,13 +20,13 @@
|
||||
|
||||
#include <float.h>
|
||||
|
||||
#include "bvh.h"
|
||||
#include "bvh_binning.h"
|
||||
#include "bvh_unaligned.h"
|
||||
#include "bvh/bvh.h"
|
||||
#include "bvh/bvh_binning.h"
|
||||
#include "bvh/bvh_unaligned.h"
|
||||
|
||||
#include "util_boundbox.h"
|
||||
#include "util_task.h"
|
||||
#include "util_vector.h"
|
||||
#include "util/util_boundbox.h"
|
||||
#include "util/util_task.h"
|
||||
#include "util/util_vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -48,6 +48,7 @@ public:
|
||||
array<int>& prim_type,
|
||||
array<int>& prim_index,
|
||||
array<int>& prim_object,
|
||||
array<float2>& prim_time,
|
||||
const BVHParams& params,
|
||||
Progress& progress);
|
||||
~BVHBuild();
|
||||
@@ -112,6 +113,9 @@ protected:
|
||||
array<int>& prim_type;
|
||||
array<int>& prim_index;
|
||||
array<int>& prim_object;
|
||||
array<float2>& prim_time;
|
||||
|
||||
bool need_prim_time;
|
||||
|
||||
/* Build parameters. */
|
||||
BVHParams params;
|
||||
|
@@ -15,12 +15,12 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "bvh.h"
|
||||
#include "bvh_build.h"
|
||||
#include "bvh_node.h"
|
||||
#include "bvh/bvh.h"
|
||||
#include "bvh/bvh_build.h"
|
||||
#include "bvh/bvh_node.h"
|
||||
|
||||
#include "util_debug.h"
|
||||
#include "util_vector.h"
|
||||
#include "util/util_debug.h"
|
||||
#include "util/util_vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -62,12 +62,12 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
|
||||
}
|
||||
return cnt;
|
||||
case BVH_STAT_ALIGNED_COUNT:
|
||||
if(!is_unaligned()) {
|
||||
if(!is_unaligned) {
|
||||
cnt = 1;
|
||||
}
|
||||
break;
|
||||
case BVH_STAT_UNALIGNED_COUNT:
|
||||
if(is_unaligned()) {
|
||||
if(is_unaligned) {
|
||||
cnt = 1;
|
||||
}
|
||||
break;
|
||||
@@ -75,7 +75,7 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
|
||||
if(!is_leaf()) {
|
||||
bool has_unaligned = false;
|
||||
for(int j = 0; j < num_children(); j++) {
|
||||
has_unaligned |= get_child(j)->is_unaligned();
|
||||
has_unaligned |= get_child(j)->is_unaligned;
|
||||
}
|
||||
cnt += has_unaligned? 0: 1;
|
||||
}
|
||||
@@ -84,7 +84,7 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
|
||||
if(!is_leaf()) {
|
||||
bool has_unaligned = false;
|
||||
for(int j = 0; j < num_children(); j++) {
|
||||
has_unaligned |= get_child(j)->is_unaligned();
|
||||
has_unaligned |= get_child(j)->is_unaligned;
|
||||
}
|
||||
cnt += has_unaligned? 1: 0;
|
||||
}
|
||||
@@ -95,12 +95,12 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
|
||||
for(int i = 0; i < num_children(); i++) {
|
||||
BVHNode *node = get_child(i);
|
||||
if(node->is_leaf()) {
|
||||
has_unaligned |= node->is_unaligned();
|
||||
has_unaligned |= node->is_unaligned;
|
||||
}
|
||||
else {
|
||||
for(int j = 0; j < node->num_children(); j++) {
|
||||
cnt += node->get_child(j)->getSubtreeSize(stat);
|
||||
has_unaligned |= node->get_child(j)->is_unaligned();
|
||||
has_unaligned |= node->get_child(j)->is_unaligned;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -113,12 +113,12 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
|
||||
for(int i = 0; i < num_children(); i++) {
|
||||
BVHNode *node = get_child(i);
|
||||
if(node->is_leaf()) {
|
||||
has_unaligned |= node->is_unaligned();
|
||||
has_unaligned |= node->is_unaligned;
|
||||
}
|
||||
else {
|
||||
for(int j = 0; j < node->num_children(); j++) {
|
||||
cnt += node->get_child(j)->getSubtreeSize(stat);
|
||||
has_unaligned |= node->get_child(j)->is_unaligned();
|
||||
has_unaligned |= node->get_child(j)->is_unaligned;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -126,10 +126,10 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
|
||||
}
|
||||
return cnt;
|
||||
case BVH_STAT_ALIGNED_LEAF_COUNT:
|
||||
cnt = (is_leaf() && !is_unaligned()) ? 1 : 0;
|
||||
cnt = (is_leaf() && !is_unaligned) ? 1 : 0;
|
||||
break;
|
||||
case BVH_STAT_UNALIGNED_LEAF_COUNT:
|
||||
cnt = (is_leaf() && is_unaligned()) ? 1 : 0;
|
||||
cnt = (is_leaf() && is_unaligned) ? 1 : 0;
|
||||
break;
|
||||
default:
|
||||
assert(0); /* unknown mode */
|
||||
@@ -157,7 +157,7 @@ float BVHNode::computeSubtreeSAHCost(const BVHParams& p, float probability) cons
|
||||
|
||||
for(int i = 0; i < num_children(); i++) {
|
||||
BVHNode *child = get_child(i);
|
||||
SAH += child->computeSubtreeSAHCost(p, probability * child->m_bounds.safe_area()/m_bounds.safe_area());
|
||||
SAH += child->computeSubtreeSAHCost(p, probability * child->bounds.safe_area()/bounds.safe_area());
|
||||
}
|
||||
|
||||
return SAH;
|
||||
@@ -165,15 +165,15 @@ float BVHNode::computeSubtreeSAHCost(const BVHParams& p, float probability) cons
|
||||
|
||||
uint BVHNode::update_visibility()
|
||||
{
|
||||
if(!is_leaf() && m_visibility == 0) {
|
||||
if(!is_leaf() && visibility == 0) {
|
||||
InnerNode *inner = (InnerNode*)this;
|
||||
BVHNode *child0 = inner->children[0];
|
||||
BVHNode *child1 = inner->children[1];
|
||||
|
||||
m_visibility = child0->update_visibility()|child1->update_visibility();
|
||||
visibility = child0->update_visibility()|child1->update_visibility();
|
||||
}
|
||||
|
||||
return m_visibility;
|
||||
return visibility;
|
||||
}
|
||||
|
||||
void BVHNode::update_time()
|
||||
@@ -184,8 +184,8 @@ void BVHNode::update_time()
|
||||
BVHNode *child1 = inner->children[1];
|
||||
child0->update_time();
|
||||
child1->update_time();
|
||||
m_time_from = min(child0->m_time_from, child1->m_time_from);
|
||||
m_time_to = max(child0->m_time_to, child1->m_time_to);
|
||||
time_from = min(child0->time_from, child1->time_from);
|
||||
time_to = max(child0->time_to, child1->time_to);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -209,7 +209,7 @@ void LeafNode::print(int depth) const
|
||||
for(int i = 0; i < depth; i++)
|
||||
printf(" ");
|
||||
|
||||
printf("leaf node %d to %d\n", m_lo, m_hi);
|
||||
printf("leaf node %d to %d\n", lo, hi);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -18,9 +18,9 @@
|
||||
#ifndef __BVH_NODE_H__
|
||||
#define __BVH_NODE_H__
|
||||
|
||||
#include "util_boundbox.h"
|
||||
#include "util_debug.h"
|
||||
#include "util_types.h"
|
||||
#include "util/util_boundbox.h"
|
||||
#include "util/util_debug.h"
|
||||
#include "util/util_types.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -46,16 +46,16 @@ class BVHParams;
|
||||
class BVHNode
|
||||
{
|
||||
public:
|
||||
BVHNode() : m_is_unaligned(false),
|
||||
m_aligned_space(NULL),
|
||||
m_time_from(0.0f),
|
||||
m_time_to(1.0f)
|
||||
BVHNode() : is_unaligned(false),
|
||||
aligned_space(NULL),
|
||||
time_from(0.0f),
|
||||
time_to(1.0f)
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~BVHNode()
|
||||
{
|
||||
delete m_aligned_space;
|
||||
delete aligned_space;
|
||||
}
|
||||
|
||||
virtual bool is_leaf() const = 0;
|
||||
@@ -63,30 +63,26 @@ public:
|
||||
virtual BVHNode *get_child(int i) const = 0;
|
||||
virtual int num_triangles() const { return 0; }
|
||||
virtual void print(int depth = 0) const = 0;
|
||||
bool is_unaligned() const { return m_is_unaligned; }
|
||||
|
||||
inline void set_aligned_space(const Transform& aligned_space)
|
||||
{
|
||||
m_is_unaligned = true;
|
||||
if(m_aligned_space == NULL) {
|
||||
m_aligned_space = new Transform(aligned_space);
|
||||
is_unaligned = true;
|
||||
if(this->aligned_space == NULL) {
|
||||
this->aligned_space = new Transform(aligned_space);
|
||||
}
|
||||
else {
|
||||
*m_aligned_space = aligned_space;
|
||||
*this->aligned_space = aligned_space;
|
||||
}
|
||||
}
|
||||
|
||||
inline Transform get_aligned_space() const
|
||||
{
|
||||
if(m_aligned_space == NULL) {
|
||||
if(aligned_space == NULL) {
|
||||
return transform_identity();
|
||||
}
|
||||
return *m_aligned_space;
|
||||
return *aligned_space;
|
||||
}
|
||||
|
||||
BoundBox m_bounds;
|
||||
uint m_visibility;
|
||||
|
||||
// Subtree functions
|
||||
int getSubtreeSize(BVH_STAT stat=BVH_STAT_NODE_COUNT) const;
|
||||
float computeSubtreeSAHCost(const BVHParams& p, float probability = 1.0f) const;
|
||||
@@ -95,13 +91,18 @@ public:
|
||||
uint update_visibility();
|
||||
void update_time();
|
||||
|
||||
bool m_is_unaligned;
|
||||
// Properties.
|
||||
BoundBox bounds;
|
||||
uint visibility;
|
||||
|
||||
// TODO(sergey): Can be stored as 3x3 matrix, but better to have some
|
||||
// utilities and type defines in util_transform first.
|
||||
Transform *m_aligned_space;
|
||||
bool is_unaligned;
|
||||
|
||||
float m_time_from, m_time_to;
|
||||
/* TODO(sergey): Can be stored as 3x3 matrix, but better to have some
|
||||
* utilities and type defines in util_transform first.
|
||||
*/
|
||||
Transform *aligned_space;
|
||||
|
||||
float time_from, time_to;
|
||||
};
|
||||
|
||||
class InnerNode : public BVHNode
|
||||
@@ -111,20 +112,20 @@ public:
|
||||
BVHNode* child0,
|
||||
BVHNode* child1)
|
||||
{
|
||||
m_bounds = bounds;
|
||||
this->bounds = bounds;
|
||||
children[0] = child0;
|
||||
children[1] = child1;
|
||||
|
||||
if(child0 && child1)
|
||||
m_visibility = child0->m_visibility|child1->m_visibility;
|
||||
visibility = child0->visibility|child1->visibility;
|
||||
else
|
||||
m_visibility = 0; /* happens on build cancel */
|
||||
visibility = 0; /* happens on build cancel */
|
||||
}
|
||||
|
||||
explicit InnerNode(const BoundBox& bounds)
|
||||
{
|
||||
m_bounds = bounds;
|
||||
m_visibility = 0;
|
||||
this->bounds = bounds;
|
||||
visibility = 0;
|
||||
children[0] = NULL;
|
||||
children[1] = NULL;
|
||||
}
|
||||
@@ -140,12 +141,12 @@ public:
|
||||
class LeafNode : public BVHNode
|
||||
{
|
||||
public:
|
||||
LeafNode(const BoundBox& bounds, uint visibility, int lo, int hi)
|
||||
LeafNode(const BoundBox& bounds, uint visibility, int lo, int hi)
|
||||
: lo(lo),
|
||||
hi(hi)
|
||||
{
|
||||
m_bounds = bounds;
|
||||
m_visibility = visibility;
|
||||
m_lo = lo;
|
||||
m_hi = hi;
|
||||
this->bounds = bounds;
|
||||
this->visibility = visibility;
|
||||
}
|
||||
|
||||
LeafNode(const LeafNode& s)
|
||||
@@ -157,14 +158,13 @@ public:
|
||||
bool is_leaf() const { return true; }
|
||||
int num_children() const { return 0; }
|
||||
BVHNode *get_child(int) const { return NULL; }
|
||||
int num_triangles() const { return m_hi - m_lo; }
|
||||
int num_triangles() const { return hi - lo; }
|
||||
void print(int depth) const;
|
||||
|
||||
int m_lo;
|
||||
int m_hi;
|
||||
int lo;
|
||||
int hi;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __BVH_NODE_H__ */
|
||||
|
||||
|
@@ -18,9 +18,9 @@
|
||||
#ifndef __BVH_PARAMS_H__
|
||||
#define __BVH_PARAMS_H__
|
||||
|
||||
#include "util_boundbox.h"
|
||||
#include "util/util_boundbox.h"
|
||||
|
||||
#include "kernel_types.h"
|
||||
#include "kernel/kernel_types.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -104,6 +104,7 @@ public:
|
||||
primitive_mask = PRIMITIVE_ALL;
|
||||
|
||||
num_motion_curve_steps = 0;
|
||||
num_motion_triangle_steps = 0;
|
||||
}
|
||||
|
||||
/* SAH costs */
|
||||
|
@@ -15,12 +15,12 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "bvh_build.h"
|
||||
#include "bvh_sort.h"
|
||||
#include "bvh/bvh_build.h"
|
||||
#include "bvh/bvh_sort.h"
|
||||
|
||||
#include "util_algorithm.h"
|
||||
#include "util_debug.h"
|
||||
#include "util_task.h"
|
||||
#include "util/util_algorithm.h"
|
||||
#include "util/util_debug.h"
|
||||
#include "util/util_task.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -15,14 +15,14 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "bvh_build.h"
|
||||
#include "bvh_split.h"
|
||||
#include "bvh_sort.h"
|
||||
#include "bvh/bvh_build.h"
|
||||
#include "bvh/bvh_split.h"
|
||||
#include "bvh/bvh_sort.h"
|
||||
|
||||
#include "mesh.h"
|
||||
#include "object.h"
|
||||
#include "render/mesh.h"
|
||||
#include "render/object.h"
|
||||
|
||||
#include "util_algorithm.h"
|
||||
#include "util/util_algorithm.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -18,8 +18,8 @@
|
||||
#ifndef __BVH_SPLIT_H__
|
||||
#define __BVH_SPLIT_H__
|
||||
|
||||
#include "bvh_build.h"
|
||||
#include "bvh_params.h"
|
||||
#include "bvh/bvh_build.h"
|
||||
#include "bvh/bvh_params.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -15,17 +15,17 @@
|
||||
*/
|
||||
|
||||
|
||||
#include "bvh_unaligned.h"
|
||||
#include "bvh/bvh_unaligned.h"
|
||||
|
||||
#include "mesh.h"
|
||||
#include "object.h"
|
||||
#include "render/mesh.h"
|
||||
#include "render/object.h"
|
||||
|
||||
#include "bvh_binning.h"
|
||||
#include "bvh/bvh_binning.h"
|
||||
#include "bvh_params.h"
|
||||
|
||||
#include "util_boundbox.h"
|
||||
#include "util_debug.h"
|
||||
#include "util_transform.h"
|
||||
#include "util/util_boundbox.h"
|
||||
#include "util/util_debug.h"
|
||||
#include "util/util_transform.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -17,7 +17,7 @@
|
||||
#ifndef __BVH_UNALIGNED_H__
|
||||
#define __BVH_UNALIGNED_H__
|
||||
|
||||
#include "util_vector.h"
|
||||
#include "util/util_vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -1,12 +1,6 @@
|
||||
|
||||
set(INC
|
||||
.
|
||||
../graph
|
||||
../kernel
|
||||
../kernel/svm
|
||||
../kernel/osl
|
||||
../util
|
||||
../render
|
||||
..
|
||||
../../glew-mx
|
||||
)
|
||||
|
||||
@@ -33,6 +27,7 @@ set(SRC
|
||||
device_cuda.cpp
|
||||
device_multi.cpp
|
||||
device_opencl.cpp
|
||||
device_split_kernel.cpp
|
||||
device_task.cpp
|
||||
)
|
||||
|
||||
@@ -56,6 +51,7 @@ set(SRC_HEADERS
|
||||
device_memory.h
|
||||
device_intern.h
|
||||
device_network.h
|
||||
device_split_kernel.h
|
||||
device_task.h
|
||||
)
|
||||
|
||||
|
@@ -17,18 +17,18 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "device.h"
|
||||
#include "device_intern.h"
|
||||
#include "device/device.h"
|
||||
#include "device/device_intern.h"
|
||||
|
||||
#include "util_debug.h"
|
||||
#include "util_foreach.h"
|
||||
#include "util_half.h"
|
||||
#include "util_math.h"
|
||||
#include "util_opengl.h"
|
||||
#include "util_time.h"
|
||||
#include "util_types.h"
|
||||
#include "util_vector.h"
|
||||
#include "util_string.h"
|
||||
#include "util/util_debug.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_half.h"
|
||||
#include "util/util_math.h"
|
||||
#include "util/util_opengl.h"
|
||||
#include "util/util_time.h"
|
||||
#include "util/util_types.h"
|
||||
#include "util/util_vector.h"
|
||||
#include "util/util_string.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -48,11 +48,11 @@ std::ostream& operator <<(std::ostream &os,
|
||||
os << "Max nodes group: " << requested_features.max_nodes_group << std::endl;
|
||||
/* TODO(sergey): Decode bitflag into list of names. */
|
||||
os << "Nodes features: " << requested_features.nodes_features << std::endl;
|
||||
os << "Use hair: "
|
||||
os << "Use Hair: "
|
||||
<< string_from_bool(requested_features.use_hair) << std::endl;
|
||||
os << "Use object motion: "
|
||||
os << "Use Object Motion: "
|
||||
<< string_from_bool(requested_features.use_object_motion) << std::endl;
|
||||
os << "Use camera motion: "
|
||||
os << "Use Camera Motion: "
|
||||
<< string_from_bool(requested_features.use_camera_motion) << std::endl;
|
||||
os << "Use Baking: "
|
||||
<< string_from_bool(requested_features.use_baking) << std::endl;
|
||||
@@ -80,7 +80,7 @@ Device::~Device()
|
||||
|
||||
void Device::pixels_alloc(device_memory& mem)
|
||||
{
|
||||
mem_alloc(mem, MEM_READ_WRITE);
|
||||
mem_alloc("pixels", mem, MEM_READ_WRITE);
|
||||
}
|
||||
|
||||
void Device::pixels_copy_from(device_memory& mem, int y, int w, int h)
|
||||
|
@@ -19,15 +19,15 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "device_memory.h"
|
||||
#include "device_task.h"
|
||||
#include "device/device_memory.h"
|
||||
#include "device/device_task.h"
|
||||
|
||||
#include "util_list.h"
|
||||
#include "util_stats.h"
|
||||
#include "util_string.h"
|
||||
#include "util_thread.h"
|
||||
#include "util_types.h"
|
||||
#include "util_vector.h"
|
||||
#include "util/util_list.h"
|
||||
#include "util/util_stats.h"
|
||||
#include "util/util_string.h"
|
||||
#include "util/util_thread.h"
|
||||
#include "util/util_types.h"
|
||||
#include "util/util_vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -121,6 +121,9 @@ public:
|
||||
/* Use Transparent shadows */
|
||||
bool use_transparent;
|
||||
|
||||
/* Use various shadow tricks, such as shadow catcher. */
|
||||
bool use_shadow_tricks;
|
||||
|
||||
DeviceRequestedFeatures()
|
||||
{
|
||||
/* TODO(sergey): Find more meaningful defaults. */
|
||||
@@ -137,6 +140,7 @@ public:
|
||||
use_integrator_branched = false;
|
||||
use_patch_evaluation = false;
|
||||
use_transparent = false;
|
||||
use_shadow_tricks = false;
|
||||
}
|
||||
|
||||
bool modified(const DeviceRequestedFeatures& requested_features)
|
||||
@@ -153,7 +157,8 @@ public:
|
||||
use_volume == requested_features.use_volume &&
|
||||
use_integrator_branched == requested_features.use_integrator_branched &&
|
||||
use_patch_evaluation == requested_features.use_patch_evaluation &&
|
||||
use_transparent == requested_features.use_transparent);
|
||||
use_transparent == requested_features.use_transparent &&
|
||||
use_shadow_tricks == requested_features.use_shadow_tricks);
|
||||
}
|
||||
|
||||
/* Convert the requested features structure to a build options,
|
||||
@@ -194,9 +199,12 @@ public:
|
||||
if(!use_patch_evaluation) {
|
||||
build_options += " -D__NO_PATCH_EVAL__";
|
||||
}
|
||||
if(!use_transparent) {
|
||||
if(!use_transparent && !use_volume) {
|
||||
build_options += " -D__NO_TRANSPARENT__";
|
||||
}
|
||||
if(!use_shadow_tricks) {
|
||||
build_options += " -D__NO_SHADOW_TRICKS__";
|
||||
}
|
||||
return build_options;
|
||||
}
|
||||
};
|
||||
@@ -228,13 +236,21 @@ public:
|
||||
DeviceInfo info;
|
||||
virtual const string& error_message() { return error_msg; }
|
||||
bool have_error() { return !error_message().empty(); }
|
||||
virtual void set_error(const string& error)
|
||||
{
|
||||
if(!have_error()) {
|
||||
error_msg = error;
|
||||
}
|
||||
fprintf(stderr, "%s\n", error.c_str());
|
||||
fflush(stderr);
|
||||
}
|
||||
virtual bool show_samples() const { return false; }
|
||||
|
||||
/* statistics */
|
||||
Stats &stats;
|
||||
|
||||
/* regular memory */
|
||||
virtual void mem_alloc(device_memory& mem, MemoryType type) = 0;
|
||||
virtual void mem_alloc(const char *name, device_memory& mem, MemoryType type) = 0;
|
||||
virtual void mem_copy_to(device_memory& mem) = 0;
|
||||
virtual void mem_copy_from(device_memory& mem,
|
||||
int y, int w, int h, int elem) = 0;
|
||||
|
@@ -20,36 +20,124 @@
|
||||
/* So ImathMath is included before our kernel_cpu_compat. */
|
||||
#ifdef WITH_OSL
|
||||
/* So no context pollution happens from indirectly included windows.h */
|
||||
# include "util_windows.h"
|
||||
# include "util/util_windows.h"
|
||||
# include <OSL/oslexec.h>
|
||||
#endif
|
||||
|
||||
#include "device.h"
|
||||
#include "device_intern.h"
|
||||
#include "device/device.h"
|
||||
#include "device/device_intern.h"
|
||||
#include "device/device_split_kernel.h"
|
||||
|
||||
#include "kernel.h"
|
||||
#include "kernel_compat_cpu.h"
|
||||
#include "kernel_types.h"
|
||||
#include "kernel_globals.h"
|
||||
#include "kernel/kernel.h"
|
||||
#include "kernel/kernel_compat_cpu.h"
|
||||
#include "kernel/kernel_types.h"
|
||||
#include "kernel/split/kernel_split_data.h"
|
||||
#include "kernel/kernel_globals.h"
|
||||
|
||||
#include "osl_shader.h"
|
||||
#include "osl_globals.h"
|
||||
#include "kernel/osl/osl_shader.h"
|
||||
#include "kernel/osl/osl_globals.h"
|
||||
|
||||
#include "buffers.h"
|
||||
#include "render/buffers.h"
|
||||
|
||||
#include "util_debug.h"
|
||||
#include "util_foreach.h"
|
||||
#include "util_function.h"
|
||||
#include "util_logging.h"
|
||||
#include "util_opengl.h"
|
||||
#include "util_progress.h"
|
||||
#include "util_system.h"
|
||||
#include "util_thread.h"
|
||||
#include "util/util_debug.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_function.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_map.h"
|
||||
#include "util/util_opengl.h"
|
||||
#include "util/util_progress.h"
|
||||
#include "util/util_system.h"
|
||||
#include "util/util_thread.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class CPUDevice;
|
||||
|
||||
class CPUSplitKernel : public DeviceSplitKernel {
|
||||
CPUDevice *device;
|
||||
public:
|
||||
explicit CPUSplitKernel(CPUDevice *device);
|
||||
|
||||
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
|
||||
RenderTile& rtile,
|
||||
int num_global_elements,
|
||||
device_memory& kernel_globals,
|
||||
device_memory& kernel_data_,
|
||||
device_memory& split_data,
|
||||
device_memory& ray_state,
|
||||
device_memory& queue_index,
|
||||
device_memory& use_queues_flag,
|
||||
device_memory& work_pool_wgs);
|
||||
|
||||
virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
|
||||
virtual int2 split_kernel_local_size();
|
||||
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
|
||||
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
|
||||
};
|
||||
|
||||
class CPUDevice : public Device
|
||||
{
|
||||
static unordered_map<string, void*> kernel_functions;
|
||||
|
||||
static void register_kernel_function(const char* name, void* func)
|
||||
{
|
||||
kernel_functions[name] = func;
|
||||
}
|
||||
|
||||
static const char* get_arch_name()
|
||||
{
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
|
||||
if(system_cpu_support_avx2()) {
|
||||
return "cpu_avx2";
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
|
||||
if(system_cpu_support_avx()) {
|
||||
return "cpu_avx";
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
|
||||
if(system_cpu_support_sse41()) {
|
||||
return "cpu_sse41";
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
|
||||
if(system_cpu_support_sse3()) {
|
||||
return "cpu_sse3";
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
|
||||
if(system_cpu_support_sse2()) {
|
||||
return "cpu_sse2";
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
return "cpu";
|
||||
}
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
static F get_kernel_function(string name)
|
||||
{
|
||||
name = string("kernel_") + get_arch_name() + "_" + name;
|
||||
|
||||
unordered_map<string, void*>::iterator it = kernel_functions.find(name);
|
||||
|
||||
if(it == kernel_functions.end()) {
|
||||
assert(!"kernel function not found");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return (F)it->second;
|
||||
}
|
||||
|
||||
friend class CPUSplitKernel;
|
||||
|
||||
public:
|
||||
TaskPool task_pool;
|
||||
KernelGlobals kernel_globals;
|
||||
@@ -57,10 +145,15 @@ public:
|
||||
#ifdef WITH_OSL
|
||||
OSLGlobals osl_globals;
|
||||
#endif
|
||||
|
||||
bool use_split_kernel;
|
||||
|
||||
DeviceRequestedFeatures requested_features;
|
||||
|
||||
CPUDevice(DeviceInfo& info, Stats &stats, bool background)
|
||||
: Device(info, stats, background)
|
||||
{
|
||||
|
||||
#ifdef WITH_OSL
|
||||
kernel_globals.osl = &osl_globals;
|
||||
#endif
|
||||
@@ -105,6 +198,28 @@ public:
|
||||
{
|
||||
VLOG(1) << "Will be using regular kernels.";
|
||||
}
|
||||
|
||||
use_split_kernel = DebugFlags().cpu.split_kernel;
|
||||
if(use_split_kernel) {
|
||||
VLOG(1) << "Will be using split kernel.";
|
||||
}
|
||||
|
||||
kernel_cpu_register_functions(register_kernel_function);
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
|
||||
kernel_cpu_sse2_register_functions(register_kernel_function);
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
|
||||
kernel_cpu_sse3_register_functions(register_kernel_function);
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
|
||||
kernel_cpu_sse41_register_functions(register_kernel_function);
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
|
||||
kernel_cpu_avx_register_functions(register_kernel_function);
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
|
||||
kernel_cpu_avx2_register_functions(register_kernel_function);
|
||||
#endif
|
||||
}
|
||||
|
||||
~CPUDevice()
|
||||
@@ -117,9 +232,20 @@ public:
|
||||
return (TaskScheduler::num_threads() == 1);
|
||||
}
|
||||
|
||||
void mem_alloc(device_memory& mem, MemoryType /*type*/)
|
||||
void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/)
|
||||
{
|
||||
if(name) {
|
||||
VLOG(1) << "Buffer allocate: " << name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
}
|
||||
|
||||
mem.device_pointer = mem.data_pointer;
|
||||
|
||||
if(!mem.device_pointer) {
|
||||
mem.device_pointer = (device_ptr)malloc(mem.memory_size());
|
||||
}
|
||||
|
||||
mem.device_size = mem.memory_size();
|
||||
stats.mem_alloc(mem.device_size);
|
||||
}
|
||||
@@ -144,6 +270,10 @@ public:
|
||||
void mem_free(device_memory& mem)
|
||||
{
|
||||
if(mem.device_pointer) {
|
||||
if(!mem.data_pointer) {
|
||||
free((void*)mem.device_pointer);
|
||||
}
|
||||
|
||||
mem.device_pointer = 0;
|
||||
stats.mem_free(mem.device_size);
|
||||
mem.device_size = 0;
|
||||
@@ -196,8 +326,14 @@ public:
|
||||
|
||||
void thread_run(DeviceTask *task)
|
||||
{
|
||||
if(task->type == DeviceTask::PATH_TRACE)
|
||||
thread_path_trace(*task);
|
||||
if(task->type == DeviceTask::PATH_TRACE) {
|
||||
if(!use_split_kernel) {
|
||||
thread_path_trace(*task);
|
||||
}
|
||||
else {
|
||||
thread_path_trace_split(*task);
|
||||
}
|
||||
}
|
||||
else if(task->type == DeviceTask::FILM_CONVERT)
|
||||
thread_film_convert(*task);
|
||||
else if(task->type == DeviceTask::SHADER)
|
||||
@@ -258,7 +394,7 @@ public:
|
||||
{
|
||||
path_trace_kernel = kernel_cpu_path_trace;
|
||||
}
|
||||
|
||||
|
||||
while(task.acquire_tile(this, tile)) {
|
||||
float *render_buffer = (float*)tile.buffer;
|
||||
uint *rng_state = (uint*)tile.rng_state;
|
||||
@@ -294,6 +430,49 @@ public:
|
||||
thread_kernel_globals_free(&kg);
|
||||
}
|
||||
|
||||
void thread_path_trace_split(DeviceTask& task)
|
||||
{
|
||||
if(task_pool.canceled()) {
|
||||
if(task.need_finish_queue == false)
|
||||
return;
|
||||
}
|
||||
|
||||
RenderTile tile;
|
||||
|
||||
CPUSplitKernel split_kernel(this);
|
||||
|
||||
/* allocate buffer for kernel globals */
|
||||
device_memory kgbuffer;
|
||||
kgbuffer.resize(sizeof(KernelGlobals));
|
||||
mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE);
|
||||
|
||||
KernelGlobals *kg = (KernelGlobals*)kgbuffer.device_pointer;
|
||||
*kg = thread_kernel_globals_init();
|
||||
|
||||
requested_features.max_closure = MAX_CLOSURE;
|
||||
if(!split_kernel.load_kernels(requested_features)) {
|
||||
thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer);
|
||||
mem_free(kgbuffer);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
while(task.acquire_tile(this, tile)) {
|
||||
device_memory data;
|
||||
split_kernel.path_trace(&task, tile, kgbuffer, data);
|
||||
|
||||
task.release_tile(tile);
|
||||
|
||||
if(task_pool.canceled()) {
|
||||
if(task.need_finish_queue == false)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer);
|
||||
mem_free(kgbuffer);
|
||||
}
|
||||
|
||||
void thread_film_convert(DeviceTask& task)
|
||||
{
|
||||
float sample_scale = 1.0f/(task.sample + 1);
|
||||
@@ -501,6 +680,10 @@ protected:
|
||||
|
||||
inline void thread_kernel_globals_free(KernelGlobals *kg)
|
||||
{
|
||||
if(kg == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if(kg->transparent_shadow_intersections != NULL) {
|
||||
free(kg->transparent_shadow_intersections);
|
||||
}
|
||||
@@ -515,8 +698,175 @@ protected:
|
||||
OSLShader::thread_free(kg);
|
||||
#endif
|
||||
}
|
||||
|
||||
virtual bool load_kernels(DeviceRequestedFeatures& requested_features_) {
|
||||
requested_features = requested_features_;
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/* split kernel */
|
||||
|
||||
class CPUSplitKernelFunction : public SplitKernelFunction {
|
||||
public:
|
||||
CPUDevice* device;
|
||||
void (*func)(KernelGlobals *kg, KernelData *data);
|
||||
|
||||
CPUSplitKernelFunction(CPUDevice* device) : device(device), func(NULL) {}
|
||||
~CPUSplitKernelFunction() {}
|
||||
|
||||
virtual bool enqueue(const KernelDimensions& dim, device_memory& kernel_globals, device_memory& data)
|
||||
{
|
||||
if(!func) {
|
||||
return false;
|
||||
}
|
||||
|
||||
KernelGlobals *kg = (KernelGlobals*)kernel_globals.device_pointer;
|
||||
kg->global_size = make_int2(dim.global_size[0], dim.global_size[1]);
|
||||
|
||||
for(int y = 0; y < dim.global_size[1]; y++) {
|
||||
for(int x = 0; x < dim.global_size[0]; x++) {
|
||||
kg->global_id = make_int2(x, y);
|
||||
|
||||
func(kg, (KernelData*)data.device_pointer);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
CPUSplitKernel::CPUSplitKernel(CPUDevice *device) : DeviceSplitKernel(device), device(device)
|
||||
{
|
||||
}
|
||||
|
||||
bool CPUSplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim,
|
||||
RenderTile& rtile,
|
||||
int num_global_elements,
|
||||
device_memory& kernel_globals,
|
||||
device_memory& data,
|
||||
device_memory& split_data,
|
||||
device_memory& ray_state,
|
||||
device_memory& queue_index,
|
||||
device_memory& use_queues_flags,
|
||||
device_memory& work_pool_wgs)
|
||||
{
|
||||
typedef void(*data_init_t)(KernelGlobals *kg,
|
||||
ccl_constant KernelData *data,
|
||||
ccl_global void *split_data_buffer,
|
||||
int num_elements,
|
||||
ccl_global char *ray_state,
|
||||
ccl_global uint *rng_state,
|
||||
int start_sample,
|
||||
int end_sample,
|
||||
int sx, int sy, int sw, int sh, int offset, int stride,
|
||||
ccl_global int *Queue_index,
|
||||
int queuesize,
|
||||
ccl_global char *use_queues_flag,
|
||||
ccl_global unsigned int *work_pool_wgs,
|
||||
unsigned int num_samples,
|
||||
ccl_global float *buffer);
|
||||
|
||||
data_init_t data_init;
|
||||
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
|
||||
if(system_cpu_support_avx2()) {
|
||||
data_init = kernel_cpu_avx2_data_init;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
|
||||
if(system_cpu_support_avx()) {
|
||||
data_init = kernel_cpu_avx_data_init;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
|
||||
if(system_cpu_support_sse41()) {
|
||||
data_init = kernel_cpu_sse41_data_init;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
|
||||
if(system_cpu_support_sse3()) {
|
||||
data_init = kernel_cpu_sse3_data_init;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
|
||||
if(system_cpu_support_sse2()) {
|
||||
data_init = kernel_cpu_sse2_data_init;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
data_init = kernel_cpu_data_init;
|
||||
}
|
||||
|
||||
KernelGlobals *kg = (KernelGlobals*)kernel_globals.device_pointer;
|
||||
kg->global_size = make_int2(dim.global_size[0], dim.global_size[1]);
|
||||
|
||||
for(int y = 0; y < dim.global_size[1]; y++) {
|
||||
for(int x = 0; x < dim.global_size[0]; x++) {
|
||||
kg->global_id = make_int2(x, y);
|
||||
|
||||
data_init((KernelGlobals*)kernel_globals.device_pointer,
|
||||
(KernelData*)data.device_pointer,
|
||||
(void*)split_data.device_pointer,
|
||||
num_global_elements,
|
||||
(char*)ray_state.device_pointer,
|
||||
(uint*)rtile.rng_state,
|
||||
rtile.start_sample,
|
||||
rtile.start_sample + rtile.num_samples,
|
||||
rtile.x,
|
||||
rtile.y,
|
||||
rtile.w,
|
||||
rtile.h,
|
||||
rtile.offset,
|
||||
rtile.stride,
|
||||
(int*)queue_index.device_pointer,
|
||||
dim.global_size[0] * dim.global_size[1],
|
||||
(char*)use_queues_flags.device_pointer,
|
||||
(uint*)work_pool_wgs.device_pointer,
|
||||
rtile.num_samples,
|
||||
(float*)rtile.buffer);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
SplitKernelFunction* CPUSplitKernel::get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&)
|
||||
{
|
||||
CPUSplitKernelFunction *kernel = new CPUSplitKernelFunction(device);
|
||||
|
||||
kernel->func = device->get_kernel_function<void(*)(KernelGlobals*, KernelData*)>(kernel_name);
|
||||
if(!kernel->func) {
|
||||
delete kernel;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return kernel;
|
||||
}
|
||||
|
||||
int2 CPUSplitKernel::split_kernel_local_size()
|
||||
{
|
||||
return make_int2(1, 1);
|
||||
}
|
||||
|
||||
int2 CPUSplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memory& /*data*/, DeviceTask * /*task*/) {
|
||||
return make_int2(64, 1);
|
||||
}
|
||||
|
||||
uint64_t CPUSplitKernel::state_buffer_size(device_memory& kernel_globals, device_memory& /*data*/, size_t num_threads) {
|
||||
KernelGlobals *kg = (KernelGlobals*)kernel_globals.device_pointer;
|
||||
|
||||
return split_data_buffer_size(kg, num_threads);
|
||||
}
|
||||
|
||||
unordered_map<string, void*> CPUDevice::kernel_functions;
|
||||
|
||||
Device *device_cpu_create(DeviceInfo& info, Stats &stats, bool background)
|
||||
{
|
||||
return new CPUDevice(info, stats, background);
|
||||
|
@@ -15,32 +15,36 @@
|
||||
*/
|
||||
|
||||
#include <climits>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "device.h"
|
||||
#include "device_intern.h"
|
||||
#include "device/device.h"
|
||||
#include "device/device_intern.h"
|
||||
#include "device/device_split_kernel.h"
|
||||
|
||||
#include "buffers.h"
|
||||
#include "render/buffers.h"
|
||||
|
||||
#ifdef WITH_CUDA_DYNLOAD
|
||||
# include "cuew.h"
|
||||
#else
|
||||
# include "util_opengl.h"
|
||||
# include "util/util_opengl.h"
|
||||
# include <cuda.h>
|
||||
# include <cudaGL.h>
|
||||
#endif
|
||||
#include "util_debug.h"
|
||||
#include "util_logging.h"
|
||||
#include "util_map.h"
|
||||
#include "util_md5.h"
|
||||
#include "util_opengl.h"
|
||||
#include "util_path.h"
|
||||
#include "util_string.h"
|
||||
#include "util_system.h"
|
||||
#include "util_types.h"
|
||||
#include "util_time.h"
|
||||
#include "util/util_debug.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_map.h"
|
||||
#include "util/util_md5.h"
|
||||
#include "util/util_opengl.h"
|
||||
#include "util/util_path.h"
|
||||
#include "util/util_string.h"
|
||||
#include "util/util_system.h"
|
||||
#include "util/util_types.h"
|
||||
#include "util/util_time.h"
|
||||
|
||||
#include "kernel/split/kernel_split_data_types.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -78,6 +82,31 @@ int cuewCompilerVersion(void)
|
||||
} /* namespace */
|
||||
#endif /* WITH_CUDA_DYNLOAD */
|
||||
|
||||
class CUDADevice;
|
||||
|
||||
class CUDASplitKernel : public DeviceSplitKernel {
|
||||
CUDADevice *device;
|
||||
public:
|
||||
explicit CUDASplitKernel(CUDADevice *device);
|
||||
|
||||
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
|
||||
|
||||
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
|
||||
RenderTile& rtile,
|
||||
int num_global_elements,
|
||||
device_memory& kernel_globals,
|
||||
device_memory& kernel_data_,
|
||||
device_memory& split_data,
|
||||
device_memory& ray_state,
|
||||
device_memory& queue_index,
|
||||
device_memory& use_queues_flag,
|
||||
device_memory& work_pool_wgs);
|
||||
|
||||
virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
|
||||
virtual int2 split_kernel_local_size();
|
||||
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
|
||||
};
|
||||
|
||||
class CUDADevice : public Device
|
||||
{
|
||||
public:
|
||||
@@ -258,16 +287,21 @@ public:
|
||||
return DebugFlags().cuda.adaptive_compile;
|
||||
}
|
||||
|
||||
bool use_split_kernel()
|
||||
{
|
||||
return DebugFlags().cuda.split_kernel;
|
||||
}
|
||||
|
||||
/* Common NVCC flags which stays the same regardless of shading model,
|
||||
* kernel sources md5 and only depends on compiler or compilation settings.
|
||||
*/
|
||||
string compile_kernel_get_common_cflags(
|
||||
const DeviceRequestedFeatures& requested_features)
|
||||
const DeviceRequestedFeatures& requested_features, bool split=false)
|
||||
{
|
||||
const int cuda_version = cuewCompilerVersion();
|
||||
const int machine = system_cpu_bits();
|
||||
const string kernel_path = path_get("kernel");
|
||||
const string include = kernel_path;
|
||||
const string source_path = path_get("source");
|
||||
const string include_path = source_path;
|
||||
string cflags = string_printf("-m%d "
|
||||
"--ptxas-options=\"-v\" "
|
||||
"--use_fast_math "
|
||||
@@ -276,7 +310,7 @@ public:
|
||||
"-I\"%s\"",
|
||||
machine,
|
||||
cuda_version,
|
||||
include.c_str());
|
||||
include_path.c_str());
|
||||
if(use_adaptive_compilation()) {
|
||||
cflags += " " + requested_features.get_build_options();
|
||||
}
|
||||
@@ -287,6 +321,11 @@ public:
|
||||
#ifdef WITH_CYCLES_DEBUG
|
||||
cflags += " -D__KERNEL_DEBUG__";
|
||||
#endif
|
||||
|
||||
if(split) {
|
||||
cflags += " -D__SPLIT__";
|
||||
}
|
||||
|
||||
return cflags;
|
||||
}
|
||||
|
||||
@@ -306,21 +345,21 @@ public:
|
||||
cuda_error_message("CUDA nvcc compiler version could not be parsed.");
|
||||
return false;
|
||||
}
|
||||
if(cuda_version < 75) {
|
||||
if(cuda_version < 80) {
|
||||
printf("Unsupported CUDA version %d.%d detected, "
|
||||
"you need CUDA 7.5 or newer.\n",
|
||||
"you need CUDA 8.0 or newer.\n",
|
||||
major, minor);
|
||||
return false;
|
||||
}
|
||||
else if(cuda_version != 75 && cuda_version != 80) {
|
||||
else if(cuda_version != 80) {
|
||||
printf("CUDA version %d.%d detected, build may succeed but only "
|
||||
"CUDA 7.5 and 8.0 are officially supported.\n",
|
||||
"CUDA 8.0 is officially supported.\n",
|
||||
major, minor);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
string compile_kernel(const DeviceRequestedFeatures& requested_features)
|
||||
string compile_kernel(const DeviceRequestedFeatures& requested_features, bool split=false)
|
||||
{
|
||||
/* Compute cubin name. */
|
||||
int major, minor;
|
||||
@@ -329,7 +368,8 @@ public:
|
||||
|
||||
/* Attempt to use kernel provided with Blender. */
|
||||
if(!use_adaptive_compilation()) {
|
||||
const string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin",
|
||||
const string cubin = path_get(string_printf(split ? "lib/kernel_split_sm_%d%d.cubin"
|
||||
: "lib/kernel_sm_%d%d.cubin",
|
||||
major, minor));
|
||||
VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
|
||||
if(path_exists(cubin)) {
|
||||
@@ -339,18 +379,19 @@ public:
|
||||
}
|
||||
|
||||
const string common_cflags =
|
||||
compile_kernel_get_common_cflags(requested_features);
|
||||
compile_kernel_get_common_cflags(requested_features, split);
|
||||
|
||||
/* Try to use locally compiled kernel. */
|
||||
const string kernel_path = path_get("kernel");
|
||||
const string kernel_md5 = path_files_md5_hash(kernel_path);
|
||||
const string source_path = path_get("source");
|
||||
const string kernel_md5 = path_files_md5_hash(source_path);
|
||||
|
||||
/* We include cflags into md5 so changing cuda toolkit or changing other
|
||||
* compiler command line arguments makes sure cubin gets re-built.
|
||||
*/
|
||||
const string cubin_md5 = util_md5_string(kernel_md5 + common_cflags);
|
||||
|
||||
const string cubin_file = string_printf("cycles_kernel_sm%d%d_%s.cubin",
|
||||
const string cubin_file = string_printf(split ? "cycles_kernel_split_sm%d%d_%s.cubin"
|
||||
: "cycles_kernel_sm%d%d_%s.cubin",
|
||||
major, minor,
|
||||
cubin_md5.c_str());
|
||||
const string cubin = path_cache_get(path_join("kernels", cubin_file));
|
||||
@@ -383,9 +424,10 @@ public:
|
||||
return "";
|
||||
}
|
||||
const char *nvcc = cuewCompilerPath();
|
||||
const string kernel = path_join(kernel_path,
|
||||
path_join("kernels",
|
||||
path_join("cuda", "kernel.cu")));
|
||||
const string kernel = path_join(
|
||||
path_join(source_path, "kernel"),
|
||||
path_join("kernels",
|
||||
path_join("cuda", split ? "kernel_split.cu" : "kernel.cu")));
|
||||
double starttime = time_dt();
|
||||
printf("Compiling CUDA kernel ...\n");
|
||||
|
||||
@@ -433,7 +475,7 @@ public:
|
||||
return false;
|
||||
|
||||
/* get kernel */
|
||||
string cubin = compile_kernel(requested_features);
|
||||
string cubin = compile_kernel(requested_features, use_split_kernel());
|
||||
|
||||
if(cubin == "")
|
||||
return false;
|
||||
@@ -466,8 +508,14 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void mem_alloc(device_memory& mem, MemoryType /*type*/)
|
||||
void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/)
|
||||
{
|
||||
if(name) {
|
||||
VLOG(1) << "Buffer allocate: " << name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
}
|
||||
|
||||
cuda_push_context();
|
||||
CUdeviceptr device_pointer;
|
||||
size_t size = mem.memory_size();
|
||||
@@ -504,7 +552,9 @@ public:
|
||||
|
||||
void mem_zero(device_memory& mem)
|
||||
{
|
||||
memset((void*)mem.data_pointer, 0, mem.memory_size());
|
||||
if(mem.data_pointer) {
|
||||
memset((void*)mem.data_pointer, 0, mem.memory_size());
|
||||
}
|
||||
|
||||
cuda_push_context();
|
||||
if(mem.device_pointer)
|
||||
@@ -617,7 +667,7 @@ public:
|
||||
/* Data Storage */
|
||||
if(interpolation == INTERPOLATION_NONE) {
|
||||
if(has_bindless_textures) {
|
||||
mem_alloc(mem, MEM_READ_ONLY);
|
||||
mem_alloc(NULL, mem, MEM_READ_ONLY);
|
||||
mem_copy_to(mem);
|
||||
|
||||
cuda_push_context();
|
||||
@@ -641,7 +691,7 @@ public:
|
||||
cuda_pop_context();
|
||||
}
|
||||
else {
|
||||
mem_alloc(mem, MEM_READ_ONLY);
|
||||
mem_alloc(NULL, mem, MEM_READ_ONLY);
|
||||
mem_copy_to(mem);
|
||||
|
||||
cuda_push_context();
|
||||
@@ -1258,25 +1308,48 @@ public:
|
||||
/* Upload Bindless Mapping */
|
||||
load_bindless_mapping();
|
||||
|
||||
/* keep rendering tiles until done */
|
||||
while(task->acquire_tile(this, tile)) {
|
||||
int start_sample = tile.start_sample;
|
||||
int end_sample = tile.start_sample + tile.num_samples;
|
||||
if(!use_split_kernel()) {
|
||||
/* keep rendering tiles until done */
|
||||
while(task->acquire_tile(this, tile)) {
|
||||
int start_sample = tile.start_sample;
|
||||
int end_sample = tile.start_sample + tile.num_samples;
|
||||
|
||||
for(int sample = start_sample; sample < end_sample; sample++) {
|
||||
if(task->get_cancel()) {
|
||||
if(task->need_finish_queue == false)
|
||||
break;
|
||||
}
|
||||
|
||||
path_trace(tile, sample, branched);
|
||||
|
||||
tile.sample = sample + 1;
|
||||
|
||||
task->update_progress(&tile, tile.w*tile.h);
|
||||
}
|
||||
|
||||
task->release_tile(tile);
|
||||
}
|
||||
}
|
||||
else {
|
||||
DeviceRequestedFeatures requested_features;
|
||||
if(!use_adaptive_compilation()) {
|
||||
requested_features.max_closure = 64;
|
||||
}
|
||||
|
||||
CUDASplitKernel split_kernel(this);
|
||||
split_kernel.load_kernels(requested_features);
|
||||
|
||||
while(task->acquire_tile(this, tile)) {
|
||||
device_memory void_buffer;
|
||||
split_kernel.path_trace(task, tile, void_buffer, void_buffer);
|
||||
|
||||
task->release_tile(tile);
|
||||
|
||||
for(int sample = start_sample; sample < end_sample; sample++) {
|
||||
if(task->get_cancel()) {
|
||||
if(task->need_finish_queue == false)
|
||||
break;
|
||||
}
|
||||
|
||||
path_trace(tile, sample, branched);
|
||||
|
||||
tile.sample = sample + 1;
|
||||
|
||||
task->update_progress(&tile, tile.w*tile.h);
|
||||
}
|
||||
|
||||
task->release_tile(tile);
|
||||
}
|
||||
}
|
||||
else if(task->type == DeviceTask::SHADER) {
|
||||
@@ -1329,8 +1402,223 @@ public:
|
||||
{
|
||||
task_pool.cancel();
|
||||
}
|
||||
|
||||
friend class CUDASplitKernelFunction;
|
||||
friend class CUDASplitKernel;
|
||||
};
|
||||
|
||||
/* redefine the cuda_assert macro so it can be used outside of the CUDADevice class
|
||||
* now that the definition of that class is complete
|
||||
*/
|
||||
#undef cuda_assert
|
||||
#define cuda_assert(stmt) \
|
||||
{ \
|
||||
CUresult result = stmt; \
|
||||
\
|
||||
if(result != CUDA_SUCCESS) { \
|
||||
string message = string_printf("CUDA error: %s in %s", cuewErrorString(result), #stmt); \
|
||||
if(device->error_msg == "") \
|
||||
device->error_msg = message; \
|
||||
fprintf(stderr, "%s\n", message.c_str()); \
|
||||
/*cuda_abort();*/ \
|
||||
device->cuda_error_documentation(); \
|
||||
} \
|
||||
} (void)0
|
||||
|
||||
/* split kernel */
|
||||
|
||||
class CUDASplitKernelFunction : public SplitKernelFunction{
|
||||
CUDADevice* device;
|
||||
CUfunction func;
|
||||
public:
|
||||
CUDASplitKernelFunction(CUDADevice *device, CUfunction func) : device(device), func(func) {}
|
||||
|
||||
/* enqueue the kernel, returns false if there is an error */
|
||||
bool enqueue(const KernelDimensions &dim, device_memory &/*kg*/, device_memory &/*data*/)
|
||||
{
|
||||
return enqueue(dim, NULL);
|
||||
}
|
||||
|
||||
/* enqueue the kernel, returns false if there is an error */
|
||||
bool enqueue(const KernelDimensions &dim, void *args[])
|
||||
{
|
||||
device->cuda_push_context();
|
||||
|
||||
if(device->have_error())
|
||||
return false;
|
||||
|
||||
/* we ignore dim.local_size for now, as this is faster */
|
||||
int threads_per_block;
|
||||
cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func));
|
||||
|
||||
int xthreads = (int)sqrt(threads_per_block);
|
||||
int ythreads = (int)sqrt(threads_per_block);
|
||||
|
||||
int xblocks = (dim.global_size[0] + xthreads - 1)/xthreads;
|
||||
int yblocks = (dim.global_size[1] + ythreads - 1)/ythreads;
|
||||
|
||||
cuda_assert(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1));
|
||||
|
||||
cuda_assert(cuLaunchKernel(func,
|
||||
xblocks , yblocks, 1, /* blocks */
|
||||
xthreads, ythreads, 1, /* threads */
|
||||
0, 0, args, 0));
|
||||
|
||||
device->cuda_pop_context();
|
||||
|
||||
return !device->have_error();
|
||||
}
|
||||
};
|
||||
|
||||
CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device), device(device)
|
||||
{
|
||||
}
|
||||
|
||||
/* Query the split state buffer size for num_threads threads from the device.
 *
 * Launches the "kernel_cuda_state_buffer_size" kernel with a single thread,
 * passing the thread count and a one-element uint64_t device buffer, then
 * copies that element back to the host and returns it. The kg and data
 * buffers are unused.
 */
uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory& /*data*/, size_t num_threads)
{
	/* one-element buffer for the kernel to write its answer into */
	device_vector<uint64_t> size_buffer;
	size_buffer.resize(1);
	device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);

	device->cuda_push_context();

	uint thread_count = num_threads;
	CUdeviceptr d_size_buffer = device->cuda_device_ptr(size_buffer.device_pointer);

	/* cuLaunchKernel receives pointers to the argument values; presumably
	 * these must follow the kernel's parameter order -- confirm against the
	 * kernel source.
	 */
	struct args_t {
		uint* num_threads;
		CUdeviceptr* size;
	};

	args_t args = {
		&thread_count,
		&d_size_buffer
	};

	CUfunction state_buffer_size;
	cuda_assert(cuModuleGetFunction(&state_buffer_size, device->cuModule, "kernel_cuda_state_buffer_size"));

	cuda_assert(cuLaunchKernel(state_buffer_size,
	                           1, 1, 1,
	                           1, 1, 1,
	                           0, 0, (void**)&args, 0));

	device->cuda_pop_context();

	/* fetch the result, release the device allocation, return the host copy */
	device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t));
	device->mem_free(size_buffer);

	return *size_buffer.get_data();
}
|
||||
|
||||
bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim,
|
||||
RenderTile& rtile,
|
||||
int num_global_elements,
|
||||
device_memory& /*kernel_globals*/,
|
||||
device_memory& /*kernel_data*/,
|
||||
device_memory& split_data,
|
||||
device_memory& ray_state,
|
||||
device_memory& queue_index,
|
||||
device_memory& use_queues_flag,
|
||||
device_memory& work_pool_wgs)
|
||||
{
|
||||
device->cuda_push_context();
|
||||
|
||||
CUdeviceptr d_split_data = device->cuda_device_ptr(split_data.device_pointer);
|
||||
CUdeviceptr d_ray_state = device->cuda_device_ptr(ray_state.device_pointer);
|
||||
CUdeviceptr d_queue_index = device->cuda_device_ptr(queue_index.device_pointer);
|
||||
CUdeviceptr d_use_queues_flag = device->cuda_device_ptr(use_queues_flag.device_pointer);
|
||||
CUdeviceptr d_work_pool_wgs = device->cuda_device_ptr(work_pool_wgs.device_pointer);
|
||||
|
||||
CUdeviceptr d_rng_state = device->cuda_device_ptr(rtile.rng_state);
|
||||
CUdeviceptr d_buffer = device->cuda_device_ptr(rtile.buffer);
|
||||
|
||||
int end_sample = rtile.start_sample + rtile.num_samples;
|
||||
int queue_size = dim.global_size[0] * dim.global_size[1];
|
||||
|
||||
struct args_t {
|
||||
CUdeviceptr* split_data_buffer;
|
||||
int* num_elements;
|
||||
CUdeviceptr* ray_state;
|
||||
CUdeviceptr* rng_state;
|
||||
int* start_sample;
|
||||
int* end_sample;
|
||||
int* sx;
|
||||
int* sy;
|
||||
int* sw;
|
||||
int* sh;
|
||||
int* offset;
|
||||
int* stride;
|
||||
CUdeviceptr* queue_index;
|
||||
int* queuesize;
|
||||
CUdeviceptr* use_queues_flag;
|
||||
CUdeviceptr* work_pool_wgs;
|
||||
int* num_samples;
|
||||
CUdeviceptr* buffer;
|
||||
};
|
||||
|
||||
args_t args = {
|
||||
&d_split_data,
|
||||
&num_global_elements,
|
||||
&d_ray_state,
|
||||
&d_rng_state,
|
||||
&rtile.start_sample,
|
||||
&end_sample,
|
||||
&rtile.x,
|
||||
&rtile.y,
|
||||
&rtile.w,
|
||||
&rtile.h,
|
||||
&rtile.offset,
|
||||
&rtile.stride,
|
||||
&d_queue_index,
|
||||
&queue_size,
|
||||
&d_use_queues_flag,
|
||||
&d_work_pool_wgs,
|
||||
&rtile.num_samples,
|
||||
&d_buffer
|
||||
};
|
||||
|
||||
CUfunction data_init;
|
||||
cuda_assert(cuModuleGetFunction(&data_init, device->cuModule, "kernel_cuda_path_trace_data_init"));
|
||||
if(device->have_error()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
CUDASplitKernelFunction(device, data_init).enqueue(dim, (void**)&args);
|
||||
|
||||
device->cuda_pop_context();
|
||||
|
||||
return !device->have_error();
|
||||
}
|
||||
|
||||
/* Look up the CUDA function "kernel_cuda_<kernel_name>" in the device's
 * module and wrap it in a newly allocated CUDASplitKernelFunction.
 *
 * Returns NULL (after reporting through cuda_error_message) when the device
 * is in an error state after the lookup. The requested-features argument is
 * unused. The CUDA context pushed here is popped on every exit path.
 */
SplitKernelFunction* CUDASplitKernel::get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&)
{
	CUfunction func;

	device->cuda_push_context();

	cuda_assert(cuModuleGetFunction(&func, device->cuModule, (string("kernel_cuda_") + kernel_name).data()));
	if(device->have_error()) {
		device->cuda_error_message(string_printf("kernel \"kernel_cuda_%s\" not found in module", kernel_name.data()));
		/* balance the push above before bailing out */
		device->cuda_pop_context();
		return NULL;
	}

	device->cuda_pop_context();

	return new CUDASplitKernelFunction(device, func);
}
|
||||
|
||||
/* Local work size reported for the split kernel: fixed at 32 x 1. */
int2 CUDASplitKernel::split_kernel_local_size()
{
	const int local_width = 32;
	const int local_height = 1;

	return make_int2(local_width, local_height);
}
|
||||
|
||||
/* Global work size reported for the split kernel: currently a fixed
 * 256 x 256, independent of the buffers and task passed in.
 */
int2 CUDASplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memory& /*data*/, DeviceTask * /*task*/)
{
	/* TODO(mai): implement something here to detect ideal work size */
	const int global_width = 256;
	const int global_height = 256;

	return make_int2(global_width, global_height);
}
|
||||
|
||||
bool device_cuda_init(void)
|
||||
{
|
||||
#ifdef WITH_CUDA_DYNLOAD
|
||||
|
@@ -28,10 +28,10 @@
|
||||
* other devices this is a pointer to device memory, where we will copy memory
|
||||
* to and from. */
|
||||
|
||||
#include "util_debug.h"
|
||||
#include "util_half.h"
|
||||
#include "util_types.h"
|
||||
#include "util_vector.h"
|
||||
#include "util/util_debug.h"
|
||||
#include "util/util_half.h"
|
||||
#include "util/util_types.h"
|
||||
#include "util/util_vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -48,7 +48,8 @@ enum DataType {
|
||||
TYPE_UINT,
|
||||
TYPE_INT,
|
||||
TYPE_FLOAT,
|
||||
TYPE_HALF
|
||||
TYPE_HALF,
|
||||
TYPE_UINT64,
|
||||
};
|
||||
|
||||
static inline size_t datatype_size(DataType datatype)
|
||||
@@ -59,6 +60,7 @@ static inline size_t datatype_size(DataType datatype)
|
||||
case TYPE_UINT: return sizeof(uint);
|
||||
case TYPE_INT: return sizeof(int);
|
||||
case TYPE_HALF: return sizeof(half);
|
||||
case TYPE_UINT64: return sizeof(uint64_t);
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
@@ -160,6 +162,11 @@ template<> struct device_type_traits<half4> {
|
||||
static const int num_elements = 4;
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint64_t> {
|
||||
static const DataType data_type = TYPE_UINT64;
|
||||
static const int num_elements = 1;
|
||||
};
|
||||
|
||||
/* Device Memory */
|
||||
|
||||
class device_memory
|
||||
@@ -180,10 +187,27 @@ public:
|
||||
/* device pointer */
|
||||
device_ptr device_pointer;
|
||||
|
||||
protected:
|
||||
device_memory() {}
|
||||
device_memory()
|
||||
{
|
||||
data_type = device_type_traits<uchar>::data_type;
|
||||
data_elements = device_type_traits<uchar>::num_elements;
|
||||
data_pointer = 0;
|
||||
data_size = 0;
|
||||
device_size = 0;
|
||||
data_width = 0;
|
||||
data_height = 0;
|
||||
data_depth = 0;
|
||||
device_pointer = 0;
|
||||
}
|
||||
virtual ~device_memory() { assert(!device_pointer); }
|
||||
|
||||
void resize(size_t size)
|
||||
{
|
||||
data_size = size;
|
||||
data_width = size;
|
||||
}
|
||||
|
||||
protected:
|
||||
/* no copying */
|
||||
device_memory(const device_memory&);
|
||||
device_memory& operator = (const device_memory&);
|
||||
@@ -198,16 +222,8 @@ public:
|
||||
{
|
||||
data_type = device_type_traits<T>::data_type;
|
||||
data_elements = device_type_traits<T>::num_elements;
|
||||
data_pointer = 0;
|
||||
data_size = 0;
|
||||
device_size = 0;
|
||||
data_width = 0;
|
||||
data_height = 0;
|
||||
data_depth = 0;
|
||||
|
||||
assert(data_elements > 0);
|
||||
|
||||
device_pointer = 0;
|
||||
}
|
||||
|
||||
virtual ~device_vector() {}
|
||||
@@ -266,6 +282,7 @@ public:
|
||||
data_height = 0;
|
||||
data_depth = 0;
|
||||
data_size = 0;
|
||||
device_pointer = 0;
|
||||
}
|
||||
|
||||
size_t size()
|
||||
|
@@ -17,17 +17,17 @@
|
||||
#include <stdlib.h>
|
||||
#include <sstream>
|
||||
|
||||
#include "device.h"
|
||||
#include "device_intern.h"
|
||||
#include "device_network.h"
|
||||
#include "device/device.h"
|
||||
#include "device/device_intern.h"
|
||||
#include "device/device_network.h"
|
||||
|
||||
#include "buffers.h"
|
||||
#include "render/buffers.h"
|
||||
|
||||
#include "util_foreach.h"
|
||||
#include "util_list.h"
|
||||
#include "util_logging.h"
|
||||
#include "util_map.h"
|
||||
#include "util_time.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_list.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_map.h"
|
||||
#include "util/util_time.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -106,11 +106,11 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
void mem_alloc(device_memory& mem, MemoryType type)
|
||||
void mem_alloc(const char *name, device_memory& mem, MemoryType type)
|
||||
{
|
||||
foreach(SubDevice& sub, devices) {
|
||||
mem.device_pointer = 0;
|
||||
sub.device->mem_alloc(mem, type);
|
||||
sub.device->mem_alloc(name, mem, type);
|
||||
sub.ptr_map[unique_ptr] = mem.device_pointer;
|
||||
}
|
||||
|
||||
@@ -162,6 +162,7 @@ public:
|
||||
void mem_free(device_memory& mem)
|
||||
{
|
||||
device_ptr tmp = mem.device_pointer;
|
||||
stats.mem_free(mem.device_size);
|
||||
|
||||
foreach(SubDevice& sub, devices) {
|
||||
mem.device_pointer = sub.ptr_map[tmp];
|
||||
@@ -170,7 +171,6 @@ public:
|
||||
}
|
||||
|
||||
mem.device_pointer = 0;
|
||||
stats.mem_free(mem.device_size);
|
||||
}
|
||||
|
||||
void const_copy_to(const char *name, void *host, size_t size)
|
||||
@@ -202,6 +202,7 @@ public:
|
||||
void tex_free(device_memory& mem)
|
||||
{
|
||||
device_ptr tmp = mem.device_pointer;
|
||||
stats.mem_free(mem.device_size);
|
||||
|
||||
foreach(SubDevice& sub, devices) {
|
||||
mem.device_pointer = sub.ptr_map[tmp];
|
||||
@@ -210,7 +211,6 @@ public:
|
||||
}
|
||||
|
||||
mem.device_pointer = 0;
|
||||
stats.mem_free(mem.device_size);
|
||||
}
|
||||
|
||||
void pixels_alloc(device_memory& mem)
|
||||
|
@@ -14,12 +14,12 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "device.h"
|
||||
#include "device_intern.h"
|
||||
#include "device_network.h"
|
||||
#include "device/device.h"
|
||||
#include "device/device_intern.h"
|
||||
#include "device/device_network.h"
|
||||
|
||||
#include "util_foreach.h"
|
||||
#include "util_logging.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_logging.h"
|
||||
|
||||
#if defined(WITH_NETWORK)
|
||||
|
||||
@@ -87,8 +87,14 @@ public:
|
||||
snd.write();
|
||||
}
|
||||
|
||||
void mem_alloc(device_memory& mem, MemoryType type)
|
||||
void mem_alloc(const char *name, device_memory& mem, MemoryType type)
|
||||
{
|
||||
if(name) {
|
||||
VLOG(1) << "Buffer allocate: " << name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
}
|
||||
|
||||
thread_scoped_lock lock(rpc_lock);
|
||||
|
||||
mem.device_pointer = ++mem_counter;
|
||||
@@ -481,7 +487,7 @@ protected:
|
||||
mem.data_pointer = 0;
|
||||
|
||||
/* perform the allocation on the actual device */
|
||||
device->mem_alloc(mem, type);
|
||||
device->mem_alloc(NULL, mem, type);
|
||||
|
||||
/* store a mapping to/from client_pointer and real device pointer */
|
||||
pointer_mapping_insert(client_pointer, mem.device_pointer);
|
||||
|
@@ -33,12 +33,12 @@
|
||||
#include <sstream>
|
||||
#include <deque>
|
||||
|
||||
#include "buffers.h"
|
||||
#include "render/buffers.h"
|
||||
|
||||
#include "util_foreach.h"
|
||||
#include "util_list.h"
|
||||
#include "util_map.h"
|
||||
#include "util_string.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_list.h"
|
||||
#include "util/util_map.h"
|
||||
#include "util/util_string.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -16,12 +16,12 @@
|
||||
|
||||
#ifdef WITH_OPENCL
|
||||
|
||||
#include "opencl/opencl.h"
|
||||
#include "device/opencl/opencl.h"
|
||||
|
||||
#include "device_intern.h"
|
||||
#include "device/device_intern.h"
|
||||
|
||||
#include "util_foreach.h"
|
||||
#include "util_logging.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_logging.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
306
intern/cycles/device/device_split_kernel.cpp
Normal file
306
intern/cycles/device/device_split_kernel.cpp
Normal file
@@ -0,0 +1,306 @@
|
||||
/*
|
||||
* Copyright 2011-2016 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "device/device_split_kernel.h"
|
||||
|
||||
#include "kernel/kernel_types.h"
|
||||
#include "kernel/split/kernel_split_data_types.h"
|
||||
|
||||
#include "util/util_time.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
static const double alpha = 0.1; /* alpha for rolling average */
|
||||
|
||||
/* Bind the split kernel to device and reset all state: every kernel function
 * handle starts out NULL (they are filled in by load_kernels()), no closure
 * limit is recorded yet, and the next tile is treated as the first one.
 */
DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
{
	/* kernel function handles */
	kernel_path_init = NULL;
	kernel_scene_intersect = NULL;
	kernel_lamp_emission = NULL;
	kernel_do_volume = NULL;
	kernel_queue_enqueue = NULL;
	kernel_indirect_background = NULL;
	kernel_shader_eval = NULL;
	kernel_holdout_emission_blurring_pathtermination_ao = NULL;
	kernel_subsurface_scatter = NULL;
	kernel_direct_lighting = NULL;
	kernel_shadow_blocked_ao = NULL;
	kernel_shadow_blocked_dl = NULL;
	kernel_next_iteration_setup = NULL;
	kernel_indirect_subsurface = NULL;
	kernel_buffer_update = NULL;

	/* bookkeeping */
	current_max_closure = -1;
	first_tile = true;
	avg_time_per_sample = 0.0;
}
|
||||
|
||||
/* Release the device buffers owned by the split kernel, then delete every
 * kernel function object (deleting a NULL handle is a no-op).
 */
DeviceSplitKernel::~DeviceSplitKernel()
{
	/* device buffers */
	device->mem_free(split_data);
	device->mem_free(ray_state);
	device->mem_free(use_queues_flag);
	device->mem_free(queue_index);
	device->mem_free(work_pool_wgs);

	/* kernel function objects, deleted in declaration-list order */
	SplitKernelFunction *kernel_functions[] = {
		kernel_path_init,
		kernel_scene_intersect,
		kernel_lamp_emission,
		kernel_do_volume,
		kernel_queue_enqueue,
		kernel_indirect_background,
		kernel_shader_eval,
		kernel_holdout_emission_blurring_pathtermination_ao,
		kernel_subsurface_scatter,
		kernel_direct_lighting,
		kernel_shadow_blocked_ao,
		kernel_shadow_blocked_dl,
		kernel_next_iteration_setup,
		kernel_indirect_subsurface,
		kernel_buffer_update
	};
	const size_t num_kernel_functions = sizeof(kernel_functions) / sizeof(kernel_functions[0]);

	for(size_t i = 0; i < num_kernel_functions; i++) {
		delete kernel_functions[i];
	}
}
|
||||
|
||||
bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_features)
|
||||
{
|
||||
#define LOAD_KERNEL(name) \
|
||||
kernel_##name = get_split_kernel_function(#name, requested_features); \
|
||||
if(!kernel_##name) { \
|
||||
return false; \
|
||||
}
|
||||
|
||||
LOAD_KERNEL(path_init);
|
||||
LOAD_KERNEL(scene_intersect);
|
||||
LOAD_KERNEL(lamp_emission);
|
||||
LOAD_KERNEL(do_volume);
|
||||
LOAD_KERNEL(queue_enqueue);
|
||||
LOAD_KERNEL(indirect_background);
|
||||
LOAD_KERNEL(shader_eval);
|
||||
LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
|
||||
LOAD_KERNEL(subsurface_scatter);
|
||||
LOAD_KERNEL(direct_lighting);
|
||||
LOAD_KERNEL(shadow_blocked_ao);
|
||||
LOAD_KERNEL(shadow_blocked_dl);
|
||||
LOAD_KERNEL(next_iteration_setup);
|
||||
LOAD_KERNEL(indirect_subsurface);
|
||||
LOAD_KERNEL(buffer_update);
|
||||
|
||||
#undef LOAD_KERNEL
|
||||
|
||||
current_max_closure = requested_features.max_closure;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Estimate how many state elements fit into max_buffer_size bytes.
 *
 * The per-element size is taken as state_buffer_size() for 1024 threads
 * divided by 1024. Returns 0 when that per-element size comes out as 0
 * (for example when the size query failed), rather than dividing by zero.
 */
size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size)
{
	uint64_t size_per_element = state_buffer_size(kg, data, 1024) / 1024;

	/* integer division by zero is undefined behavior; report "nothing fits" */
	if(size_per_element == 0) {
		return 0;
	}

	return max_buffer_size / size_per_element;
}
|
||||
|
||||
bool DeviceSplitKernel::path_trace(DeviceTask *task,
|
||||
RenderTile& tile,
|
||||
device_memory& kgbuffer,
|
||||
device_memory& kernel_data)
|
||||
{
|
||||
if(device->have_error()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Get local size */
|
||||
size_t local_size[2];
|
||||
{
|
||||
int2 lsize = split_kernel_local_size();
|
||||
local_size[0] = lsize[0];
|
||||
local_size[1] = lsize[1];
|
||||
}
|
||||
|
||||
/* Set global size */
|
||||
size_t global_size[2];
|
||||
{
|
||||
int2 gsize = split_kernel_global_size(kgbuffer, kernel_data, task);
|
||||
|
||||
/* Make sure that set work size is a multiple of local
|
||||
* work size dimensions.
|
||||
*/
|
||||
global_size[0] = round_up(gsize[0], local_size[0]);
|
||||
global_size[1] = round_up(gsize[1], local_size[1]);
|
||||
}
|
||||
|
||||
/* Number of elements in the global state buffer */
|
||||
int num_global_elements = global_size[0] * global_size[1];
|
||||
assert(num_global_elements % WORK_POOL_SIZE == 0);
|
||||
|
||||
/* Allocate all required global memory once. */
|
||||
if(first_tile) {
|
||||
first_tile = false;
|
||||
|
||||
/* Calculate max groups */
|
||||
|
||||
/* Denotes the maximum work groups possible w.r.t. current requested tile size. */
|
||||
unsigned int max_work_groups = num_global_elements / WORK_POOL_SIZE + 1;
|
||||
|
||||
/* Allocate work_pool_wgs memory. */
|
||||
work_pool_wgs.resize(max_work_groups * sizeof(unsigned int));
|
||||
device->mem_alloc("work_pool_wgs", work_pool_wgs, MEM_READ_WRITE);
|
||||
|
||||
queue_index.resize(NUM_QUEUES * sizeof(int));
|
||||
device->mem_alloc("queue_index", queue_index, MEM_READ_WRITE);
|
||||
|
||||
use_queues_flag.resize(sizeof(char));
|
||||
device->mem_alloc("use_queues_flag", use_queues_flag, MEM_READ_WRITE);
|
||||
|
||||
ray_state.resize(num_global_elements);
|
||||
device->mem_alloc("ray_state", ray_state, MEM_READ_WRITE);
|
||||
|
||||
split_data.resize(state_buffer_size(kgbuffer, kernel_data, num_global_elements));
|
||||
device->mem_alloc("split_data", split_data, MEM_READ_WRITE);
|
||||
}
|
||||
|
||||
#define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \
|
||||
if(device->have_error()) { \
|
||||
return false; \
|
||||
} \
|
||||
if(!kernel_##name->enqueue(KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \
|
||||
return false; \
|
||||
}
|
||||
|
||||
tile.sample = tile.start_sample;
|
||||
|
||||
/* for exponential increase between tile updates */
|
||||
int time_multiplier = 1;
|
||||
|
||||
while(tile.sample < tile.start_sample + tile.num_samples) {
|
||||
/* to keep track of how long it takes to run a number of samples */
|
||||
double start_time = time_dt();
|
||||
|
||||
/* initial guess to start rolling average */
|
||||
const int initial_num_samples = 1;
|
||||
/* approx number of samples per second */
|
||||
int samples_per_second = (avg_time_per_sample > 0.0) ?
|
||||
int(double(time_multiplier) / avg_time_per_sample) + 1 : initial_num_samples;
|
||||
|
||||
RenderTile subtile = tile;
|
||||
subtile.start_sample = tile.sample;
|
||||
subtile.num_samples = min(samples_per_second, tile.start_sample + tile.num_samples - tile.sample);
|
||||
|
||||
if(device->have_error()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* reset state memory here as global size for data_init
|
||||
* kernel might not be large enough to do in kernel
|
||||
*/
|
||||
device->mem_zero(work_pool_wgs);
|
||||
device->mem_zero(split_data);
|
||||
device->mem_zero(ray_state);
|
||||
|
||||
if(!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
|
||||
subtile,
|
||||
num_global_elements,
|
||||
kgbuffer,
|
||||
kernel_data,
|
||||
split_data,
|
||||
ray_state,
|
||||
queue_index,
|
||||
use_queues_flag,
|
||||
work_pool_wgs))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
ENQUEUE_SPLIT_KERNEL(path_init, global_size, local_size);
|
||||
|
||||
bool activeRaysAvailable = true;
|
||||
|
||||
while(activeRaysAvailable) {
|
||||
/* Do path-iteration in host [Enqueue Path-iteration kernels. */
|
||||
for(int PathIter = 0; PathIter < 16; PathIter++) {
|
||||
ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(buffer_update, global_size, local_size);
|
||||
|
||||
if(task->get_cancel()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Decide if we should exit path-iteration in host. */
|
||||
device->mem_copy_from(ray_state, 0, global_size[0] * global_size[1] * sizeof(char), 1, 1);
|
||||
|
||||
activeRaysAvailable = false;
|
||||
|
||||
for(int rayStateIter = 0; rayStateIter < global_size[0] * global_size[1]; ++rayStateIter) {
|
||||
int8_t state = ray_state.get_data()[rayStateIter];
|
||||
|
||||
if(state != RAY_INACTIVE) {
|
||||
if(state == RAY_INVALID) {
|
||||
/* Something went wrong, abort to avoid looping endlessly. */
|
||||
device->set_error("Split kernel error: invalid ray state");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Not all rays are RAY_INACTIVE. */
|
||||
activeRaysAvailable = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(task->get_cancel()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
double time_per_sample = ((time_dt()-start_time) / subtile.num_samples);
|
||||
|
||||
if(avg_time_per_sample == 0.0) {
|
||||
/* start rolling average */
|
||||
avg_time_per_sample = time_per_sample;
|
||||
}
|
||||
else {
|
||||
avg_time_per_sample = alpha*time_per_sample + (1.0-alpha)*avg_time_per_sample;
|
||||
}
|
||||
|
||||
#undef ENQUEUE_SPLIT_KERNEL
|
||||
|
||||
tile.sample += subtile.num_samples;
|
||||
task->update_progress(&tile, tile.w*tile.h*subtile.num_samples);
|
||||
|
||||
time_multiplier = min(time_multiplier << 1, 10);
|
||||
|
||||
if(task->get_cancel()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
|
132
intern/cycles/device/device_split_kernel.h
Normal file
132
intern/cycles/device/device_split_kernel.h
Normal file
@@ -0,0 +1,132 @@
|
||||
/*
|
||||
* Copyright 2011-2016 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef __DEVICE_SPLIT_KERNEL_H__
|
||||
#define __DEVICE_SPLIT_KERNEL_H__
|
||||
|
||||
#include "device/device.h"
|
||||
#include "render/buffers.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* When allocating global memory in chunks, we may not be able to
|
||||
* allocate exactly "CL_DEVICE_MAX_MEM_ALLOC_SIZE" bytes in chunks;
|
||||
* Since some bytes may be needed for aligning chunks of memory;
|
||||
* This is the amount of memory that we dedicate for that purpose.
|
||||
*/
|
||||
#define DATA_ALLOCATION_MEM_FACTOR 5000000 //5MB
|
||||
|
||||
/* Types used for split kernel */
|
||||
|
||||
/* Global and local work sizes (two dimensions each) for one kernel launch. */
class KernelDimensions {
public:
	size_t global_size[2];
	size_t local_size[2];

	/* Copy both two-element size arrays into this object.
	 *
	 * The arguments are only read, so they are const-qualified; callers may
	 * pass either const or non-const arrays. The copy is done element-wise so
	 * this header does not depend on memcpy() being declared by some other
	 * include.
	 */
	KernelDimensions(const size_t global_size_[2], const size_t local_size_[2])
	{
		for(int i = 0; i < 2; i++) {
			global_size[i] = global_size_[i];
			local_size[i] = local_size_[i];
		}
	}
};
|
||||
|
||||
class SplitKernelFunction {
|
||||
public:
|
||||
virtual ~SplitKernelFunction() {}
|
||||
|
||||
/* enqueue the kernel, returns false if there is an error */
|
||||
virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) = 0;
|
||||
};
|
||||
|
||||
class DeviceSplitKernel {
|
||||
private:
|
||||
Device *device;
|
||||
|
||||
SplitKernelFunction *kernel_path_init;
|
||||
SplitKernelFunction *kernel_scene_intersect;
|
||||
SplitKernelFunction *kernel_lamp_emission;
|
||||
SplitKernelFunction *kernel_do_volume;
|
||||
SplitKernelFunction *kernel_queue_enqueue;
|
||||
SplitKernelFunction *kernel_indirect_background;
|
||||
SplitKernelFunction *kernel_shader_eval;
|
||||
SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
|
||||
SplitKernelFunction *kernel_subsurface_scatter;
|
||||
SplitKernelFunction *kernel_direct_lighting;
|
||||
SplitKernelFunction *kernel_shadow_blocked_ao;
|
||||
SplitKernelFunction *kernel_shadow_blocked_dl;
|
||||
SplitKernelFunction *kernel_next_iteration_setup;
|
||||
SplitKernelFunction *kernel_indirect_subsurface;
|
||||
SplitKernelFunction *kernel_buffer_update;
|
||||
|
||||
/* Global memory variables [porting]; These memory is used for
|
||||
* co-operation between different kernels; Data written by one
|
||||
* kernel will be available to another kernel via this global
|
||||
* memory.
|
||||
*/
|
||||
device_memory split_data;
|
||||
device_vector<uchar> ray_state;
|
||||
device_memory queue_index; /* Array of size num_queues * sizeof(int) that tracks the size of each queue. */
|
||||
|
||||
/* Flag to make sceneintersect and lampemission kernel use queues. */
|
||||
device_memory use_queues_flag;
|
||||
|
||||
/* Approximate time it takes to complete one sample */
|
||||
double avg_time_per_sample;
|
||||
|
||||
/* Work pool with respect to each work group. */
|
||||
device_memory work_pool_wgs;
|
||||
|
||||
/* clos_max value for which the kernels have been loaded currently. */
|
||||
int current_max_closure;
|
||||
|
||||
/* Marked True in constructor and marked false at the end of path_trace(). */
|
||||
bool first_tile;
|
||||
|
||||
public:
|
||||
explicit DeviceSplitKernel(Device* device);
|
||||
virtual ~DeviceSplitKernel();
|
||||
|
||||
bool load_kernels(const DeviceRequestedFeatures& requested_features);
|
||||
bool path_trace(DeviceTask *task,
|
||||
RenderTile& rtile,
|
||||
device_memory& kgbuffer,
|
||||
device_memory& kernel_data);
|
||||
|
||||
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0;
|
||||
size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size);
|
||||
|
||||
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
|
||||
RenderTile& rtile,
|
||||
int num_global_elements,
|
||||
device_memory& kernel_globals,
|
||||
device_memory& kernel_data_,
|
||||
device_memory& split_data,
|
||||
device_memory& ray_state,
|
||||
device_memory& queue_index,
|
||||
device_memory& use_queues_flag,
|
||||
device_memory& work_pool_wgs) = 0;
|
||||
|
||||
virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&) = 0;
|
||||
virtual int2 split_kernel_local_size() = 0;
|
||||
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task) = 0;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __DEVICE_SPLIT_KERNEL_H__ */
|
||||
|
||||
|
||||
|
@@ -17,12 +17,12 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "device_task.h"
|
||||
#include "device/device_task.h"
|
||||
|
||||
#include "buffers.h"
|
||||
#include "render/buffers.h"
|
||||
|
||||
#include "util_algorithm.h"
|
||||
#include "util_time.h"
|
||||
#include "util/util_algorithm.h"
|
||||
#include "util/util_time.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -17,11 +17,11 @@
|
||||
#ifndef __DEVICE_TASK_H__
|
||||
#define __DEVICE_TASK_H__
|
||||
|
||||
#include "device_memory.h"
|
||||
#include "device/device_memory.h"
|
||||
|
||||
#include "util_function.h"
|
||||
#include "util_list.h"
|
||||
#include "util_task.h"
|
||||
#include "util/util_function.h"
|
||||
#include "util/util_list.h"
|
||||
#include "util/util_task.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -51,6 +51,8 @@ public:
|
||||
int shader_filter;
|
||||
int shader_x, shader_w;
|
||||
|
||||
int passes_size;
|
||||
|
||||
explicit DeviceTask(Type type = PATH_TRACE);
|
||||
|
||||
int get_subtask_count(int num, int max_size = 0);
|
||||
|
@@ -16,40 +16,40 @@
|
||||
|
||||
#ifdef WITH_OPENCL
|
||||
|
||||
#include "device.h"
|
||||
#include "device/device.h"
|
||||
|
||||
#include "util_map.h"
|
||||
#include "util_param.h"
|
||||
#include "util_string.h"
|
||||
#include "util/util_map.h"
|
||||
#include "util/util_param.h"
|
||||
#include "util/util_string.h"
|
||||
|
||||
#include "clew.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Define CYCLES_DISABLE_DRIVER_WORKAROUNDS to disable workarounds for testing. */
#ifndef CYCLES_DISABLE_DRIVER_WORKAROUNDS
/* Work around AMD driver hangs by ensuring each command is finished before
 * doing anything else.
 *
 * Each wrapper finishes the queue, issues the real call and finishes the
 * queue again. The leading clFinish() is chained with the comma operator so
 * that the value of the expression is the status of the wrapped call. This
 * way `err = clEnqueue...(...)` and `opencl_assert(clEnqueue...(...))` check
 * the enqueue itself instead of the status of the preceding clFinish().
 *
 * NOTE: the wrappers still expand to more than one statement and evaluate
 * `a` several times. Only use them as a full statement or as the right-hand
 * side of an assignment, with a queue argument free of side effects.
 */
#  undef clEnqueueNDRangeKernel
#  define clEnqueueNDRangeKernel(a, b, c, d, e, f, g, h, i) \
	(clFinish(a), \
	 CLEW_GET_FUN(__clewEnqueueNDRangeKernel)(a, b, c, d, e, f, g, h, i)); \
	clFinish(a);

#  undef clEnqueueWriteBuffer
#  define clEnqueueWriteBuffer(a, b, c, d, e, f, g, h, i) \
	(clFinish(a), \
	 CLEW_GET_FUN(__clewEnqueueWriteBuffer)(a, b, c, d, e, f, g, h, i)); \
	clFinish(a);

#  undef clEnqueueReadBuffer
#  define clEnqueueReadBuffer(a, b, c, d, e, f, g, h, i) \
	(clFinish(a), \
	 CLEW_GET_FUN(__clewEnqueueReadBuffer)(a, b, c, d, e, f, g, h, i)); \
	clFinish(a);
#endif  /* CYCLES_DISABLE_DRIVER_WORKAROUNDS */
|
||||
|
||||
#define CL_MEM_PTR(p) ((cl_mem)(uintptr_t)(p))
|
||||
|
||||
/* Macro declarations used with split kernel */
|
||||
|
||||
/* Macro to enable/disable work-stealing */
|
||||
#define __WORK_STEALING__
|
||||
|
||||
#define SPLIT_KERNEL_LOCAL_SIZE_X 64
|
||||
#define SPLIT_KERNEL_LOCAL_SIZE_Y 1
|
||||
|
||||
/* This value may be tuned according to the scene we are rendering.
|
||||
*
|
||||
* Modifying PATH_ITER_INC_FACTOR value proportional to number of expected
|
||||
* ray-bounces will improve performance.
|
||||
*/
|
||||
#define PATH_ITER_INC_FACTOR 8
|
||||
|
||||
/* When allocate global memory in chunks. We may not be able to
|
||||
* allocate exactly "CL_DEVICE_MAX_MEM_ALLOC_SIZE" bytes in chunks;
|
||||
* Since some bytes may be needed for aligning chunks of memory;
|
||||
* This is the amount of memory that we dedicate for that purpose.
|
||||
*/
|
||||
#define DATA_ALLOCATION_MEM_FACTOR 5000000 //5MB
|
||||
|
||||
struct OpenCLPlatformDevice {
|
||||
OpenCLPlatformDevice(cl_platform_id platform_id,
|
||||
const string& platform_name,
|
||||
@@ -90,6 +90,54 @@ public:
|
||||
cl_device_id device_id);
|
||||
static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
|
||||
bool force_all = false);
|
||||
static bool use_single_program();
|
||||
|
||||
/* ** Some handy shortcuts to low level cl*GetInfo() functions. ** */
|
||||
|
||||
/* Platform information. */
|
||||
static bool get_num_platforms(cl_uint *num_platforms, cl_int *error = NULL);
|
||||
static cl_uint get_num_platforms();
|
||||
|
||||
static bool get_platforms(vector<cl_platform_id> *platform_ids,
|
||||
cl_int *error = NULL);
|
||||
static vector<cl_platform_id> get_platforms();
|
||||
|
||||
static bool get_platform_name(cl_platform_id platform_id,
|
||||
string *platform_name);
|
||||
static string get_platform_name(cl_platform_id platform_id);
|
||||
|
||||
static bool get_num_platform_devices(cl_platform_id platform_id,
|
||||
cl_device_type device_type,
|
||||
cl_uint *num_devices,
|
||||
cl_int *error = NULL);
|
||||
static cl_uint get_num_platform_devices(cl_platform_id platform_id,
|
||||
cl_device_type device_type);
|
||||
|
||||
static bool get_platform_devices(cl_platform_id platform_id,
|
||||
cl_device_type device_type,
|
||||
vector<cl_device_id> *device_ids,
|
||||
cl_int* error = NULL);
|
||||
static vector<cl_device_id> get_platform_devices(cl_platform_id platform_id,
|
||||
cl_device_type device_type);
|
||||
|
||||
/* Device information. */
|
||||
static bool get_device_name(cl_device_id device_id,
|
||||
string *device_name,
|
||||
cl_int* error = NULL);
|
||||
|
||||
static string get_device_name(cl_device_id device_id);
|
||||
|
||||
static bool get_device_type(cl_device_id device_id,
|
||||
cl_device_type *device_type,
|
||||
cl_int* error = NULL);
|
||||
static cl_device_type get_device_type(cl_device_id device_id);
|
||||
|
||||
/* Get somewhat more readable device name.
|
||||
* Main difference is AMD OpenCL here which only gives code name
|
||||
* for the regular device name. This will give more sane device
|
||||
* name using some extensions.
|
||||
*/
|
||||
static string get_readable_device_name(cl_device_id device_id);
|
||||
};
|
||||
|
||||
/* Thread safe cache for contexts and programs.
|
||||
@@ -248,6 +296,7 @@ public:
|
||||
|
||||
bool device_initialized;
|
||||
string platform_name;
|
||||
string device_name;
|
||||
|
||||
bool opencl_error(cl_int err);
|
||||
void opencl_error(const string& message);
|
||||
@@ -266,10 +315,10 @@ public:
|
||||
|
||||
/* Has to be implemented by the real device classes.
|
||||
* The base device will then load all these programs. */
|
||||
virtual void load_kernels(const DeviceRequestedFeatures& requested_features,
|
||||
virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
|
||||
vector<OpenCLProgram*> &programs) = 0;
|
||||
|
||||
void mem_alloc(device_memory& mem, MemoryType type);
|
||||
void mem_alloc(const char *name, device_memory& mem, MemoryType type);
|
||||
void mem_copy_to(device_memory& mem);
|
||||
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem);
|
||||
void mem_zero(device_memory& mem);
|
||||
@@ -326,16 +375,39 @@ protected:
|
||||
|
||||
class ArgumentWrapper {
|
||||
public:
|
||||
ArgumentWrapper() : size(0), pointer(NULL) {}
|
||||
template <typename T>
|
||||
ArgumentWrapper() : size(0), pointer(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
ArgumentWrapper(device_memory& argument) : size(sizeof(void*)),
|
||||
pointer((void*)(&argument.device_pointer))
|
||||
{
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
ArgumentWrapper(device_vector<T>& argument) : size(sizeof(void*)),
|
||||
pointer((void*)(&argument.device_pointer))
|
||||
{
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
ArgumentWrapper(T& argument) : size(sizeof(argument)),
|
||||
pointer(&argument) { }
|
||||
pointer(&argument)
|
||||
{
|
||||
}
|
||||
|
||||
ArgumentWrapper(int argument) : size(sizeof(int)),
|
||||
int_value(argument),
|
||||
pointer(&int_value) { }
|
||||
pointer(&int_value)
|
||||
{
|
||||
}
|
||||
|
||||
ArgumentWrapper(float argument) : size(sizeof(float)),
|
||||
float_value(argument),
|
||||
pointer(&float_value) { }
|
||||
pointer(&float_value)
|
||||
{
|
||||
}
|
||||
|
||||
size_t size;
|
||||
int int_value;
|
||||
float float_value;
|
||||
|
@@ -16,15 +16,15 @@
|
||||
|
||||
#ifdef WITH_OPENCL
|
||||
|
||||
#include "opencl.h"
|
||||
#include "device/opencl/opencl.h"
|
||||
|
||||
#include "kernel_types.h"
|
||||
#include "kernel/kernel_types.h"
|
||||
|
||||
#include "util_foreach.h"
|
||||
#include "util_logging.h"
|
||||
#include "util_md5.h"
|
||||
#include "util_path.h"
|
||||
#include "util_time.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_md5.h"
|
||||
#include "util/util_path.h"
|
||||
#include "util/util_time.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -82,9 +82,10 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou
|
||||
cpPlatform = platform_device.platform_id;
|
||||
cdDevice = platform_device.device_id;
|
||||
platform_name = platform_device.platform_name;
|
||||
device_name = platform_device.device_name;
|
||||
VLOG(2) << "Creating new Cycles device for OpenCL platform "
|
||||
<< platform_name << ", device "
|
||||
<< platform_device.device_name << ".";
|
||||
<< device_name << ".";
|
||||
|
||||
{
|
||||
/* try to use cached context */
|
||||
@@ -113,12 +114,16 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou
|
||||
}
|
||||
|
||||
cqCommandQueue = clCreateCommandQueue(cxContext, cdDevice, 0, &ciErr);
|
||||
if(opencl_error(ciErr))
|
||||
if(opencl_error(ciErr)) {
|
||||
opencl_error("OpenCL: Error creating command queue");
|
||||
return;
|
||||
}
|
||||
|
||||
null_mem = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr);
|
||||
if(opencl_error(ciErr))
|
||||
if(opencl_error(ciErr)) {
|
||||
opencl_error("OpenCL: Error creating memory buffer for NULL");
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Device init success\n");
|
||||
device_initialized = true;
|
||||
@@ -147,10 +152,8 @@ OpenCLDeviceBase::~OpenCLDeviceBase()
|
||||
void CL_CALLBACK OpenCLDeviceBase::context_notify_callback(const char *err_info,
|
||||
const void * /*private_info*/, size_t /*cb*/, void *user_data)
|
||||
{
|
||||
char name[256];
|
||||
clGetDeviceInfo((cl_device_id)user_data, CL_DEVICE_NAME, sizeof(name), &name, NULL);
|
||||
|
||||
fprintf(stderr, "OpenCL error (%s): %s\n", name, err_info);
|
||||
string device_name = OpenCLInfo::get_device_name((cl_device_id)user_data);
|
||||
fprintf(stderr, "OpenCL error (%s): %s\n", device_name.c_str(), err_info);
|
||||
}
|
||||
|
||||
bool OpenCLDeviceBase::opencl_version_check()
|
||||
@@ -191,6 +194,8 @@ string OpenCLDeviceBase::device_md5_hash(string kernel_custom_build_options)
|
||||
|
||||
bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_features)
|
||||
{
|
||||
VLOG(2) << "Loading kernels for platform " << platform_name
|
||||
<< ", device " << device_name << ".";
|
||||
/* Verify if device was initialized. */
|
||||
if(!device_initialized) {
|
||||
fprintf(stderr, "OpenCL: failed to initialize device.\n");
|
||||
@@ -206,11 +211,14 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
|
||||
base_program.add_kernel(ustring("convert_to_half_float"));
|
||||
base_program.add_kernel(ustring("shader"));
|
||||
base_program.add_kernel(ustring("bake"));
|
||||
base_program.add_kernel(ustring("zero_buffer"));
|
||||
|
||||
vector<OpenCLProgram*> programs;
|
||||
programs.push_back(&base_program);
|
||||
/* Call actual class to fill the vector with its programs. */
|
||||
load_kernels(requested_features, programs);
|
||||
if(!load_kernels(requested_features, programs)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Parallel compilation is supported by Cycles, but currently all OpenCL frameworks
|
||||
* serialize the calls internally, so it's not much use right now.
|
||||
@@ -242,8 +250,14 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
|
||||
return true;
|
||||
}
|
||||
|
||||
void OpenCLDeviceBase::mem_alloc(device_memory& mem, MemoryType type)
|
||||
void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryType type)
|
||||
{
|
||||
if(name) {
|
||||
VLOG(1) << "Buffer allocate: " << name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
}
|
||||
|
||||
size_t size = mem.memory_size();
|
||||
|
||||
cl_mem_flags mem_flag;
|
||||
@@ -311,8 +325,61 @@ void OpenCLDeviceBase::mem_copy_from(device_memory& mem, int y, int w, int h, in
|
||||
void OpenCLDeviceBase::mem_zero(device_memory& mem)
|
||||
{
|
||||
if(mem.device_pointer) {
|
||||
memset((void*)mem.data_pointer, 0, mem.memory_size());
|
||||
mem_copy_to(mem);
|
||||
if(base_program.is_loaded()) {
|
||||
cl_kernel ckZeroBuffer = base_program(ustring("zero_buffer"));
|
||||
|
||||
size_t global_size[] = {1024, 1024};
|
||||
size_t num_threads = global_size[0] * global_size[1];
|
||||
|
||||
cl_mem d_buffer = CL_MEM_PTR(mem.device_pointer);
|
||||
cl_ulong d_offset = 0;
|
||||
cl_ulong d_size = 0;
|
||||
|
||||
while(d_offset < mem.memory_size()) {
|
||||
d_size = std::min<cl_ulong>(num_threads*sizeof(float4), mem.memory_size() - d_offset);
|
||||
|
||||
kernel_set_args(ckZeroBuffer, 0, d_buffer, d_size, d_offset);
|
||||
|
||||
ciErr = clEnqueueNDRangeKernel(cqCommandQueue,
|
||||
ckZeroBuffer,
|
||||
2,
|
||||
NULL,
|
||||
global_size,
|
||||
NULL,
|
||||
0,
|
||||
NULL,
|
||||
NULL);
|
||||
opencl_assert_err(ciErr, "clEnqueueNDRangeKernel");
|
||||
|
||||
d_offset += d_size;
|
||||
}
|
||||
}
|
||||
|
||||
if(mem.data_pointer) {
|
||||
memset((void*)mem.data_pointer, 0, mem.memory_size());
|
||||
}
|
||||
|
||||
if(!base_program.is_loaded()) {
|
||||
void* zero = (void*)mem.data_pointer;
|
||||
|
||||
if(!mem.data_pointer) {
|
||||
zero = util_aligned_malloc(mem.memory_size(), 16);
|
||||
memset(zero, 0, mem.memory_size());
|
||||
}
|
||||
|
||||
opencl_assert(clEnqueueWriteBuffer(cqCommandQueue,
|
||||
CL_MEM_PTR(mem.device_pointer),
|
||||
CL_TRUE,
|
||||
0,
|
||||
mem.memory_size(),
|
||||
zero,
|
||||
0,
|
||||
NULL, NULL));
|
||||
|
||||
if(!mem.data_pointer) {
|
||||
util_aligned_free(zero);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -337,7 +404,7 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size)
|
||||
device_vector<uchar> *data = new device_vector<uchar>();
|
||||
data->copy((uchar*)host, size);
|
||||
|
||||
mem_alloc(*data, MEM_READ_ONLY);
|
||||
mem_alloc(name, *data, MEM_READ_ONLY);
|
||||
i = const_mem_map.insert(ConstMemMap::value_type(name, data)).first;
|
||||
}
|
||||
else {
|
||||
@@ -356,7 +423,7 @@ void OpenCLDeviceBase::tex_alloc(const char *name,
|
||||
VLOG(1) << "Texture allocate: " << name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
mem_alloc(mem, MEM_READ_ONLY);
|
||||
mem_alloc(NULL, mem, MEM_READ_ONLY);
|
||||
mem_copy_to(mem);
|
||||
assert(mem_map.find(name) == mem_map.end());
|
||||
mem_map.insert(MemMap::value_type(name, mem.device_pointer));
|
||||
@@ -460,7 +527,7 @@ void OpenCLDeviceBase::film_convert(DeviceTask& task, device_ptr buffer, device_
|
||||
|
||||
#define KERNEL_TEX(type, ttype, name) \
|
||||
set_kernel_arg_mem(ckFilmConvertKernel, &start_arg_index, #name);
|
||||
#include "kernel_textures.h"
|
||||
#include "kernel/kernel_textures.h"
|
||||
#undef KERNEL_TEX
|
||||
|
||||
start_arg_index += kernel_set_args(ckFilmConvertKernel,
|
||||
@@ -511,7 +578,7 @@ void OpenCLDeviceBase::shader(DeviceTask& task)
|
||||
|
||||
#define KERNEL_TEX(type, ttype, name) \
|
||||
set_kernel_arg_mem(kernel, &start_arg_index, #name);
|
||||
#include "kernel_textures.h"
|
||||
#include "kernel/kernel_textures.h"
|
||||
#undef KERNEL_TEX
|
||||
|
||||
start_arg_index += kernel_set_args(kernel,
|
||||
|
@@ -16,15 +16,15 @@
|
||||
|
||||
#ifdef WITH_OPENCL
|
||||
|
||||
#include "opencl.h"
|
||||
#include "device/opencl/opencl.h"
|
||||
|
||||
#include "buffers.h"
|
||||
#include "render/buffers.h"
|
||||
|
||||
#include "kernel_types.h"
|
||||
#include "kernel/kernel_types.h"
|
||||
|
||||
#include "util_md5.h"
|
||||
#include "util_path.h"
|
||||
#include "util_time.h"
|
||||
#include "util/util_md5.h"
|
||||
#include "util/util_path.h"
|
||||
#include "util/util_time.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -43,11 +43,12 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual void load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
|
||||
virtual bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
|
||||
vector<OpenCLProgram*> &programs)
|
||||
{
|
||||
path_trace_program.add_kernel(ustring("path_trace"));
|
||||
programs.push_back(&path_trace_program);
|
||||
return true;
|
||||
}
|
||||
|
||||
~OpenCLDeviceMegaKernel()
|
||||
@@ -83,7 +84,7 @@ public:
|
||||
|
||||
#define KERNEL_TEX(type, ttype, name) \
|
||||
set_kernel_arg_mem(ckPathTraceKernel, &start_arg_index, #name);
|
||||
#include "kernel_textures.h"
|
||||
#include "kernel/kernel_textures.h"
|
||||
#undef KERNEL_TEX
|
||||
|
||||
start_arg_index += kernel_set_args(ckPathTraceKernel,
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -16,11 +16,12 @@
|
||||
|
||||
#ifdef WITH_OPENCL
|
||||
|
||||
#include "opencl.h"
|
||||
#include "device/opencl/opencl.h"
|
||||
|
||||
#include "util_logging.h"
|
||||
#include "util_path.h"
|
||||
#include "util_time.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_md5.h"
|
||||
#include "util/util_path.h"
|
||||
#include "util/util_time.h"
|
||||
|
||||
using std::cerr;
|
||||
using std::endl;
|
||||
@@ -234,7 +235,7 @@ string OpenCLCache::get_kernel_md5()
|
||||
thread_scoped_lock lock(self.kernel_md5_lock);
|
||||
|
||||
if(self.kernel_md5.empty()) {
|
||||
self.kernel_md5 = path_files_md5_hash(path_get("kernel"));
|
||||
self.kernel_md5 = path_files_md5_hash(path_get("source"));
|
||||
}
|
||||
return self.kernel_md5;
|
||||
}
|
||||
@@ -309,6 +310,8 @@ bool OpenCLDeviceBase::OpenCLProgram::build_kernel(const string *debug_src)
|
||||
string build_options;
|
||||
build_options = device->kernel_build_options(debug_src) + kernel_build_options;
|
||||
|
||||
VLOG(1) << "Build options passed to clBuildProgram: '"
|
||||
<< build_options << "'.";
|
||||
cl_int ciErr = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
|
||||
|
||||
/* show warnings even if build is successful */
|
||||
@@ -336,12 +339,13 @@ bool OpenCLDeviceBase::OpenCLProgram::build_kernel(const string *debug_src)
|
||||
|
||||
bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src)
|
||||
{
|
||||
string source = "#include \"kernels/opencl/" + kernel_file + "\" // " + OpenCLCache::get_kernel_md5() + "\n";
|
||||
string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
|
||||
/* We compile kernels consisting of many files. unfortunately OpenCL
|
||||
* kernel caches do not seem to recognize changes in included files.
|
||||
* so we force recompile on changes by adding the md5 hash of all files.
|
||||
*/
|
||||
source = path_source_replace_includes(source, path_get("kernel"));
|
||||
source = path_source_replace_includes(source, path_get("source"));
|
||||
source += "\n// " + util_md5_string(source) + "\n";
|
||||
|
||||
if(debug_src) {
|
||||
path_write_text(*debug_src, source);
|
||||
@@ -352,10 +356,10 @@ bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src)
|
||||
cl_int ciErr;
|
||||
|
||||
program = clCreateProgramWithSource(device->cxContext,
|
||||
1,
|
||||
&source_str,
|
||||
&source_len,
|
||||
&ciErr);
|
||||
1,
|
||||
&source_str,
|
||||
&source_len,
|
||||
&ciErr);
|
||||
|
||||
if(ciErr != CL_SUCCESS) {
|
||||
add_error(string("OpenCL program creation failed: ") + clewErrorString(ciErr));
|
||||
@@ -438,7 +442,11 @@ void OpenCLDeviceBase::OpenCLProgram::load()
|
||||
if(!program) {
|
||||
add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
|
||||
|
||||
string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + OpenCLCache::get_kernel_md5();
|
||||
/* need to create source to get md5 */
|
||||
string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
|
||||
source = path_source_replace_includes(source, path_get("source"));
|
||||
|
||||
string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source);
|
||||
basename = path_cache_get(path_join("kernels", basename));
|
||||
string clbin = basename + ".clbin";
|
||||
|
||||
@@ -544,6 +552,11 @@ bool OpenCLInfo::use_debug()
|
||||
return DebugFlags().opencl.debug;
|
||||
}
|
||||
|
||||
bool OpenCLInfo::use_single_program()
|
||||
{
|
||||
return DebugFlags().opencl.single_program;
|
||||
}
|
||||
|
||||
bool OpenCLInfo::kernel_use_advanced_shading(const string& platform)
|
||||
{
|
||||
/* keep this in sync with kernel_types.h! */
|
||||
@@ -587,11 +600,20 @@ bool OpenCLInfo::device_supported(const string& platform_name,
|
||||
const cl_device_id device_id)
|
||||
{
|
||||
cl_device_type device_type;
|
||||
clGetDeviceInfo(device_id,
|
||||
CL_DEVICE_TYPE,
|
||||
sizeof(cl_device_type),
|
||||
&device_type,
|
||||
NULL);
|
||||
if(!get_device_type(device_id, &device_type)) {
|
||||
return false;
|
||||
}
|
||||
string device_name;
|
||||
if(!get_device_name(device_id, &device_name)) {
|
||||
return false;
|
||||
}
|
||||
/* It is possible to have Iris GPU on AMD/Apple OpenCL framework
|
||||
* (aka, it will not be on Intel framework). This isn't supported
|
||||
* and needs an explicit blacklist.
|
||||
*/
|
||||
if(strstr(device_name.c_str(), "Iris")) {
|
||||
return false;
|
||||
}
|
||||
if(platform_name == "AMD Accelerated Parallel Processing" &&
|
||||
device_type == CL_DEVICE_TYPE_GPU)
|
||||
{
|
||||
@@ -705,39 +727,30 @@ void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices
|
||||
return;
|
||||
}
|
||||
|
||||
cl_int error;
|
||||
vector<cl_device_id> device_ids;
|
||||
cl_uint num_devices = 0;
|
||||
vector<cl_platform_id> platform_ids;
|
||||
cl_uint num_platforms = 0;
|
||||
|
||||
/* Get devices. */
|
||||
if(clGetPlatformIDs(0, NULL, &num_platforms) != CL_SUCCESS ||
|
||||
num_platforms == 0)
|
||||
{
|
||||
/* Get platforms. */
|
||||
if(!get_platforms(&platform_ids, &error)) {
|
||||
FIRST_VLOG(2) << "Error fetching platforms:"
|
||||
<< string(clewErrorString(error));
|
||||
first_time = false;
|
||||
return;
|
||||
}
|
||||
if(platform_ids.size() == 0) {
|
||||
FIRST_VLOG(2) << "No OpenCL platforms were found.";
|
||||
first_time = false;
|
||||
return;
|
||||
}
|
||||
platform_ids.resize(num_platforms);
|
||||
if(clGetPlatformIDs(num_platforms, &platform_ids[0], NULL) != CL_SUCCESS) {
|
||||
FIRST_VLOG(2) << "Failed to fetch platform IDs from the driver..";
|
||||
first_time = false;
|
||||
return;
|
||||
}
|
||||
/* Devices are numbered consecutively across platforms. */
|
||||
for(int platform = 0; platform < num_platforms; platform++) {
|
||||
for(int platform = 0; platform < platform_ids.size(); platform++) {
|
||||
cl_platform_id platform_id = platform_ids[platform];
|
||||
char pname[256];
|
||||
if(clGetPlatformInfo(platform_id,
|
||||
CL_PLATFORM_NAME,
|
||||
sizeof(pname),
|
||||
&pname,
|
||||
NULL) != CL_SUCCESS)
|
||||
{
|
||||
string platform_name;
|
||||
if(!get_platform_name(platform_id, &platform_name)) {
|
||||
FIRST_VLOG(2) << "Failed to get platform name, ignoring.";
|
||||
continue;
|
||||
}
|
||||
string platform_name = pname;
|
||||
FIRST_VLOG(2) << "Enumerating devices for platform "
|
||||
<< platform_name << ".";
|
||||
if(!platform_version_check(platform_id)) {
|
||||
@@ -745,39 +758,28 @@ void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices
|
||||
<< " due to too old compiler version.";
|
||||
continue;
|
||||
}
|
||||
num_devices = 0;
|
||||
cl_int ciErr;
|
||||
if((ciErr = clGetDeviceIDs(platform_id,
|
||||
device_type,
|
||||
0,
|
||||
NULL,
|
||||
&num_devices)) != CL_SUCCESS || num_devices == 0)
|
||||
if(!get_platform_devices(platform_id,
|
||||
device_type,
|
||||
&device_ids,
|
||||
&error))
|
||||
{
|
||||
FIRST_VLOG(2) << "Ignoring platform " << platform_name
|
||||
<< ", failed to fetch number of devices: " << string(clewErrorString(ciErr));
|
||||
<< ", failed to fetch of devices: "
|
||||
<< string(clewErrorString(error));
|
||||
continue;
|
||||
}
|
||||
device_ids.resize(num_devices);
|
||||
if(clGetDeviceIDs(platform_id,
|
||||
device_type,
|
||||
num_devices,
|
||||
&device_ids[0],
|
||||
NULL) != CL_SUCCESS)
|
||||
{
|
||||
if(device_ids.size() == 0) {
|
||||
FIRST_VLOG(2) << "Ignoring platform " << platform_name
|
||||
<< ", failed to fetch devices list.";
|
||||
<< ", it has no devices.";
|
||||
continue;
|
||||
}
|
||||
for(int num = 0; num < num_devices; num++) {
|
||||
cl_device_id device_id = device_ids[num];
|
||||
char device_name[1024] = "\0";
|
||||
if(clGetDeviceInfo(device_id,
|
||||
CL_DEVICE_NAME,
|
||||
sizeof(device_name),
|
||||
&device_name,
|
||||
NULL) != CL_SUCCESS)
|
||||
{
|
||||
FIRST_VLOG(2) << "Failed to fetch device name, ignoring.";
|
||||
for(int num = 0; num < device_ids.size(); num++) {
|
||||
const cl_device_id device_id = device_ids[num];
|
||||
string device_name;
|
||||
if(!get_device_name(device_id, &device_name, &error)) {
|
||||
FIRST_VLOG(2) << "Failed to fetch device name: "
|
||||
<< string(clewErrorString(error))
|
||||
<< ", ignoring.";
|
||||
continue;
|
||||
}
|
||||
if(!device_version_check(device_id)) {
|
||||
@@ -789,24 +791,28 @@ void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices
|
||||
device_supported(platform_name, device_id))
|
||||
{
|
||||
cl_device_type device_type;
|
||||
if(clGetDeviceInfo(device_id,
|
||||
CL_DEVICE_TYPE,
|
||||
sizeof(cl_device_type),
|
||||
&device_type,
|
||||
NULL) != CL_SUCCESS)
|
||||
{
|
||||
if(!get_device_type(device_id, &device_type, &error)) {
|
||||
FIRST_VLOG(2) << "Ignoring device " << device_name
|
||||
<< ", failed to fetch device type.";
|
||||
<< ", failed to fetch device type:"
|
||||
<< string(clewErrorString(error));
|
||||
continue;
|
||||
}
|
||||
FIRST_VLOG(2) << "Adding new device " << device_name << ".";
|
||||
string readable_device_name =
|
||||
get_readable_device_name(device_id);
|
||||
if(readable_device_name != device_name) {
|
||||
FIRST_VLOG(2) << "Using more readable device name: "
|
||||
<< readable_device_name;
|
||||
}
|
||||
FIRST_VLOG(2) << "Adding new device "
|
||||
<< readable_device_name << ".";
|
||||
string hardware_id = get_hardware_id(platform_name, device_id);
|
||||
usable_devices->push_back(OpenCLPlatformDevice(platform_id,
|
||||
platform_name,
|
||||
device_id,
|
||||
device_type,
|
||||
device_name,
|
||||
hardware_id));
|
||||
usable_devices->push_back(OpenCLPlatformDevice(
|
||||
platform_id,
|
||||
platform_name,
|
||||
device_id,
|
||||
device_type,
|
||||
readable_device_name,
|
||||
hardware_id));
|
||||
}
|
||||
else {
|
||||
FIRST_VLOG(2) << "Ignoring device " << device_name
|
||||
@@ -817,6 +823,252 @@ void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices
|
||||
first_time = false;
|
||||
}
|
||||
|
||||
/* Fetch the IDs of all OpenCL platforms reported by the driver.
 *
 * platform_ids  Output vector; always cleared first. On success it holds one
 *               ID per platform (possibly none), on failure it is left empty.
 * error         Optional; when non-NULL receives CL_SUCCESS or the failing
 *               OpenCL status code.
 *
 * Returns true when the query succeeded, including the case of zero
 * platforms.
 */
bool OpenCLInfo::get_platforms(vector<cl_platform_id> *platform_ids,
                               cl_int *error)
{
	/* Reset from possible previous state. */
	platform_ids->resize(0);
	cl_uint num_platforms;
	if(!get_num_platforms(&num_platforms, error)) {
		return false;
	}
	/* Nothing to fetch. Bail out before touching element 0: at(0) on an
	 * empty vector throws std::out_of_range.
	 */
	if(num_platforms == 0) {
		if(error != NULL) {
			*error = CL_SUCCESS;
		}
		return true;
	}
	/* Get actual platforms. */
	platform_ids->resize(num_platforms);
	const cl_int err = clGetPlatformIDs(num_platforms,
	                                    &platform_ids->at(0),
	                                    NULL);
	if(err != CL_SUCCESS) {
		/* Do not hand back IDs which were never filled in by the driver. */
		platform_ids->resize(0);
		if(error != NULL) {
			*error = err;
		}
		return false;
	}
	if(error != NULL) {
		*error = CL_SUCCESS;
	}
	return true;
}
|
||||
|
||||
/* Convenience overload: return the platform IDs by value.
 *
 * The status of the underlying query is not reported; the vector is returned
 * in whatever state that query left it.
 */
vector<cl_platform_id> OpenCLInfo::get_platforms()
{
	vector<cl_platform_id> result;
	(void)get_platforms(&result);
	return result;
}
|
||||
|
||||
/* Query how many OpenCL platforms are available.
 *
 * num_platforms  Receives the count; forced to 0 when the query fails.
 * error          Optional; when non-NULL receives the OpenCL status code of
 *                the query (CL_SUCCESS on success).
 *
 * Returns true when clGetPlatformIDs() succeeded.
 */
bool OpenCLInfo::get_num_platforms(cl_uint *num_platforms, cl_int *error)
{
	const cl_int status = clGetPlatformIDs(0, NULL, num_platforms);
	if(error != NULL) {
		*error = status;
	}
	if(status == CL_SUCCESS) {
		return true;
	}
	*num_platforms = 0;
	return false;
}
|
||||
|
||||
/* Convenience overload: number of available platforms, or 0 when the query
 * fails.
 */
cl_uint OpenCLInfo::get_num_platforms()
{
	cl_uint count;
	const bool ok = get_num_platforms(&count);
	return ok ? count : 0;
}
|
||||
|
||||
/* Read CL_PLATFORM_NAME of the given platform.
 *
 * platform_name  Receives the name, or an empty string when the query fails.
 *
 * Returns true when clGetPlatformInfo() succeeded.
 */
bool OpenCLInfo::get_platform_name(cl_platform_id platform_id,
                                   string *platform_name)
{
	char name_buffer[256];
	const cl_int status = clGetPlatformInfo(platform_id,
	                                        CL_PLATFORM_NAME,
	                                        sizeof(name_buffer),
	                                        &name_buffer,
	                                        NULL);
	if(status == CL_SUCCESS) {
		*platform_name = name_buffer;
		return true;
	}
	*platform_name = "";
	return false;
}
|
||||
|
||||
/* Convenience overload: platform name by value, empty string on failure. */
string OpenCLInfo::get_platform_name(cl_platform_id platform_id)
{
	string name;
	if(get_platform_name(platform_id, &name)) {
		return name;
	}
	return "";
}
|
||||
|
||||
/* Query how many devices of the given type a platform exposes.
 *
 * num_devices  Receives the count; forced to 0 when the query fails.
 * error        Optional; when non-NULL receives the OpenCL status code of
 *              the query (CL_SUCCESS on success).
 *
 * Returns true when clGetDeviceIDs() succeeded.
 */
bool OpenCLInfo::get_num_platform_devices(cl_platform_id platform_id,
                                          cl_device_type device_type,
                                          cl_uint *num_devices,
                                          cl_int *error)
{
	const cl_int status = clGetDeviceIDs(platform_id,
	                                     device_type,
	                                     0,
	                                     NULL,
	                                     num_devices);
	if(error != NULL) {
		*error = status;
	}
	if(status == CL_SUCCESS) {
		return true;
	}
	*num_devices = 0;
	return false;
}
|
||||
|
||||
/* Convenience overload: number of devices of the given type on a platform,
 * or 0 when the query fails.
 */
cl_uint OpenCLInfo::get_num_platform_devices(cl_platform_id platform_id,
                                             cl_device_type device_type)
{
	cl_uint count;
	const bool ok = get_num_platform_devices(platform_id,
	                                         device_type,
	                                         &count);
	return ok ? count : 0;
}
|
||||
|
||||
/* Fetch the IDs of all devices of the given type on a platform.
 *
 * device_ids  Output vector; always cleared first. On success it holds one
 *             ID per device (possibly none), on failure it is left empty.
 * error       Optional; when non-NULL receives CL_SUCCESS or the failing
 *             OpenCL status code.
 *
 * Returns true when the query succeeded, including the case of zero devices.
 */
bool OpenCLInfo::get_platform_devices(cl_platform_id platform_id,
                                      cl_device_type device_type,
                                      vector<cl_device_id> *device_ids,
                                      cl_int* error)
{
	/* Reset from possible previous state. */
	device_ids->resize(0);
	/* Get number of devices to pre-allocate memory. */
	cl_uint num_devices;
	if(!get_num_platform_devices(platform_id,
	                             device_type,
	                             &num_devices,
	                             error))
	{
		return false;
	}
	/* Nothing to fetch. Bail out before touching element 0: at(0) on an
	 * empty vector throws std::out_of_range.
	 */
	if(num_devices == 0) {
		if(error != NULL) {
			*error = CL_SUCCESS;
		}
		return true;
	}
	/* Get actual device list. */
	device_ids->resize(num_devices);
	const cl_int err = clGetDeviceIDs(platform_id,
	                                  device_type,
	                                  num_devices,
	                                  &device_ids->at(0),
	                                  NULL);
	if(err != CL_SUCCESS) {
		/* Do not hand back IDs which were never filled in by the driver. */
		device_ids->resize(0);
		if(error != NULL) {
			*error = err;
		}
		return false;
	}
	if(error != NULL) {
		*error = CL_SUCCESS;
	}
	return true;
}
|
||||
|
||||
/* Convenience overload: return the device IDs by value.
 *
 * The status of the underlying query is not reported; the vector is returned
 * in whatever state that query left it.
 */
vector<cl_device_id> OpenCLInfo::get_platform_devices(cl_platform_id platform_id,
                                                      cl_device_type device_type)
{
	vector<cl_device_id> result;
	(void)get_platform_devices(platform_id, device_type, &result);
	return result;
}
|
||||
|
||||
/* Read CL_DEVICE_NAME of the given device.
 *
 * device_name  Receives the name, or an empty string when the query fails.
 * error        Optional; when non-NULL receives the OpenCL status code of
 *              the query (CL_SUCCESS on success).
 *
 * Returns true when clGetDeviceInfo() succeeded.
 */
bool OpenCLInfo::get_device_name(cl_device_id device_id,
                                 string *device_name,
                                 cl_int* error)
{
	char name_buffer[1024];
	const cl_int status = clGetDeviceInfo(device_id,
	                                      CL_DEVICE_NAME,
	                                      sizeof(name_buffer),
	                                      &name_buffer,
	                                      NULL);
	if(error != NULL) {
		*error = status;
	}
	if(status != CL_SUCCESS) {
		*device_name = "";
		return false;
	}
	*device_name = name_buffer;
	return true;
}
|
||||
|
||||
/* Convenience overload: device name by value, empty string on failure. */
string OpenCLInfo::get_device_name(cl_device_id device_id)
{
	string name;
	if(get_device_name(device_id, &name)) {
		return name;
	}
	return "";
}
|
||||
|
||||
/* Read CL_DEVICE_TYPE of the given device.
 *
 * device_type  Receives the type; forced to 0 when the query fails.
 * error        Optional; when non-NULL receives the OpenCL status code of
 *              the query (CL_SUCCESS on success).
 *
 * Returns true when clGetDeviceInfo() succeeded.
 */
bool OpenCLInfo::get_device_type(cl_device_id device_id,
                                 cl_device_type *device_type,
                                 cl_int* error)
{
	const cl_int status = clGetDeviceInfo(device_id,
	                                      CL_DEVICE_TYPE,
	                                      sizeof(cl_device_type),
	                                      device_type,
	                                      NULL);
	if(error != NULL) {
		*error = status;
	}
	if(status == CL_SUCCESS) {
		return true;
	}
	*device_type = 0;
	return false;
}
|
||||
|
||||
/* Convenience overload: device type by value, 0 when the query fails. */
cl_device_type OpenCLInfo::get_device_type(cl_device_id device_id)
{
	cl_device_type type;
	if(get_device_type(device_id, &type)) {
		return type;
	}
	return 0;
}
|
||||
|
||||
/* Get a human readable name of the given device.
 *
 * The AMD-specific board name is preferred when the driver provides a
 * non-empty one; otherwise the standard CL_DEVICE_NAME is used. The result
 * is an empty string only when that fallback query fails as well.
 */
string OpenCLInfo::get_readable_device_name(cl_device_id device_id)
{
	char board_name[1024];
	size_t length = 0;
	if(clGetDeviceInfo(device_id,
	                   CL_DEVICE_BOARD_NAME_AMD,
	                   sizeof(board_name),
	                   &board_name,
	                   &length) == CL_SUCCESS)
	{
		/* A successful query may still yield an empty string; only use the
		 * board name when it carries actual text, so the device never ends
		 * up with a blank name.
		 */
		if(length != 0 && board_name[0] != '\0') {
			return board_name;
		}
	}
	/* Fallback to standard device name API. */
	return get_device_name(device_id);
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
@@ -1,7 +1,6 @@
|
||||
|
||||
set(INC
|
||||
.
|
||||
../util
|
||||
..
|
||||
)
|
||||
|
||||
set(SRC
|
||||
|
@@ -14,12 +14,12 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "node.h"
|
||||
#include "node_type.h"
|
||||
#include "graph/node.h"
|
||||
#include "graph/node_type.h"
|
||||
|
||||
#include "util_foreach.h"
|
||||
#include "util_param.h"
|
||||
#include "util_transform.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_param.h"
|
||||
#include "util/util_transform.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -16,11 +16,11 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "node_type.h"
|
||||
#include "graph/node_type.h"
|
||||
|
||||
#include "util_map.h"
|
||||
#include "util_param.h"
|
||||
#include "util_vector.h"
|
||||
#include "util/util_map.h"
|
||||
#include "util/util_param.h"
|
||||
#include "util/util_vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -16,8 +16,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "util_map.h"
|
||||
#include "util_param.h"
|
||||
#include "util/util_map.h"
|
||||
#include "util/util_param.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -14,9 +14,9 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "node_type.h"
|
||||
#include "util_foreach.h"
|
||||
#include "util_transform.h"
|
||||
#include "graph/node_type.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_transform.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -16,12 +16,12 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "node_enum.h"
|
||||
#include "graph/node_enum.h"
|
||||
|
||||
#include "util_map.h"
|
||||
#include "util_param.h"
|
||||
#include "util_string.h"
|
||||
#include "util_vector.h"
|
||||
#include "util/util_map.h"
|
||||
#include "util/util_param.h"
|
||||
#include "util/util_string.h"
|
||||
#include "util/util_vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -14,11 +14,11 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "node_xml.h"
|
||||
#include "graph/node_xml.h"
|
||||
|
||||
#include "util_foreach.h"
|
||||
#include "util_string.h"
|
||||
#include "util_transform.h"
|
||||
#include "util/util_foreach.h"
|
||||
#include "util/util_string.h"
|
||||
#include "util/util_transform.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -16,11 +16,11 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "node.h"
|
||||
#include "graph/node.h"
|
||||
|
||||
#include "util_map.h"
|
||||
#include "util_string.h"
|
||||
#include "util_xml.h"
|
||||
#include "util/util_map.h"
|
||||
#include "util/util_string.h"
|
||||
#include "util/util_xml.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@@ -1,10 +1,7 @@
|
||||
remove_extra_strict_flags()
|
||||
|
||||
set(INC
|
||||
.
|
||||
../util
|
||||
osl
|
||||
svm
|
||||
..
|
||||
)
|
||||
|
||||
set(INC_SYS
|
||||
@@ -13,19 +10,28 @@ set(INC_SYS
|
||||
|
||||
set(SRC
|
||||
kernels/cpu/kernel.cpp
|
||||
kernels/cpu/kernel_split.cpp
|
||||
kernels/opencl/kernel.cl
|
||||
kernels/opencl/kernel_state_buffer_size.cl
|
||||
kernels/opencl/kernel_split.cl
|
||||
kernels/opencl/kernel_data_init.cl
|
||||
kernels/opencl/kernel_path_init.cl
|
||||
kernels/opencl/kernel_queue_enqueue.cl
|
||||
kernels/opencl/kernel_scene_intersect.cl
|
||||
kernels/opencl/kernel_lamp_emission.cl
|
||||
kernels/opencl/kernel_background_buffer_update.cl
|
||||
kernels/opencl/kernel_do_volume.cl
|
||||
kernels/opencl/kernel_indirect_background.cl
|
||||
kernels/opencl/kernel_shader_eval.cl
|
||||
kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
|
||||
kernels/opencl/kernel_subsurface_scatter.cl
|
||||
kernels/opencl/kernel_direct_lighting.cl
|
||||
kernels/opencl/kernel_shadow_blocked.cl
|
||||
kernels/opencl/kernel_shadow_blocked_ao.cl
|
||||
kernels/opencl/kernel_shadow_blocked_dl.cl
|
||||
kernels/opencl/kernel_next_iteration_setup.cl
|
||||
kernels/opencl/kernel_sum_all_radiance.cl
|
||||
kernels/opencl/kernel_indirect_subsurface.cl
|
||||
kernels/opencl/kernel_buffer_update.cl
|
||||
kernels/cuda/kernel.cu
|
||||
kernels/cuda/kernel_split.cu
|
||||
)
|
||||
|
||||
set(SRC_BVH_HEADERS
|
||||
@@ -68,6 +74,7 @@ set(SRC_HEADERS
|
||||
kernel_path_common.h
|
||||
kernel_path_state.h
|
||||
kernel_path_surface.h
|
||||
kernel_path_subsurface.h
|
||||
kernel_path_volume.h
|
||||
kernel_projection.h
|
||||
kernel_queues.h
|
||||
@@ -88,6 +95,10 @@ set(SRC_KERNELS_CPU_HEADERS
|
||||
kernels/cpu/kernel_cpu_image.h
|
||||
)
|
||||
|
||||
set(SRC_KERNELS_CUDA_HEADERS
|
||||
kernels/cuda/kernel_config.h
|
||||
)
|
||||
|
||||
set(SRC_CLOSURE_HEADERS
|
||||
closure/alloc.h
|
||||
closure/bsdf.h
|
||||
@@ -182,6 +193,7 @@ set(SRC_UTIL_HEADERS
|
||||
../util/util_hash.h
|
||||
../util/util_math.h
|
||||
../util/util_math_fast.h
|
||||
../util/util_math_intersect.h
|
||||
../util/util_static_assert.h
|
||||
../util/util_transform.h
|
||||
../util/util_texture.h
|
||||
@@ -189,17 +201,25 @@ set(SRC_UTIL_HEADERS
|
||||
)
|
||||
|
||||
set(SRC_SPLIT_HEADERS
|
||||
split/kernel_background_buffer_update.h
|
||||
split/kernel_buffer_update.h
|
||||
split/kernel_data_init.h
|
||||
split/kernel_direct_lighting.h
|
||||
split/kernel_do_volume.h
|
||||
split/kernel_holdout_emission_blurring_pathtermination_ao.h
|
||||
split/kernel_indirect_background.h
|
||||
split/kernel_indirect_subsurface.h
|
||||
split/kernel_lamp_emission.h
|
||||
split/kernel_next_iteration_setup.h
|
||||
split/kernel_path_init.h
|
||||
split/kernel_queue_enqueue.h
|
||||
split/kernel_scene_intersect.h
|
||||
split/kernel_shader_eval.h
|
||||
split/kernel_shadow_blocked.h
|
||||
split/kernel_shadow_blocked_ao.h
|
||||
split/kernel_shadow_blocked_dl.h
|
||||
split/kernel_split_common.h
|
||||
split/kernel_sum_all_radiance.h
|
||||
split/kernel_split_data.h
|
||||
split/kernel_split_data_types.h
|
||||
split/kernel_subsurface_scatter.h
|
||||
)
|
||||
|
||||
# CUDA module
|
||||
@@ -227,8 +247,9 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
endif()
|
||||
|
||||
# build for each arch
|
||||
set(cuda_sources kernels/cuda/kernel.cu
|
||||
set(cuda_sources kernels/cuda/kernel.cu kernels/cuda/kernel_split.cu
|
||||
${SRC_HEADERS}
|
||||
${SRC_KERNELS_CUDA_HEADERS}
|
||||
${SRC_BVH_HEADERS}
|
||||
${SRC_SVM_HEADERS}
|
||||
${SRC_GEOM_HEADERS}
|
||||
@@ -237,15 +258,22 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
)
|
||||
set(cuda_cubins)
|
||||
|
||||
macro(CYCLES_CUDA_KERNEL_ADD arch experimental)
|
||||
if(${experimental})
|
||||
set(cuda_extra_flags "-D__KERNEL_EXPERIMENTAL__")
|
||||
set(cuda_cubin kernel_experimental_${arch}.cubin)
|
||||
macro(CYCLES_CUDA_KERNEL_ADD arch split experimental)
|
||||
if(${split})
|
||||
set(cuda_extra_flags "-D__SPLIT__")
|
||||
set(cuda_cubin kernel_split)
|
||||
else()
|
||||
set(cuda_extra_flags "")
|
||||
set(cuda_cubin kernel_${arch}.cubin)
|
||||
set(cuda_cubin kernel)
|
||||
endif()
|
||||
|
||||
if(${experimental})
|
||||
set(cuda_extra_flags ${cuda_extra_flags} -D__KERNEL_EXPERIMENTAL__)
|
||||
set(cuda_cubin ${cuda_cubin}_experimental)
|
||||
endif()
|
||||
|
||||
set(cuda_cubin ${cuda_cubin}_${arch}.cubin)
|
||||
|
||||
if(WITH_CYCLES_DEBUG)
|
||||
set(cuda_debug_flags "-D__KERNEL_DEBUG__")
|
||||
else()
|
||||
@@ -258,13 +286,19 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${cuda_nvcc_version}")
|
||||
set(cuda_math_flags "--use_fast_math")
|
||||
|
||||
# Macro arguments are text substitutions, not variables: a bare `split`
# would test an unrelated variable of that name, so expand it explicitly.
if(${split})
	set(cuda_kernel_src "/kernels/cuda/kernel_split.cu")
else()
	set(cuda_kernel_src "/kernels/cuda/kernel.cu")
endif()
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${cuda_cubin}
|
||||
COMMAND ${cuda_nvcc_command}
|
||||
-arch=${arch}
|
||||
${CUDA_NVCC_FLAGS}
|
||||
-m${CUDA_BITS}
|
||||
--cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda/kernel.cu
|
||||
--cubin ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
|
||||
-o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}
|
||||
--ptxas-options="-v"
|
||||
${cuda_arch_flags}
|
||||
@@ -272,8 +306,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
${cuda_math_flags}
|
||||
${cuda_extra_flags}
|
||||
${cuda_debug_flags}
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/../util
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/svm
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/..
|
||||
-DCCL_NAMESPACE_BEGIN=
|
||||
-DCCL_NAMESPACE_END=
|
||||
-DNVCC
|
||||
@@ -291,7 +324,12 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
|
||||
foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
|
||||
# Compile regular kernel
|
||||
CYCLES_CUDA_KERNEL_ADD(${arch} FALSE)
|
||||
CYCLES_CUDA_KERNEL_ADD(${arch} FALSE FALSE)
|
||||
|
||||
if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES)
|
||||
# Compile split kernel
|
||||
CYCLES_CUDA_KERNEL_ADD(${arch} TRUE FALSE)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
|
||||
@@ -309,36 +347,50 @@ endif()
|
||||
include_directories(${INC})
|
||||
include_directories(SYSTEM ${INC_SYS})
|
||||
|
||||
set_source_files_properties(kernels/cpu/kernel.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}")
|
||||
set_source_files_properties(kernels/cpu/kernel_split.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}")
|
||||
|
||||
if(CXX_HAS_SSE)
|
||||
list(APPEND SRC
|
||||
kernels/cpu/kernel_sse2.cpp
|
||||
kernels/cpu/kernel_sse3.cpp
|
||||
kernels/cpu/kernel_sse41.cpp
|
||||
kernels/cpu/kernel_split_sse2.cpp
|
||||
kernels/cpu/kernel_split_sse3.cpp
|
||||
kernels/cpu/kernel_split_sse41.cpp
|
||||
)
|
||||
|
||||
set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
|
||||
set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
|
||||
set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
|
||||
set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
|
||||
set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
|
||||
set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
|
||||
endif()
|
||||
|
||||
if(CXX_HAS_AVX)
|
||||
list(APPEND SRC
|
||||
kernels/cpu/kernel_avx.cpp
|
||||
kernels/cpu/kernel_split_avx.cpp
|
||||
)
|
||||
set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
|
||||
set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
|
||||
endif()
|
||||
|
||||
if(CXX_HAS_AVX2)
|
||||
list(APPEND SRC
|
||||
kernels/cpu/kernel_avx2.cpp
|
||||
kernels/cpu/kernel_split_avx2.cpp
|
||||
)
|
||||
set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
|
||||
set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
|
||||
endif()
|
||||
|
||||
add_library(cycles_kernel
|
||||
${SRC}
|
||||
${SRC_HEADERS}
|
||||
${SRC_KERNELS_CPU_HEADERS}
|
||||
${SRC_KERNELS_CUDA_HEADERS}
|
||||
${SRC_BVH_HEADERS}
|
||||
${SRC_CLOSURE_HEADERS}
|
||||
${SRC_SVM_HEADERS}
|
||||
@@ -360,24 +412,33 @@ endif()
|
||||
#add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED})
|
||||
#delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)
|
||||
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_background_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_sum_all_radiance.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_BVH_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/bvh)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/closure)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/svm)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/geom)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SPLIT_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/split)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_state_buffer_size.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_split.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_path_init.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_do_volume.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_background.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_scatter.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_ao.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_dl.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel_split.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNELS_CUDA_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_BVH_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/bvh)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/closure)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/svm)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/geom)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/util)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SPLIT_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/split)
|
||||
|
||||
|
@@ -27,43 +27,43 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#include "bvh_types.h"
|
||||
#include "kernel/bvh/bvh_types.h"
|
||||
|
||||
/* Common QBVH functions. */
|
||||
#ifdef __QBVH__
|
||||
# include "qbvh_nodes.h"
|
||||
# include "kernel/bvh/qbvh_nodes.h"
|
||||
#endif
|
||||
|
||||
/* Regular BVH traversal */
|
||||
|
||||
#include "bvh_nodes.h"
|
||||
#include "kernel/bvh/bvh_nodes.h"
|
||||
|
||||
#define BVH_FUNCTION_NAME bvh_intersect
|
||||
#define BVH_FUNCTION_FEATURES 0
|
||||
#include "bvh_traversal.h"
|
||||
#include "kernel/bvh/bvh_traversal.h"
|
||||
|
||||
#if defined(__INSTANCING__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_instancing
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING
|
||||
# include "bvh_traversal.h"
|
||||
# include "kernel/bvh/bvh_traversal.h"
|
||||
#endif
|
||||
|
||||
#if defined(__HAIR__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_hair
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
|
||||
# include "bvh_traversal.h"
|
||||
# include "kernel/bvh/bvh_traversal.h"
|
||||
#endif
|
||||
|
||||
#if defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
|
||||
# include "bvh_traversal.h"
|
||||
# include "kernel/bvh/bvh_traversal.h"
|
||||
#endif
|
||||
|
||||
#if defined(__HAIR__) && defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_hair_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
|
||||
# include "bvh_traversal.h"
|
||||
# include "kernel/bvh/bvh_traversal.h"
|
||||
#endif
|
||||
|
||||
/* Subsurface scattering BVH traversal */
|
||||
@@ -71,12 +71,12 @@ CCL_NAMESPACE_BEGIN
|
||||
#if defined(__SUBSURFACE__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_subsurface
|
||||
# define BVH_FUNCTION_FEATURES BVH_HAIR
|
||||
# include "bvh_subsurface.h"
|
||||
# include "kernel/bvh/bvh_subsurface.h"
|
||||
|
||||
# if defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_MOTION|BVH_HAIR
|
||||
# include "bvh_subsurface.h"
|
||||
# include "kernel/bvh/bvh_subsurface.h"
|
||||
# endif
|
||||
#endif /* __SUBSURFACE__ */
|
||||
|
||||
@@ -85,18 +85,18 @@ CCL_NAMESPACE_BEGIN
|
||||
#if defined(__VOLUME__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume
|
||||
# define BVH_FUNCTION_FEATURES BVH_HAIR
|
||||
# include "bvh_volume.h"
|
||||
# include "kernel/bvh/bvh_volume.h"
|
||||
|
||||
# if defined(__INSTANCING__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
|
||||
# include "bvh_volume.h"
|
||||
# include "kernel/bvh/bvh_volume.h"
|
||||
# endif
|
||||
|
||||
# if defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR
|
||||
# include "bvh_volume.h"
|
||||
# include "kernel/bvh/bvh_volume.h"
|
||||
# endif
|
||||
#endif /* __VOLUME__ */
|
||||
|
||||
@@ -105,30 +105,30 @@ CCL_NAMESPACE_BEGIN
|
||||
#if defined(__SHADOW_RECORD_ALL__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all
|
||||
# define BVH_FUNCTION_FEATURES 0
|
||||
# include "bvh_shadow_all.h"
|
||||
# include "kernel/bvh/bvh_shadow_all.h"
|
||||
|
||||
# if defined(__INSTANCING__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING
|
||||
# include "bvh_shadow_all.h"
|
||||
# include "kernel/bvh/bvh_shadow_all.h"
|
||||
# endif
|
||||
|
||||
# if defined(__HAIR__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
|
||||
# include "bvh_shadow_all.h"
|
||||
# include "kernel/bvh/bvh_shadow_all.h"
|
||||
# endif
|
||||
|
||||
# if defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
|
||||
# include "bvh_shadow_all.h"
|
||||
# include "kernel/bvh/bvh_shadow_all.h"
|
||||
# endif
|
||||
|
||||
# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION
|
||||
# include "bvh_shadow_all.h"
|
||||
# include "kernel/bvh/bvh_shadow_all.h"
|
||||
# endif
|
||||
#endif /* __SHADOW_RECORD_ALL__ */
|
||||
|
||||
@@ -137,18 +137,18 @@ CCL_NAMESPACE_BEGIN
|
||||
#if defined(__VOLUME_RECORD_ALL__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_all
|
||||
# define BVH_FUNCTION_FEATURES BVH_HAIR
|
||||
# include "bvh_volume_all.h"
|
||||
# include "kernel/bvh/bvh_volume_all.h"
|
||||
|
||||
# if defined(__INSTANCING__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
|
||||
# include "bvh_volume_all.h"
|
||||
# include "kernel/bvh/bvh_volume_all.h"
|
||||
# endif
|
||||
|
||||
# if defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR
|
||||
# include "bvh_volume_all.h"
|
||||
# include "kernel/bvh/bvh_volume_all.h"
|
||||
# endif
|
||||
#endif /* __VOLUME_RECORD_ALL__ */
|
||||
|
||||
@@ -202,8 +202,9 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
|
||||
}
|
||||
|
||||
#ifdef __SUBSURFACE__
|
||||
/* Note: ray is passed by value to work around a possible CUDA compiler bug. */
|
||||
ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg,
|
||||
const Ray *ray,
|
||||
const Ray ray,
|
||||
SubsurfaceIntersection *ss_isect,
|
||||
int subsurface_object,
|
||||
uint *lcg_state,
|
||||
@@ -212,7 +213,7 @@ ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg,
|
||||
#ifdef __OBJECT_MOTION__
|
||||
if(kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_subsurface_motion(kg,
|
||||
ray,
|
||||
&ray,
|
||||
ss_isect,
|
||||
subsurface_object,
|
||||
lcg_state,
|
||||
@@ -220,7 +221,7 @@ ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg,
|
||||
}
|
||||
#endif /* __OBJECT_MOTION__ */
|
||||
return bvh_intersect_subsurface(kg,
|
||||
ray,
|
||||
&ray,
|
||||
ss_isect,
|
||||
subsurface_object,
|
||||
lcg_state,
|
||||
@@ -229,30 +230,63 @@ ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg,
|
||||
#endif
|
||||
|
||||
#ifdef __SHADOW_RECORD_ALL__
|
||||
ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits)
|
||||
ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
|
||||
const Ray *ray,
|
||||
Intersection *isect,
|
||||
int skip_object,
|
||||
uint max_hits,
|
||||
uint *num_hits)
|
||||
{
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if(kernel_data.bvh.have_motion) {
|
||||
# ifdef __HAIR__
|
||||
if(kernel_data.bvh.have_curves)
|
||||
return bvh_intersect_shadow_all_hair_motion(kg, ray, isect, max_hits, num_hits);
|
||||
if(kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_shadow_all_hair_motion(kg,
|
||||
ray,
|
||||
isect,
|
||||
skip_object,
|
||||
max_hits,
|
||||
num_hits);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect_shadow_all_motion(kg, ray, isect, max_hits, num_hits);
|
||||
return bvh_intersect_shadow_all_motion(kg,
|
||||
ray,
|
||||
isect,
|
||||
skip_object,
|
||||
max_hits,
|
||||
num_hits);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
# ifdef __HAIR__
|
||||
if(kernel_data.bvh.have_curves)
|
||||
return bvh_intersect_shadow_all_hair(kg, ray, isect, max_hits, num_hits);
|
||||
if(kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_shadow_all_hair(kg,
|
||||
ray,
|
||||
isect,
|
||||
skip_object,
|
||||
max_hits,
|
||||
num_hits);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
# ifdef __INSTANCING__
|
||||
if(kernel_data.bvh.have_instancing)
|
||||
return bvh_intersect_shadow_all_instancing(kg, ray, isect, max_hits, num_hits);
|
||||
if(kernel_data.bvh.have_instancing) {
|
||||
return bvh_intersect_shadow_all_instancing(kg,
|
||||
ray,
|
||||
isect,
|
||||
skip_object,
|
||||
max_hits,
|
||||
num_hits);
|
||||
}
|
||||
# endif /* __INSTANCING__ */
|
||||
|
||||
return bvh_intersect_shadow_all(kg, ray, isect, max_hits, num_hits);
|
||||
return bvh_intersect_shadow_all(kg,
|
||||
ray,
|
||||
isect,
|
||||
skip_object,
|
||||
max_hits,
|
||||
num_hits);
|
||||
}
|
||||
#endif /* __SHADOW_RECORD_ALL__ */
|
||||
|
||||
@@ -357,7 +391,7 @@ ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(__SHADOW_RECORD_ALL__) || defined (__VOLUME_RECORD_ALL__)
|
||||
#if defined(__VOLUME_RECORD_ALL__) || (defined(__SHADOW_RECORD_ALL__) && defined(__KERNEL_CPU__))
|
||||
/* ToDo: Move to another file? */
|
||||
ccl_device int intersections_compare(const void *a, const void *b)
|
||||
{
|
||||
@@ -373,5 +407,28 @@ ccl_device int intersections_compare(const void *a, const void *b)
|
||||
}
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
#if defined(__SHADOW_RECORD_ALL__)
/* Sort the first `num_hits` entries of `hits` in place.
 *
 * On GPU builds the entries are ordered by ascending `t` using a bubble
 * sort. On other builds the work is delegated to qsort() with
 * intersections_compare() as the comparator (presumably the same ordering;
 * the comparator body is defined elsewhere).
 *
 * Arrays with fewer than two entries are already sorted and are left
 * untouched.
 */
ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
{
#ifdef __KERNEL_GPU__
	/* Nothing to do for zero or one hit. This also keeps the unsigned
	 * counter below from ever wrapping around.
	 */
	if(num_hits < 2) {
		return;
	}
	/* Use bubble sort which has more friendly memory pattern on GPU. */
	bool swapped;
	do {
		swapped = false;
		/* Written as `j + 1 < num_hits` rather than `j < num_hits - 1` so
		 * the bound is never computed by subtracting from an unsigned value.
		 */
		for(uint j = 0; j + 1 < num_hits; ++j) {
			if(hits[j].t > hits[j + 1].t) {
				struct Intersection tmp = hits[j];
				hits[j] = hits[j + 1];
				hits[j + 1] = tmp;
				swapped = true;
			}
		}
		/* The largest remaining element has bubbled to the end of the
		 * range, so the next pass can stop one element earlier.
		 */
		--num_hits;
	} while(swapped);
#else
	qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
#endif
}
#endif /* __SHADOW_RECORD_ALL__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -17,8 +17,8 @@
|
||||
// TODO(sergey): Look into avoid use of full Transform and use 3x3 matrix and
|
||||
// 3-vector which might be faster.
|
||||
ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *kg,
|
||||
int node_addr,
|
||||
int child)
|
||||
int node_addr,
|
||||
int child)
|
||||
{
|
||||
Transform space;
|
||||
const int child_addr = node_addr + child * 3;
|
||||
@@ -31,12 +31,12 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *k
|
||||
|
||||
#if !defined(__KERNEL_SSE2__)
|
||||
ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 idir,
|
||||
const float t,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
const float3 P,
|
||||
const float3 idir,
|
||||
const float t,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
{
|
||||
|
||||
/* fetch node data */
|
||||
@@ -78,14 +78,14 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
|
||||
}
|
||||
|
||||
ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 idir,
|
||||
const float t,
|
||||
const float difl,
|
||||
const float extmax,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
const float3 P,
|
||||
const float3 idir,
|
||||
const float t,
|
||||
const float difl,
|
||||
const float extmax,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
{
|
||||
|
||||
/* fetch node data */
|
||||
@@ -203,13 +203,13 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust(
|
||||
}
|
||||
|
||||
ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float3 idir,
|
||||
const float t,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float3 idir,
|
||||
const float t,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
{
|
||||
int mask = 0;
|
||||
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
|
||||
@@ -233,15 +233,15 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
|
||||
}
|
||||
|
||||
ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float3 idir,
|
||||
const float t,
|
||||
const float difl,
|
||||
const float extmax,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float3 idir,
|
||||
const float t,
|
||||
const float difl,
|
||||
const float extmax,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
{
|
||||
int mask = 0;
|
||||
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
|
||||
@@ -265,13 +265,13 @@ ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg
|
||||
}
|
||||
|
||||
ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float3 idir,
|
||||
const float t,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float3 idir,
|
||||
const float t,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
{
|
||||
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
|
||||
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
|
||||
@@ -296,15 +296,15 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
|
||||
}
|
||||
|
||||
ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float3 idir,
|
||||
const float t,
|
||||
const float difl,
|
||||
const float extmax,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float3 idir,
|
||||
const float t,
|
||||
const float difl,
|
||||
const float extmax,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
{
|
||||
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
|
||||
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
|
||||
@@ -442,19 +442,19 @@ ccl_device_forceinline int bvh_aligned_node_intersect_robust(
|
||||
}
|
||||
|
||||
ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const ssef& isect_near,
|
||||
const ssef& isect_far,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const ssef& isect_near,
|
||||
const ssef& isect_far,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
{
|
||||
Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
|
||||
Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
|
||||
|
||||
float3 aligned_dir0 = transform_direction(&space0, dir),
|
||||
aligned_dir1 = transform_direction(&space1, dir);;
|
||||
aligned_dir1 = transform_direction(&space1, dir);
|
||||
float3 aligned_P0 = transform_point(&space0, P),
|
||||
aligned_P1 = transform_point(&space1, P);
|
||||
float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
|
||||
@@ -503,20 +503,20 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
|
||||
}
|
||||
|
||||
ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const ssef& isect_near,
|
||||
const ssef& isect_far,
|
||||
const float difl,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const ssef& isect_near,
|
||||
const ssef& isect_far,
|
||||
const float difl,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
{
|
||||
Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
|
||||
Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
|
||||
|
||||
float3 aligned_dir0 = transform_direction(&space0, dir),
|
||||
aligned_dir1 = transform_direction(&space1, dir);;
|
||||
aligned_dir1 = transform_direction(&space1, dir);
|
||||
float3 aligned_P0 = transform_point(&space0, P),
|
||||
aligned_P1 = transform_point(&space1, P);
|
||||
float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
|
||||
@@ -574,17 +574,17 @@ ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg
|
||||
}
|
||||
|
||||
ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
|
||||
const float3& P,
|
||||
const float3& dir,
|
||||
const ssef& isect_near,
|
||||
const ssef& isect_far,
|
||||
const ssef& tsplat,
|
||||
const ssef Psplat[3],
|
||||
const ssef idirsplat[3],
|
||||
const shuffle_swap_t shufflexyz[3],
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
const float3& P,
|
||||
const float3& dir,
|
||||
const ssef& isect_near,
|
||||
const ssef& isect_far,
|
||||
const ssef& tsplat,
|
||||
const ssef Psplat[3],
|
||||
const ssef idirsplat[3],
|
||||
const shuffle_swap_t shufflexyz[3],
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
{
|
||||
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
|
||||
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
|
||||
@@ -612,19 +612,19 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
|
||||
}
|
||||
|
||||
ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
|
||||
const float3& P,
|
||||
const float3& dir,
|
||||
const ssef& isect_near,
|
||||
const ssef& isect_far,
|
||||
const ssef& tsplat,
|
||||
const ssef Psplat[3],
|
||||
const ssef idirsplat[3],
|
||||
const shuffle_swap_t shufflexyz[3],
|
||||
const float difl,
|
||||
const float extmax,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
const float3& P,
|
||||
const float3& dir,
|
||||
const ssef& isect_near,
|
||||
const ssef& isect_far,
|
||||
const ssef& tsplat,
|
||||
const ssef Psplat[3],
|
||||
const ssef idirsplat[3],
|
||||
const shuffle_swap_t shufflexyz[3],
|
||||
const float difl,
|
||||
const float extmax,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
{
|
||||
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
|
||||
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
|
||||
|
@@ -18,7 +18,7 @@
|
||||
*/
|
||||
|
||||
#ifdef __QBVH__
|
||||
# include "qbvh_shadow_all.h"
|
||||
# include "kernel/bvh/qbvh_shadow_all.h"
|
||||
#endif
|
||||
|
||||
#if BVH_FEATURE(BVH_HAIR)
|
||||
@@ -45,6 +45,7 @@ ccl_device_inline
|
||||
bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
const Ray *ray,
|
||||
Intersection *isect_array,
|
||||
const int skip_object,
|
||||
const uint max_hits,
|
||||
uint *num_hits)
|
||||
{
|
||||
@@ -100,9 +101,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
|
||||
#endif /* __KERNEL_SSE2__ */
|
||||
|
||||
IsectPrecalc isect_precalc;
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
/* traversal loop */
|
||||
do {
|
||||
do {
|
||||
@@ -189,6 +187,16 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
while(prim_addr < prim_addr2) {
|
||||
kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
|
||||
|
||||
#ifdef __SHADOW_TRICKS__
|
||||
uint tri_object = (object == OBJECT_NONE)
|
||||
? kernel_tex_fetch(__prim_object, prim_addr)
|
||||
: object;
|
||||
if(tri_object == skip_object) {
|
||||
++prim_addr;
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
bool hit;
|
||||
|
||||
/* todo: specialized intersect functions which don't fill in
|
||||
@@ -198,9 +206,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
switch(p_type) {
|
||||
case PRIMITIVE_TRIANGLE: {
|
||||
hit = triangle_intersect(kg,
|
||||
&isect_precalc,
|
||||
isect_array,
|
||||
P,
|
||||
dir,
|
||||
PATH_RAY_SHADOW,
|
||||
object,
|
||||
prim_addr);
|
||||
@@ -309,12 +317,11 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
|
||||
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
|
||||
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
|
||||
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
|
||||
# endif
|
||||
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
num_hits_in_instance = 0;
|
||||
isect_array->t = isect_t;
|
||||
|
||||
@@ -354,22 +361,17 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
|
||||
# endif
|
||||
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
/* scale isect->t to adjust for instancing */
|
||||
for(int i = 0; i < num_hits_in_instance; i++) {
|
||||
(isect_array-i-1)->t *= t_fac;
|
||||
}
|
||||
}
|
||||
else {
|
||||
float ignore_t = FLT_MAX;
|
||||
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
|
||||
# endif
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
}
|
||||
|
||||
isect_t = tmax;
|
||||
@@ -400,6 +402,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
|
||||
const Ray *ray,
|
||||
Intersection *isect_array,
|
||||
const int skip_object,
|
||||
const uint max_hits,
|
||||
uint *num_hits)
|
||||
{
|
||||
@@ -408,6 +411,7 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
|
||||
return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
|
||||
ray,
|
||||
isect_array,
|
||||
skip_object,
|
||||
max_hits,
|
||||
num_hits);
|
||||
}
|
||||
@@ -418,6 +422,7 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
|
||||
return BVH_FUNCTION_FULL_NAME(BVH)(kg,
|
||||
ray,
|
||||
isect_array,
|
||||
skip_object,
|
||||
max_hits,
|
||||
num_hits);
|
||||
}
|
||||
|
@@ -18,7 +18,7 @@
|
||||
*/
|
||||
|
||||
#ifdef __QBVH__
|
||||
# include "qbvh_subsurface.h"
|
||||
# include "kernel/bvh/qbvh_subsurface.h"
|
||||
#endif
|
||||
|
||||
#if BVH_FEATURE(BVH_HAIR)
|
||||
@@ -75,16 +75,16 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
Transform ob_itfm;
|
||||
bvh_instance_motion_push(kg,
|
||||
subsurface_object,
|
||||
ray,
|
||||
&P,
|
||||
&dir,
|
||||
&idir,
|
||||
&isect_t,
|
||||
&ob_itfm);
|
||||
isect_t = bvh_instance_motion_push(kg,
|
||||
subsurface_object,
|
||||
ray,
|
||||
&P,
|
||||
&dir,
|
||||
&idir,
|
||||
isect_t,
|
||||
&ob_itfm);
|
||||
#else
|
||||
bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t);
|
||||
isect_t = bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, isect_t);
|
||||
#endif
|
||||
object = subsurface_object;
|
||||
}
|
||||
@@ -109,9 +109,6 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
|
||||
#endif
|
||||
|
||||
IsectPrecalc isect_precalc;
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
/* traversal loop */
|
||||
do {
|
||||
do {
|
||||
@@ -197,9 +194,9 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
for(; prim_addr < prim_addr2; prim_addr++) {
|
||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||
triangle_intersect_subsurface(kg,
|
||||
&isect_precalc,
|
||||
ss_isect,
|
||||
P,
|
||||
dir,
|
||||
object,
|
||||
prim_addr,
|
||||
isect_t,
|
||||
|
@@ -18,7 +18,7 @@
|
||||
*/
|
||||
|
||||
#ifdef __QBVH__
|
||||
# include "qbvh_traversal.h"
|
||||
# include "kernel/bvh/qbvh_traversal.h"
|
||||
#endif
|
||||
|
||||
#if BVH_FEATURE(BVH_HAIR)
|
||||
@@ -104,9 +104,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
|
||||
#endif
|
||||
|
||||
IsectPrecalc isect_precalc;
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
/* traversal loop */
|
||||
do {
|
||||
do {
|
||||
@@ -238,9 +235,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
BVH_DEBUG_NEXT_INTERSECTION();
|
||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||
if(triangle_intersect(kg,
|
||||
&isect_precalc,
|
||||
isect,
|
||||
P,
|
||||
dir,
|
||||
visibility,
|
||||
object,
|
||||
prim_addr))
|
||||
@@ -354,11 +351,10 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
|
||||
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
|
||||
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
# endif
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
# if defined(__KERNEL_SSE2__)
|
||||
Psplat[0] = ssef(P.x);
|
||||
@@ -391,11 +387,10 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
|
||||
/* instance pop */
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
|
||||
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
# endif
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
# if defined(__KERNEL_SSE2__)
|
||||
Psplat[0] = ssef(P.x);
|
||||
|
@@ -18,7 +18,7 @@
|
||||
*/
|
||||
|
||||
#ifdef __QBVH__
|
||||
# include "qbvh_volume.h"
|
||||
# include "kernel/bvh/qbvh_volume.h"
|
||||
#endif
|
||||
|
||||
#if BVH_FEATURE(BVH_HAIR)
|
||||
@@ -97,9 +97,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
|
||||
#endif
|
||||
|
||||
IsectPrecalc isect_precalc;
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
/* traversal loop */
|
||||
do {
|
||||
do {
|
||||
@@ -194,9 +191,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
continue;
|
||||
}
|
||||
triangle_intersect(kg,
|
||||
&isect_precalc,
|
||||
isect,
|
||||
P,
|
||||
dir,
|
||||
visibility,
|
||||
object,
|
||||
prim_addr);
|
||||
@@ -238,13 +235,11 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
int object_flag = kernel_tex_fetch(__object_flag, object);
|
||||
if(object_flag & SD_OBJECT_HAS_VOLUME) {
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
|
||||
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
# endif
|
||||
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
# if defined(__KERNEL_SSE2__)
|
||||
Psplat[0] = ssef(P.x);
|
||||
Psplat[1] = ssef(P.y);
|
||||
@@ -281,13 +276,11 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
|
||||
/* instance pop */
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
|
||||
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
# endif
|
||||
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
# if defined(__KERNEL_SSE2__)
|
||||
Psplat[0] = ssef(P.x);
|
||||
Psplat[1] = ssef(P.y);
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user