Compare commits
468 Commits
node-group
...
geometry-n
Author | SHA1 | Date | |
---|---|---|---|
c396f41e14 | |||
a9f02cd8d8 | |||
1e81e27557 | |||
8523e361e8 | |||
05ddbc20b8 | |||
8e967cfeaf | |||
8fbf0a79fc | |||
f307b5ae65 | |||
15f9e42c4f | |||
8b6777edc2 | |||
0e82510ea2 | |||
21b51e7b88 | |||
a04b39faf4 | |||
c547ff1ebd | |||
968ecf6f8b | |||
7bf56e5c75 | |||
271ddc303d | |||
68f8253c71 | |||
29a4903eb8 | |||
35a8341d7b | |||
b21695a507 | |||
b44dace9d8 | |||
5e76622f47 | |||
bc338aac74 | |||
0dd16758f0 | |||
3ee21d1098 | |||
88f6d584ca | |||
e6ec1e4baf | |||
09a2b5c70f | |||
3650b36141 | |||
bebb17a973 | |||
fc288ec856 | |||
99f5e60b86 | |||
680a54c7d0 | |||
25138fd6e0 | |||
0865f80591 | |||
4134682ec2 | |||
c18351f670 | |||
fdf920bf5d | |||
19dbe049db | |||
5721b34e53 | |||
417b62522d | |||
fc749d9d25 | |||
54ce0ac922 | |||
62806012ed | |||
ae24fe56a3 | |||
04faf12bd8 | |||
8e69b41bdf | |||
491f098edf | |||
f6ec11741c | |||
4babb7c02e | |||
0d9fa73b42 | |||
7e92fb92ec | |||
13d30b0481 | |||
c732d901a7 | |||
f04a7a07e3 | |||
82ca3d3604 | |||
2f4a8ecf18 | |||
b2c822065c | |||
5c4b0c98d3 | |||
0e23aef6b6 | |||
4d34028ce9 | |||
475f9a3e23 | |||
100f37af49 | |||
7ce10ebbbf | |||
770b193253 | |||
0fa68d1a01 | |||
6d35e1c238 | |||
fe7815e117 | |||
60ced5283a | |||
4054d76749 | |||
88b125e75d | |||
b69f8de5b5 | |||
dda4c0721c | |||
397a14deff | |||
3d6117994c | |||
335d32153e | |||
62cc09f267 | |||
92f79e002e | |||
3f0d2cf9e1 | |||
80fdf4a88d | |||
716b9cff23 | |||
945d71b56b | |||
5ca7e1301f | |||
a32fb96311 | |||
3e41b98295 | |||
b633b460b8 | |||
639ec2e5a9 | |||
b778e09492 | |||
7884de02f3 | |||
95296dc3aa | |||
9be0304b67 | |||
b6e527febb | |||
373cfa731f | |||
7cc7cd0e80 | |||
5294758830 | |||
80edd10168 | |||
f87e474af0 | |||
911f9bea84 | |||
e05cbad0d1 | |||
2ab500c234 | |||
7535ab412a | |||
10d175e223 | |||
60bb57663a | |||
9344deed89 | |||
c6d4de9e49 | |||
199c7da06d | |||
acb34c718e | |||
097b9c5a36 | |||
98ccee78fe | |||
d5757a0a10 | |||
45c0762f1b | |||
599e52119f | |||
a7422f3cd7 | |||
d6abd2ce72 | |||
91a29c9b9a | |||
5ab48a53e4 | |||
b647c2b88d | |||
86611a5fcc | |||
7f2c7feaee | |||
c0f7801660 | |||
a5140712cc | |||
e45ed69349 | |||
19ac02767c | |||
fd10ecaeaf | |||
187998970a | |||
c872b6b930 | |||
3c34b13cf8 | |||
9e63c3cee8 | |||
a20f45bab9 | |||
63c20e08c4 | |||
4f7dc1e4b6 | |||
8ed543c6f2 | |||
daaed83a32 | |||
7982d86117 | |||
33e5cd4e2f | |||
8365bce958 | |||
b0ec4d889a | |||
40c76a1945 | |||
c10e8e4166 | |||
ed590e9181 | |||
26a194abbd | |||
9e6757f20f | |||
61fe8da989 | |||
643f8bcedd | |||
f8e23e495b | |||
7f241fc773 | |||
6d2351d26b | |||
846d78b09a | |||
b132118f89 | |||
88f5dd3c72 | |||
eb2867de90 | |||
![]() |
0fd14d659b | ||
1469613d65 | |||
95bc1dd0e5 | |||
4382a0b350 | |||
70892e82ac | |||
9821a2d397 | |||
3f8c995109 | |||
887022257d | |||
5cdf0c9ee9 | |||
66b4e426cc | |||
72aeee96ac | |||
f7ba61d3a6 | |||
70d854538b | |||
694f792ee1 | |||
4edcae75aa | |||
954c6c0ae6 | |||
3a72442f63 | |||
5bb3a3f157 | |||
d818d05415 | |||
7c927155b5 | |||
63f309df11 | |||
6f26bb6841 | |||
66158498de | |||
e41cd795a6 | |||
25747301db | |||
b623be3377 | |||
77268dbe3b | |||
7b4d71683f | |||
58b1c54671 | |||
01c6824eaf | |||
8981bb4ac6 | |||
732fa26413 | |||
4d7a7ce67c | |||
e4926b4b2a | |||
6e75581e65 | |||
664b31ea73 | |||
4d75f10a8a | |||
982392ca13 | |||
ab8acbbfe5 | |||
c234a802ba | |||
7bb8c8a5cf | |||
c615ccde68 | |||
1bb77d9eae | |||
302eb1e0d7 | |||
c4c1cc7cd3 | |||
2f743b0a92 | |||
29f137e138 | |||
96fa5fc2b3 | |||
df096eab77 | |||
09b770388a | |||
870930bc32 | |||
74eda0b6fc | |||
92919864a0 | |||
cff94a808e | |||
a8feb20e1c | |||
b1703bd902 | |||
c041a36286 | |||
315cc66bd8 | |||
b75b734969 | |||
915b8b6093 | |||
a16bcb6576 | |||
0bc957063c | |||
38bf3e1911 | |||
48979c6cdc | |||
e45746591b | |||
2a4323c2f5 | |||
f588a0596b | |||
fe7540d39a | |||
62d9e55eec | |||
0ed0165eea | |||
c8435185e1 | |||
dfa42c614f | |||
5491563e59 | |||
6e59d0b20f | |||
3a02d760f7 | |||
15f464019a | |||
348f57bcec | |||
8f3796e90a | |||
1d8389cd09 | |||
c7d80b8c70 | |||
0b1fb22f69 | |||
153cb7e1df | |||
e78c3c9d96 | |||
7bd7043a74 | |||
2fade47a9d | |||
![]() |
bd86e719ab | ||
4563a47ac5 | |||
![]() |
c08a9ec19f | ||
45ef51d0fb | |||
9d4949f80b | |||
de7e3454fb | |||
254d148458 | |||
e1571cb105 | |||
5f40118899 | |||
6cc2c16d06 | |||
6722f90734 | |||
b827c8cd1e | |||
8afb8db66e | |||
cffc9bdb93 | |||
bb25302fc3 | |||
b601ae87d0 | |||
e078419c9c | |||
6a0b90bc92 | |||
1924045142 | |||
db47f82626 | |||
b86fc55d30 | |||
15683d81be | |||
8df6974a15 | |||
7d4edcfa68 | |||
62548acb1a | |||
c43d493cce | |||
bfd1836861 | |||
910f60de4c | |||
495f679246 | |||
0cb17a7036 | |||
d633d9fd02 | |||
dcb3b1c1f9 | |||
fed463df78 | |||
988f23cec3 | |||
80f3f59555 | |||
3f31ac2e1a | |||
2b4a62fa18 | |||
23bce32888 | |||
a1cc15f239 | |||
4d1acf42e9 | |||
e0a3fcb622 | |||
10f20bf5d5 | |||
c9258e6e19 | |||
c63b2e5187 | |||
4c793a5b20 | |||
786734e6c8 | |||
10b7d4f601 | |||
bf6f69399f | |||
fba960301f | |||
40683e524c | |||
e9d4e571d0 | |||
84e216fcee | |||
1b94e60fb0 | |||
33bfbb2a0c | |||
86b39e0aac | |||
71c4b7f1d0 | |||
26dc9f90d2 | |||
2745cacd95 | |||
dc1b36f288 | |||
26aa1b1367 | |||
37b7702d74 | |||
1633fca4a4 | |||
a3e954e0a7 | |||
cbc5b17c1a | |||
a76c714c26 | |||
c22fed5c01 | |||
c38d259779 | |||
8497737d92 | |||
c385369e07 | |||
4ebe696e85 | |||
b180fea69e | |||
a066d62798 | |||
c226954d03 | |||
9e4f58a8eb | |||
39bcf6bdc9 | |||
29c2722753 | |||
89aa86cb0a | |||
3fadaa4fca | |||
dda9c59044 | |||
97c05aa288 | |||
bdd6e617ea | |||
fa13058fa6 | |||
71ed98debe | |||
197e9b9f80 | |||
a7bee90c1d | |||
cef128e68a | |||
baeb386410 | |||
5ba35b3d15 | |||
936e608382 | |||
1db918f948 | |||
7eee378ecc | |||
7e764ec692 | |||
aa6e95281f | |||
c26083b6be | |||
e5d50b1787 | |||
34739f6a6d | |||
a899d57e57 | |||
0e5c941049 | |||
ed4374f089 | |||
0a270e3513 | |||
684dcd3680 | |||
a7462f58d1 | |||
11ad851fbe | |||
ad5ec544c8 | |||
61cb302dd5 | |||
678dc456e3 | |||
14905cd1d5 | |||
6482f9fffe | |||
84e72e8170 | |||
ab64bd264a | |||
b0edd63752 | |||
741d8dc1e2 | |||
4bccbceb34 | |||
35071af465 | |||
d48939f103 | |||
5ba30e07f2 | |||
ecfdbaef9b | |||
2b565c6bd0 | |||
a277117b3e | |||
e665a50fb6 | |||
c6d8da0e97 | |||
98bc439e47 | |||
3d8d142205 | |||
d98988d872 | |||
b14668ae03 | |||
f04787e87b | |||
0351ce9769 | |||
c4b3e0e0bb | |||
2ddddf6e36 | |||
b652bcbe92 | |||
8b47a252b1 | |||
d9fb08133a | |||
753af18573 | |||
7b62cc943d | |||
e5c63abece | |||
e21afd2f2f | |||
3bad7a51cb | |||
ecb91d46a8 | |||
f36dd06609 | |||
b2e508f7af | |||
a90f02d5f4 | |||
9241ab1d7c | |||
6f56fee3bb | |||
9393c2aba9 | |||
508fd044b4 | |||
38567bc023 | |||
8fa664fd33 | |||
1b24140d9f | |||
aeea690e00 | |||
6ecb1cb780 | |||
988241e23e | |||
a2cee52617 | |||
a0ed3601c9 | |||
8226abc111 | |||
![]() |
807be888a5 | ||
f92a85d7d2 | |||
6bd6d7aec7 | |||
cfb77c54b0 | |||
0d18005d2b | |||
06525747c0 | |||
74171ff3b0 | |||
32a7384c0b | |||
54942e5ea6 | |||
5d1ed47d6c | |||
bb8cbf0c10 | |||
b3386868fe | |||
c9bd78890a | |||
a12614d166 | |||
0403d77a0f | |||
fe7a0ebce4 | |||
af7c34716b | |||
![]() |
34f307547b | ||
bb9b2b556f | |||
a3a132ea74 | |||
ac59dfeffd | |||
24523726d7 | |||
20cab8f8f2 | |||
f9b621a9d9 | |||
![]() |
f1b16f3ceb | ||
cc9d9c7724 | |||
f1f2ff1116 | |||
6b26b0db21 | |||
8848cfdf4b | |||
e50d567c97 | |||
496e344015 | |||
c30718ded9 | |||
1965e31d17 | |||
5e384860a6 | |||
468f43c7a6 | |||
50a2c77c4e | |||
c82b1aa1c0 | |||
3f1027567d | |||
ab8d77359b | |||
3ef95d7f19 | |||
02a264f5ab | |||
2525c1c023 | |||
620b190e52 | |||
657ffe9aa7 | |||
3fcf50d37a | |||
49e8218edf | |||
94e6f87ebc | |||
3dcb437d5c | |||
ffe0db184a | |||
8648cf4717 | |||
c9958c8e9f | |||
24e2d08b49 | |||
bb732c240d | |||
850aa3d26a | |||
1ba264d5f0 | |||
92a1234830 | |||
afe5d0b9f2 | |||
cff291d1f3 | |||
bbcdca1378 | |||
a0caa03942 | |||
7469e19446 | |||
0019d6cc8f | |||
e9c3e4f14e | |||
bdd71c129c | |||
18b2ec1963 | |||
97df619be7 | |||
3059f1743e | |||
4726803e85 | |||
fa277178e8 | |||
7cf192956b | |||
f7cf6e957d | |||
b5ea0d2f41 | |||
c2a632cd41 | |||
7ad2b93ec4 | |||
6d930d0b4a | |||
f55f2b5ff4 | |||
5aaa435ac7 |
@@ -521,7 +521,8 @@ endif()
|
||||
if(NOT APPLE)
|
||||
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
|
||||
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
|
||||
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
|
||||
# Radeon VII (gfx906) not currently working with HIP SDK, so left out of the list.
|
||||
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
|
||||
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
|
||||
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
|
||||
endif()
|
||||
@@ -1580,6 +1581,8 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_MISSING_NORETURN -Wno-missing-noreturn)
|
||||
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_UNUSED_BUT_SET_VARIABLE -Wno-unused-but-set-variable)
|
||||
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_DEPRECATED_DECLARATIONS -Wno-deprecated-declarations)
|
||||
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_STRICT_PROTOTYPES -Wno-strict-prototypes)
|
||||
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_BITWISE_INSTEAD_OF_LOGICAL -Wno-bitwise-instead-of-logical)
|
||||
|
||||
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_UNUSED_PARAMETER -Wno-unused-parameter)
|
||||
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_UNUSED_PRIVATE_FIELD -Wno-unused-private-field)
|
||||
@@ -1593,6 +1596,7 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_UNDEFINED_VAR_TEMPLATE -Wno-undefined-var-template)
|
||||
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_INSTANTIATION_AFTER_SPECIALIZATION -Wno-instantiation-after-specialization)
|
||||
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_MISLEADING_INDENTATION -Wno-misleading-indentation)
|
||||
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_BITWISE_INSTEAD_OF_LOGICAL -Wno-bitwise-instead-of-logical)
|
||||
|
||||
elseif(CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||
|
||||
|
18
GNUmakefile
18
GNUmakefile
@@ -58,9 +58,6 @@ Static Source Code Checking
|
||||
* check_cppcheck: Run blender source through cppcheck (C & C++).
|
||||
* check_clang_array: Run blender source through clang array checking script (C & C++).
|
||||
* check_deprecated: Check if there is any deprecated code to remove.
|
||||
* check_splint: Run blenders source through splint (C only).
|
||||
* check_sparse: Run blenders source through sparse (C only).
|
||||
* check_smatch: Run blenders source through smatch (C only).
|
||||
* check_descriptions: Check for duplicate/invalid descriptions.
|
||||
* check_licenses: Check license headers follow the SPDX license specification,
|
||||
using one of the accepted licenses in 'doc/license/SPDX-license-identifiers.txt'
|
||||
@@ -474,21 +471,6 @@ check_clang_array: .FORCE
|
||||
@cd "$(BUILD_DIR)" ; \
|
||||
$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_clang_array.py"
|
||||
|
||||
check_splint: .FORCE
|
||||
@$(CMAKE_CONFIG)
|
||||
@cd "$(BUILD_DIR)" ; \
|
||||
$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_splint.py"
|
||||
|
||||
check_sparse: .FORCE
|
||||
@$(CMAKE_CONFIG)
|
||||
@cd "$(BUILD_DIR)" ; \
|
||||
$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_sparse.py"
|
||||
|
||||
check_smatch: .FORCE
|
||||
@$(CMAKE_CONFIG)
|
||||
@cd "$(BUILD_DIR)" ; \
|
||||
$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_smatch.py"
|
||||
|
||||
check_mypy: .FORCE
|
||||
@$(PYTHON) "$(BLENDER_DIR)/tools/check_source/check_mypy.py"
|
||||
|
||||
|
@@ -90,28 +90,26 @@ include(cmake/haru.cmake)
|
||||
# Boost needs to be included after `python.cmake` due to the PYTHON_BINARY variable being needed.
|
||||
include(cmake/boost.cmake)
|
||||
include(cmake/pugixml.cmake)
|
||||
include(cmake/ispc.cmake)
|
||||
include(cmake/openimagedenoise.cmake)
|
||||
include(cmake/embree.cmake)
|
||||
include(cmake/openpgl.cmake)
|
||||
include(cmake/fmt.cmake)
|
||||
include(cmake/robinmap.cmake)
|
||||
include(cmake/xml2.cmake)
|
||||
|
||||
include(cmake/fribidi.cmake)
|
||||
include(cmake/harfbuzz.cmake)
|
||||
if(NOT APPLE)
|
||||
include(cmake/xr_openxr.cmake)
|
||||
if(NOT WIN32 OR BUILD_MODE STREQUAL Release)
|
||||
include(cmake/dpcpp.cmake)
|
||||
include(cmake/dpcpp_deps.cmake)
|
||||
endif()
|
||||
include(cmake/dpcpp.cmake)
|
||||
include(cmake/dpcpp_deps.cmake)
|
||||
if(NOT WIN32)
|
||||
include(cmake/igc.cmake)
|
||||
include(cmake/gmmlib.cmake)
|
||||
include(cmake/ocloc.cmake)
|
||||
endif()
|
||||
endif()
|
||||
include(cmake/ispc.cmake)
|
||||
include(cmake/openimagedenoise.cmake)
|
||||
# Embree needs to be included after dpcpp as it uses it for compiling with GPU support
|
||||
include(cmake/embree.cmake)
|
||||
include(cmake/openpgl.cmake)
|
||||
include(cmake/fmt.cmake)
|
||||
include(cmake/robinmap.cmake)
|
||||
include(cmake/xml2.cmake)
|
||||
|
||||
# OpenColorIO and dependencies.
|
||||
include(cmake/expat.cmake)
|
||||
|
@@ -156,6 +156,7 @@ download_source(OPENCLHEADERS)
|
||||
download_source(ICDLOADER)
|
||||
download_source(MP11)
|
||||
download_source(SPIRV_HEADERS)
|
||||
download_source(UNIFIED_RUNTIME)
|
||||
download_source(IGC)
|
||||
download_source(IGC_LLVM)
|
||||
download_source(IGC_OPENCL_CLANG)
|
||||
|
@@ -5,6 +5,9 @@
|
||||
# for now.
|
||||
string(REPLACE "-DCMAKE_CXX_STANDARD=17" " " DPCPP_CMAKE_FLAGS "${DEFAULT_CMAKE_FLAGS}")
|
||||
|
||||
# DPCPP already generates debug libs, there isn't much point in compiling it in debug mode itself.
|
||||
string(REPLACE "-DCMAKE_BUILD_TYPE=Debug" "-DCMAKE_BUILD_TYPE=Release" DPCPP_CMAKE_FLAGS "${DPCPP_CMAKE_FLAGS}")
|
||||
|
||||
if(WIN32)
|
||||
set(LLVM_GENERATOR "Ninja")
|
||||
else()
|
||||
@@ -38,17 +41,18 @@ set(DPCPP_EXTRA_ARGS
|
||||
-DLEVEL_ZERO_LIBRARY=${LIBDIR}/level-zero/lib/${LIBPREFIX}ze_loader${SHAREDLIBEXT}
|
||||
-DLEVEL_ZERO_INCLUDE_DIR=${LIBDIR}/level-zero/include
|
||||
-DLLVM_EXTERNAL_SPIRV_HEADERS_SOURCE_DIR=${BUILD_DIR}/spirvheaders/src/external_spirvheaders/
|
||||
-DUNIFIED_RUNTIME_SOURCE_DIR=${BUILD_DIR}/unifiedruntime/src/external_unifiedruntime/
|
||||
# Below here is copied from an invocation of buildbot/config.py
|
||||
-DLLVM_ENABLE_ASSERTIONS=ON
|
||||
-DLLVM_TARGETS_TO_BUILD=X86
|
||||
-DLLVM_EXTERNAL_PROJECTS=sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
|
||||
-DLLVM_EXTERNAL_PROJECTS=sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw^^lld
|
||||
-DLLVM_EXTERNAL_SYCL_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/sycl
|
||||
-DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/llvm-spirv
|
||||
-DLLVM_EXTERNAL_XPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
|
||||
-DXPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
|
||||
-DLLVM_EXTERNAL_XPTIFW_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xptifw
|
||||
-DLLVM_EXTERNAL_LIBDEVICE_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/libdevice
|
||||
-DLLVM_ENABLE_PROJECTS=clang^^sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
|
||||
-DLLVM_ENABLE_PROJECTS=clang^^sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw^^lld
|
||||
-DLIBCLC_TARGETS_TO_BUILD=
|
||||
-DLIBCLC_GENERATE_REMANGLED_VARIANTS=OFF
|
||||
-DSYCL_BUILD_PI_HIP_PLATFORM=AMD
|
||||
@@ -104,13 +108,19 @@ add_dependencies(
|
||||
external_mp11
|
||||
external_level-zero
|
||||
external_spirvheaders
|
||||
external_unifiedruntime
|
||||
)
|
||||
|
||||
if(BUILD_MODE STREQUAL Release AND WIN32)
|
||||
ExternalProject_Add_Step(external_dpcpp after_install
|
||||
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cl.exe
|
||||
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cpp.exe
|
||||
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang.exe
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/dpcpp ${HARVEST_TARGET}/dpcpp
|
||||
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/clang-cl.exe
|
||||
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/clang-cpp.exe
|
||||
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/clang.exe
|
||||
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/ld.lld.exe
|
||||
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/ld64.lld.exe
|
||||
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/lld.exe
|
||||
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/lld-link.exe
|
||||
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/wasm-ld.exe
|
||||
)
|
||||
endif()
|
||||
|
@@ -59,3 +59,13 @@ ExternalProject_Add(external_spirvheaders
|
||||
BUILD_COMMAND echo .
|
||||
INSTALL_COMMAND echo .
|
||||
)
|
||||
|
||||
ExternalProject_Add(external_unifiedruntime
|
||||
URL file://${PACKAGE_DIR}/${UNIFIED_RUNTIME_FILE}
|
||||
URL_HASH ${UNIFIED_RUNTIME_HASH_TYPE}=${UNIFIED_RUNTIME_HASH}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
PREFIX ${BUILD_DIR}/unifiedruntime
|
||||
CONFIGURE_COMMAND echo .
|
||||
BUILD_COMMAND echo .
|
||||
INSTALL_COMMAND echo .
|
||||
)
|
||||
|
@@ -3,6 +3,8 @@
|
||||
# Note the utility apps may use png/tiff/gif system libraries, but the
|
||||
# library itself does not depend on them, so should give no problems.
|
||||
|
||||
set(EMBREE_CMAKE_FLAGS ${DEFAULT_CMAKE_FLAGS})
|
||||
|
||||
set(EMBREE_EXTRA_ARGS
|
||||
-DEMBREE_ISPC_SUPPORT=OFF
|
||||
-DEMBREE_TUTORIALS=OFF
|
||||
@@ -31,6 +33,43 @@ if(NOT BLENDER_PLATFORM_ARM)
|
||||
)
|
||||
endif()
|
||||
|
||||
if(NOT APPLE)
|
||||
if(WIN32)
|
||||
# Levels below -O2 don't work well for Embree+SYCL.
|
||||
# NOTE(review): the C++ debug flags below are derived from BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG
# (the C flags), not a CXX variable, so any C++-only defines are not carried over.
# Confirm this is intentional rather than a copy-paste slip.
string(REGEX REPLACE "-O[A-Za-z0-9]" "" EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG ${BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG})
|
||||
string(APPEND EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG " -O2")
|
||||
string(REGEX REPLACE "-O[A-Za-z0-9]" "" EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG ${BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG})
|
||||
string(APPEND EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG " -O2")
|
||||
set(EMBREE_CMAKE_FLAGS
|
||||
-DCMAKE_BUILD_TYPE=${BUILD_MODE}
|
||||
-DCMAKE_CXX_FLAGS_RELEASE=${BLENDER_CLANG_CMAKE_CXX_FLAGS_RELEASE}
|
||||
-DCMAKE_CXX_FLAGS_MINSIZEREL=${BLENDER_CLANG_CMAKE_CXX_FLAGS_MINSIZEREL}
|
||||
-DCMAKE_CXX_FLAGS_RELWITHDEBINFO=${BLENDER_CLANG_CMAKE_CXX_FLAGS_RELWITHDEBINFO}
|
||||
-DCMAKE_CXX_FLAGS_DEBUG=${EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG}
|
||||
-DCMAKE_C_FLAGS_RELEASE=${BLENDER_CLANG_CMAKE_C_FLAGS_RELEASE}
|
||||
-DCMAKE_C_FLAGS_MINSIZEREL=${BLENDER_CLANG_CMAKE_C_FLAGS_MINSIZEREL}
|
||||
-DCMAKE_C_FLAGS_RELWITHDEBINFO=${BLENDER_CLANG_CMAKE_C_FLAGS_RELWITHDEBINFO}
|
||||
-DCMAKE_C_FLAGS_DEBUG=${EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG}
|
||||
-DCMAKE_CXX_STANDARD=17
|
||||
)
|
||||
set(EMBREE_EXTRA_ARGS
|
||||
-DCMAKE_CXX_COMPILER=${LIBDIR}/dpcpp/bin/clang++.exe
|
||||
-DCMAKE_C_COMPILER=${LIBDIR}/dpcpp/bin/clang.exe
|
||||
-DCMAKE_SHARED_LINKER_FLAGS=-L"${LIBDIR}/dpcpp/lib"
|
||||
-DEMBREE_SYCL_SUPPORT=ON
|
||||
${EMBREE_EXTRA_ARGS}
|
||||
)
|
||||
else()
|
||||
set(EMBREE_EXTRA_ARGS
|
||||
-DCMAKE_CXX_COMPILER=${LIBDIR}/dpcpp/bin/clang++
|
||||
-DCMAKE_C_COMPILER=${LIBDIR}/dpcpp/bin/clang
|
||||
-DCMAKE_SHARED_LINKER_FLAGS=-L"${LIBDIR}/dpcpp/lib"
|
||||
-DEMBREE_SYCL_SUPPORT=ON
|
||||
${EMBREE_EXTRA_ARGS}
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(TBB_STATIC_LIBRARY)
|
||||
set(EMBREE_EXTRA_ARGS
|
||||
${EMBREE_EXTRA_ARGS}
|
||||
@@ -42,16 +81,25 @@ ExternalProject_Add(external_embree
|
||||
URL file://${PACKAGE_DIR}/${EMBREE_FILE}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
URL_HASH ${EMBREE_HASH_TYPE}=${EMBREE_HASH}
|
||||
CMAKE_GENERATOR ${PLATFORM_ALT_GENERATOR}
|
||||
PREFIX ${BUILD_DIR}/embree
|
||||
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/embree/src/external_embree < ${PATCH_DIR}/embree.diff
|
||||
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/embree ${DEFAULT_CMAKE_FLAGS} ${EMBREE_EXTRA_ARGS}
|
||||
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/embree ${EMBREE_CMAKE_FLAGS} ${EMBREE_EXTRA_ARGS}
|
||||
INSTALL_DIR ${LIBDIR}/embree
|
||||
)
|
||||
|
||||
add_dependencies(
|
||||
external_embree
|
||||
external_tbb
|
||||
)
|
||||
if(NOT APPLE)
|
||||
add_dependencies(
|
||||
external_embree
|
||||
external_tbb
|
||||
external_dpcpp
|
||||
)
|
||||
else()
|
||||
add_dependencies(
|
||||
external_embree
|
||||
external_tbb
|
||||
)
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
if(BUILD_MODE STREQUAL Release)
|
||||
@@ -66,6 +114,7 @@ if(WIN32)
|
||||
ExternalProject_Add_Step(external_embree after_install
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/bin/embree4_d.dll ${HARVEST_TARGET}/embree/bin/embree4_d.dll
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree4_d.lib ${HARVEST_TARGET}/embree/lib/embree4_d.lib
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree4_sycl_d.lib ${HARVEST_TARGET}/embree/lib/embree4_sycl_d.lib
|
||||
DEPENDEES install
|
||||
)
|
||||
endif()
|
||||
|
@@ -74,6 +74,27 @@ if(WIN32)
|
||||
set(BLENDER_CMAKE_CXX_FLAGS_RELEASE "/MD ${COMMON_MSVC_FLAGS} /D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS /O2 /Ob2 /D NDEBUG /D PLATFORM_WINDOWS /DPSAPI_VERSION=2 /DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
set(BLENDER_CMAKE_CXX_FLAGS_RELWITHDEBINFO "/MD ${COMMON_MSVC_FLAGS} /D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS /Zi /O2 /Ob1 /D NDEBUG /D PLATFORM_WINDOWS /DPSAPI_VERSION=2 /DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
|
||||
# Set similar flags for CLANG compilation.
|
||||
set(COMMON_CLANG_FLAGS "-D_DLL -D_MT") # Equivalent to MSVC /MD
|
||||
|
||||
if(WITH_OPTIMIZED_DEBUG)
|
||||
set(BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -O2 -D_DEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
else()
|
||||
set(BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -g -D_DEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
endif()
|
||||
set(BLENDER_CLANG_CMAKE_C_FLAGS_MINSIZEREL "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -Os -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
set(BLENDER_CLANG_CMAKE_C_FLAGS_RELEASE "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -O2 -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
set(BLENDER_CLANG_CMAKE_C_FLAGS_RELWITHDEBINFO "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -g -O2 -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
|
||||
if(WITH_OPTIMIZED_DEBUG)
|
||||
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -D_DEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS -DBOOST_DEBUG_PYTHON -DBOOST_ALL_NO_LIB")
|
||||
else()
|
||||
# Non-optimized C++ debug flags for Clang. Must start with ${COMMON_CLANG_FLAGS}
# (-D_DLL -D_MT, the MSVC /MD equivalent) like every other Clang configuration.
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -D_DEBUG -DPLATFORM_WINDOWS -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -g -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS -DBOOST_DEBUG_PYTHON -DBOOST_ALL_NO_LIB")
|
||||
endif()
|
||||
# NOTE(review): BLENDER_CLANG_CMAKE_C_FLAGS_MINSIZEREL uses -Os, while this C++ variant
# uses -O2 — confirm whether -Os was intended here as well.
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_MINSIZEREL "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_RELEASE "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_RELWITHDEBINFO "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -g -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
|
||||
set(PLATFORM_FLAGS)
|
||||
set(PLATFORM_CXX_FLAGS)
|
||||
set(PLATFORM_CMAKE_FLAGS)
|
||||
|
@@ -599,15 +599,15 @@ set(OPENPGL_HASH db63f5dac5cfa8c110ede241f0c413f00db0c4748697381c4fa23e0f9e82a75
|
||||
set(OPENPGL_HASH_TYPE SHA256)
|
||||
set(OPENPGL_FILE openpgl-${OPENPGL_VERSION}.tar.gz)
|
||||
|
||||
set(LEVEL_ZERO_VERSION v1.8.5)
|
||||
set(LEVEL_ZERO_VERSION v1.8.8)
|
||||
set(LEVEL_ZERO_URI https://github.com/oneapi-src/level-zero/archive/refs/tags/${LEVEL_ZERO_VERSION}.tar.gz)
|
||||
set(LEVEL_ZERO_HASH b6e9663bbcc53c148d32376998298bec6f7c434ef2218c61fa708963e3a09394)
|
||||
set(LEVEL_ZERO_HASH 3553ae8fa0d2d69c4210a8f3428bd6612bd8bb8a627faf52c3658a01851e66d2)
|
||||
set(LEVEL_ZERO_HASH_TYPE SHA256)
|
||||
set(LEVEL_ZERO_FILE level-zero-${LEVEL_ZERO_VERSION}.tar.gz)
|
||||
|
||||
set(DPCPP_VERSION 20221019)
|
||||
set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/sycl-nightly/${DPCPP_VERSION}.tar.gz)
|
||||
set(DPCPP_HASH 2f533946e91ce3829431758ea17b0b834b960c1a796e9e4563c86e03eb9603a2)
|
||||
set(DPCPP_VERSION 2022-12)
|
||||
set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/${DPCPP_VERSION}.tar.gz)
|
||||
set(DPCPP_HASH 13151d5ae79f7c9c4a9b072a0c486ae7b3c4993e301bb1268c92214451025790)
|
||||
set(DPCPP_HASH_TYPE SHA256)
|
||||
set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)
|
||||
|
||||
@@ -620,9 +620,9 @@ set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)
|
||||
# will take care of building them, unpack is being done in dpcpp_deps.cmake
|
||||
|
||||
# Source llvm/lib/SYCLLowerIR/CMakeLists.txt
|
||||
set(VCINTRINSICS_VERSION abce9184b7a3a7fe1b02289b9285610d9dc45465)
|
||||
set(VCINTRINSICS_VERSION 782fbf7301dc73acaa049a4324c976ad94f587f7)
|
||||
set(VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/${VCINTRINSICS_VERSION}.tar.gz)
|
||||
set(VCINTRINSICS_HASH 3e9fd471246b87633b26f7e15e17ab7733d357458c53d5c5881c03929d6c551f)
|
||||
set(VCINTRINSICS_HASH f4c0ccad8c1f77760364c551c65e8e1cf194d058889fa46d3b1b2d19ec4dc33f)
|
||||
set(VCINTRINSICS_HASH_TYPE SHA256)
|
||||
set(VCINTRINSICS_FILE vc-intrinsics-${VCINTRINSICS_VERSION}.tar.gz)
|
||||
|
||||
@@ -657,6 +657,13 @@ set(SPIRV_HEADERS_HASH ec8ecb471a62672697846c436501638ab25447ae9d4a6761e0bfe8a9a
|
||||
set(SPIRV_HEADERS_HASH_TYPE SHA256)
|
||||
set(SPIRV_HEADERS_FILE SPIR-V-Headers-${SPIRV_HEADERS_VERSION}.tar.gz)
|
||||
|
||||
# Source llvm/sycl/plugins/unified_runtime/CMakeLists.txt
|
||||
set(UNIFIED_RUNTIME_VERSION fd711c920acc4434cb52ff18b078c082d9d7f44d)
|
||||
set(UNIFIED_RUNTIME_URI https://github.com/oneapi-src/unified-runtime/archive/${UNIFIED_RUNTIME_VERSION}.tar.gz)
|
||||
set(UNIFIED_RUNTIME_HASH 535ca2ee78f68c5e7e62b10f1bbabd909179488885566e6d9b1fc50e8a1be65f)
|
||||
set(UNIFIED_RUNTIME_HASH_TYPE SHA256)
|
||||
set(UNIFIED_RUNTIME_FILE unified-runtime-${UNIFIED_RUNTIME_VERSION}.tar.gz)
|
||||
|
||||
######################
|
||||
### DPCPP DEPS END ###
|
||||
######################
|
||||
@@ -730,9 +737,9 @@ set(GMMLIB_HASH c1f33e1519edfc527127baeb0436b783430dfd256c643130169a3a71dc86aff9
|
||||
set(GMMLIB_HASH_TYPE SHA256)
|
||||
set(GMMLIB_FILE ${GMMLIB_VERSION}.tar.gz)
|
||||
|
||||
set(OCLOC_VERSION 22.49.25018.21)
|
||||
set(OCLOC_VERSION 23.05.25593.18)
|
||||
set(OCLOC_URI https://github.com/intel/compute-runtime/archive/refs/tags/${OCLOC_VERSION}.tar.gz)
|
||||
set(OCLOC_HASH 92362dae08b503a34e5d3820ed284198c452bcd5e7504d90eb69887b20492c06)
|
||||
set(OCLOC_HASH 122415028e631922ae999c996954dfd98ce9a32decd564d5484c31476ec9306e)
|
||||
set(OCLOC_HASH_TYPE SHA256)
|
||||
set(OCLOC_FILE ocloc-${OCLOC_VERSION}.tar.gz)
|
||||
|
||||
|
@@ -14,6 +14,7 @@ graph[autosize = false, size = "25.7,8.3!", resolution = 300];
|
||||
external_dpcpp -- external_mp11;
|
||||
external_dpcpp -- external_level_zero;
|
||||
external_dpcpp -- external_spirvheaders;
|
||||
external_dpcpp -- external_unifiedruntime;
|
||||
external_embree -- external_tbb;
|
||||
external_ffmpeg -- external_zlib;
|
||||
external_ffmpeg -- external_openjpeg;
|
||||
|
@@ -34,3 +34,156 @@ diff -Naur llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice
|
||||
libsycldevice-obj
|
||||
libsycldevice-spv)
|
||||
|
||||
diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp
|
||||
index 17eeaafae194..09e6d2217aaa 100644
|
||||
--- a/sycl/source/detail/program_manager/program_manager.cpp
|
||||
+++ b/sycl/source/detail/program_manager/program_manager.cpp
|
||||
@@ -1647,46 +1647,120 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState(
|
||||
}
|
||||
assert(BinImages.size() > 0 && "Expected to find at least one device image");
|
||||
|
||||
+ // Ignore images with incompatible state. Image is considered compatible
|
||||
+ // with a target state if an image is already in the target state or can
|
||||
+ // be brought to target state by compiling/linking/building.
|
||||
+ //
|
||||
+ // Example: an image in "executable" state is not compatible with
|
||||
+ // "input" target state - there is no operation to convert the image it
|
||||
+ // to "input" state. An image in "input" state is compatible with
|
||||
+ // "executable" target state because it can be built to get into
|
||||
+ // "executable" state.
|
||||
+ for (auto It = BinImages.begin(); It != BinImages.end();) {
|
||||
+ if (getBinImageState(*It) > TargetState)
|
||||
+ It = BinImages.erase(It);
|
||||
+ else
|
||||
+ ++It;
|
||||
+ }
|
||||
+
|
||||
std::vector<device_image_plain> SYCLDeviceImages;
|
||||
- for (RTDeviceBinaryImage *BinImage : BinImages) {
|
||||
- const bundle_state ImgState = getBinImageState(BinImage);
|
||||
-
|
||||
- // Ignore images with incompatible state. Image is considered compatible
|
||||
- // with a target state if an image is already in the target state or can
|
||||
- // be brought to target state by compiling/linking/building.
|
||||
- //
|
||||
- // Example: an image in "executable" state is not compatible with
|
||||
- // "input" target state - there is no operation to convert the image it
|
||||
- // to "input" state. An image in "input" state is compatible with
|
||||
- // "executable" target state because it can be built to get into
|
||||
- // "executable" state.
|
||||
- if (ImgState > TargetState)
|
||||
- continue;
|
||||
|
||||
- for (const sycl::device &Dev : Devs) {
|
||||
+ // If a non-input state is requested, we can filter out some compatible
|
||||
+ // images and return only those with the highest compatible state for each
|
||||
+ // device-kernel pair. This map tracks how many kernel-device pairs need each
|
||||
+ // image, so that any unneeded ones are skipped.
|
||||
+ // TODO this has no effect if the requested state is input, consider having
|
||||
+ // a separate branch for that case to avoid unnecessary tracking work.
|
||||
+ struct DeviceBinaryImageInfo {
|
||||
+ std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
|
||||
+ bundle_state State = bundle_state::input;
|
||||
+ int RequirementCounter = 0;
|
||||
+ };
|
||||
+ std::unordered_map<RTDeviceBinaryImage *, DeviceBinaryImageInfo> ImageInfoMap;
|
||||
+
|
||||
+ for (const sycl::device &Dev : Devs) {
|
||||
+ // Track the highest image state for each requested kernel.
|
||||
+ using StateImagesPairT =
|
||||
+ std::pair<bundle_state, std::vector<RTDeviceBinaryImage *>>;
|
||||
+ using KernelImageMapT =
|
||||
+ std::map<kernel_id, StateImagesPairT, LessByNameComp>;
|
||||
+ KernelImageMapT KernelImageMap;
|
||||
+ if (!KernelIDs.empty())
|
||||
+ for (const kernel_id &KernelID : KernelIDs)
|
||||
+ KernelImageMap.insert({KernelID, {}});
|
||||
+
|
||||
+ for (RTDeviceBinaryImage *BinImage : BinImages) {
|
||||
if (!compatibleWithDevice(BinImage, Dev) ||
|
||||
!doesDevSupportImgAspects(Dev, *BinImage))
|
||||
continue;
|
||||
|
||||
- std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
|
||||
- // Collect kernel names for the image
|
||||
- {
|
||||
- std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
|
||||
- KernelIDs = m_BinImg2KernelIDs[BinImage];
|
||||
- // If the image does not contain any non-service kernels we can skip it.
|
||||
- if (!KernelIDs || KernelIDs->empty())
|
||||
- continue;
|
||||
+ auto InsertRes = ImageInfoMap.insert({BinImage, {}});
|
||||
+ DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second;
|
||||
+ if (InsertRes.second) {
|
||||
+ ImgInfo.State = getBinImageState(BinImage);
|
||||
+ // Collect kernel names for the image
|
||||
+ {
|
||||
+ std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
|
||||
+ ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
|
||||
+ }
|
||||
}
|
||||
+ const bundle_state ImgState = ImgInfo.State;
|
||||
+ const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
|
||||
+ ImgInfo.KernelIDs;
|
||||
+ int &ImgRequirementCounter = ImgInfo.RequirementCounter;
|
||||
|
||||
- DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
|
||||
- BinImage, Ctx, Devs, ImgState, KernelIDs, /*PIProgram=*/nullptr);
|
||||
+ // If the image does not contain any non-service kernels we can skip it.
|
||||
+ if (!ImageKernelIDs || ImageKernelIDs->empty())
|
||||
+ continue;
|
||||
|
||||
- SYCLDeviceImages.push_back(
|
||||
- createSyclObjFromImpl<device_image_plain>(Impl));
|
||||
- break;
|
||||
+ // Update tracked information.
|
||||
+ for (kernel_id &KernelID : *ImageKernelIDs) {
|
||||
+ StateImagesPairT *StateImagesPair;
|
||||
+ // If only specific kernels are requested, ignore the rest.
|
||||
+ if (!KernelIDs.empty()) {
|
||||
+ auto It = KernelImageMap.find(KernelID);
|
||||
+ if (It == KernelImageMap.end())
|
||||
+ continue;
|
||||
+ StateImagesPair = &It->second;
|
||||
+ } else
|
||||
+ StateImagesPair = &KernelImageMap[KernelID];
|
||||
+
|
||||
+ auto &[KernelImagesState, KernelImages] = *StateImagesPair;
|
||||
+
|
||||
+ if (KernelImages.empty()) {
|
||||
+ KernelImagesState = ImgState;
|
||||
+ KernelImages.push_back(BinImage);
|
||||
+ ++ImgRequirementCounter;
|
||||
+ } else if (KernelImagesState < ImgState) {
|
||||
+ for (RTDeviceBinaryImage *Img : KernelImages) {
|
||||
+ auto It = ImageInfoMap.find(Img);
|
||||
+ assert(It != ImageInfoMap.end());
|
||||
+ assert(It->second.RequirementCounter > 0);
|
||||
+ --(It->second.RequirementCounter);
|
||||
+ }
|
||||
+ KernelImages.clear();
|
||||
+ KernelImages.push_back(BinImage);
|
||||
+ KernelImagesState = ImgState;
|
||||
+ ++ImgRequirementCounter;
|
||||
+ } else if (KernelImagesState == ImgState) {
|
||||
+ KernelImages.push_back(BinImage);
|
||||
+ ++ImgRequirementCounter;
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
+ for (const auto &ImgInfoPair : ImageInfoMap) {
|
||||
+ if (ImgInfoPair.second.RequirementCounter == 0)
|
||||
+ continue;
|
||||
+
|
||||
+ DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
|
||||
+ ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State,
|
||||
+ ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr);
|
||||
+
|
||||
+ SYCLDeviceImages.push_back(createSyclObjFromImpl<device_image_plain>(Impl));
|
||||
+ }
|
||||
+
|
||||
return SYCLDeviceImages;
|
||||
}
|
||||
|
||||
|
@@ -149,3 +149,19 @@ index 074f910a2..30f490818 100644
|
||||
return is_hit_first | is_hit_second;
|
||||
}
|
||||
};
|
||||
diff -ruN a/kernels/sycl/rthwif_embree_builder.cpp b/kernels/sycl/rthwif_embree_builder.cpp
|
||||
--- a/kernels/sycl/rthwif_embree_builder.cpp 2023-03-28 17:23:06.429190200 +0200
|
||||
+++ b/kernels/sycl/rthwif_embree_builder.cpp 2023-03-28 17:35:01.291938600 +0200
|
||||
@@ -540,7 +540,12 @@
|
||||
assert(offset <= geomDescrData.size());
|
||||
}
|
||||
|
||||
+ /* Force running BVH building sequentially from the calling thread if using TBB < 2021, as it otherwise leads to runtime issues. */
|
||||
+#if TBB_VERSION_MAJOR<2021
|
||||
+ RTHWIF_PARALLEL_OPERATION parallelOperation = nullptr;
|
||||
+#else
|
||||
RTHWIF_PARALLEL_OPERATION parallelOperation = rthwifNewParallelOperation();
|
||||
+#endif
|
||||
|
||||
/* estimate static accel size */
|
||||
BBox1f time_range(0,1);
|
||||
|
@@ -37,18 +37,24 @@ elseif(HIP_HIPCC_EXECUTABLE)
|
||||
set(HIP_VERSION_MINOR 0)
|
||||
set(HIP_VERSION_PATCH 0)
|
||||
|
||||
if(WIN32)
|
||||
set(_hipcc_executable ${HIP_HIPCC_EXECUTABLE}.bat)
|
||||
else()
|
||||
set(_hipcc_executable ${HIP_HIPCC_EXECUTABLE})
|
||||
endif()
|
||||
|
||||
# Get version from the output.
|
||||
execute_process(COMMAND ${HIP_HIPCC_EXECUTABLE} --version
|
||||
OUTPUT_VARIABLE HIP_VERSION_RAW
|
||||
execute_process(COMMAND ${_hipcc_executable} --version
|
||||
OUTPUT_VARIABLE _hip_version_raw
|
||||
ERROR_QUIET
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
# Parse parts.
|
||||
if(HIP_VERSION_RAW MATCHES "HIP version: .*")
|
||||
if(_hip_version_raw MATCHES "HIP version: .*")
|
||||
# Strip the HIP prefix and get list of individual version components.
|
||||
string(REGEX REPLACE
|
||||
".*HIP version: ([.0-9]+).*" "\\1"
|
||||
HIP_SEMANTIC_VERSION "${HIP_VERSION_RAW}")
|
||||
HIP_SEMANTIC_VERSION "${_hip_version_raw}")
|
||||
string(REPLACE "." ";" HIP_VERSION_PARTS "${HIP_SEMANTIC_VERSION}")
|
||||
list(LENGTH HIP_VERSION_PARTS NUM_HIP_VERSION_PARTS)
|
||||
|
||||
@@ -71,7 +77,13 @@ elseif(HIP_HIPCC_EXECUTABLE)
|
||||
|
||||
# Construct full semantic version.
|
||||
set(HIP_VERSION "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_PATCH}")
|
||||
unset(HIP_VERSION_RAW)
|
||||
unset(_hip_version_raw)
|
||||
unset(_hipcc_executable)
|
||||
else()
|
||||
set(HIP_FOUND FALSE)
|
||||
endif()
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(HIP
|
||||
REQUIRED_VARS HIP_HIPCC_EXECUTABLE
|
||||
VERSION_VAR HIP_VERSION)
|
||||
|
@@ -108,7 +108,11 @@ FIND_PACKAGE_HANDLE_STANDARD_ARGS(SYCL
|
||||
|
||||
IF(SYCL_FOUND)
|
||||
SET(SYCL_INCLUDE_DIR ${SYCL_INCLUDE_DIR} ${SYCL_INCLUDE_DIR}/sycl)
|
||||
SET(SYCL_LIBRARIES ${SYCL_LIBRARY})
|
||||
IF(WIN32 AND SYCL_LIBRARY_DEBUG)
|
||||
SET(SYCL_LIBRARIES optimized ${SYCL_LIBRARY} debug ${SYCL_LIBRARY_DEBUG})
|
||||
ELSE()
|
||||
SET(SYCL_LIBRARIES ${SYCL_LIBRARY})
|
||||
ENDIF()
|
||||
ELSE()
|
||||
SET(SYCL_SYCL_FOUND FALSE)
|
||||
ENDIF()
|
||||
|
@@ -1,58 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
CHECKER_IGNORE_PREFIX = [
|
||||
"extern",
|
||||
"intern/moto",
|
||||
]
|
||||
|
||||
CHECKER_BIN = "smatch"
|
||||
CHECKER_ARGS = [
|
||||
"--full-path",
|
||||
"--two-passes",
|
||||
]
|
||||
|
||||
import project_source_info
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
|
||||
USE_QUIET = (os.environ.get("QUIET", None) is not None)
|
||||
|
||||
|
||||
def main():
|
||||
source_info = project_source_info.build_info(use_cxx=False, ignore_prefix_list=CHECKER_IGNORE_PREFIX)
|
||||
source_defines = project_source_info.build_defines_as_args()
|
||||
|
||||
check_commands = []
|
||||
for c, inc_dirs, defs in source_info:
|
||||
|
||||
cmd = ([CHECKER_BIN] +
|
||||
CHECKER_ARGS +
|
||||
[c] +
|
||||
[("-I%s" % i) for i in inc_dirs] +
|
||||
[("-D%s" % d) for d in defs] +
|
||||
source_defines
|
||||
)
|
||||
|
||||
check_commands.append((c, cmd))
|
||||
|
||||
def my_process(i, c, cmd):
|
||||
if not USE_QUIET:
|
||||
percent = 100.0 * (i / len(check_commands))
|
||||
percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"
|
||||
|
||||
sys.stdout.flush()
|
||||
sys.stdout.write("%s %s\n" % (percent_str, c))
|
||||
|
||||
return subprocess.Popen(cmd)
|
||||
|
||||
process_functions = []
|
||||
for i, (c, cmd) in enumerate(check_commands):
|
||||
process_functions.append((my_process, (i, c, cmd)))
|
||||
|
||||
project_source_info.queue_processes(process_functions)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
CHECKER_IGNORE_PREFIX = [
|
||||
"extern",
|
||||
"intern/moto",
|
||||
]
|
||||
|
||||
CHECKER_BIN = "sparse"
|
||||
CHECKER_ARGS = [
|
||||
]
|
||||
|
||||
import project_source_info
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
|
||||
USE_QUIET = (os.environ.get("QUIET", None) is not None)
|
||||
|
||||
|
||||
def main():
|
||||
source_info = project_source_info.build_info(use_cxx=False, ignore_prefix_list=CHECKER_IGNORE_PREFIX)
|
||||
source_defines = project_source_info.build_defines_as_args()
|
||||
|
||||
check_commands = []
|
||||
for c, inc_dirs, defs in source_info:
|
||||
|
||||
cmd = ([CHECKER_BIN] +
|
||||
CHECKER_ARGS +
|
||||
[c] +
|
||||
[("-I%s" % i) for i in inc_dirs] +
|
||||
[("-D%s" % d) for d in defs] +
|
||||
source_defines
|
||||
)
|
||||
|
||||
check_commands.append((c, cmd))
|
||||
|
||||
def my_process(i, c, cmd):
|
||||
if not USE_QUIET:
|
||||
percent = 100.0 * (i / len(check_commands))
|
||||
percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"
|
||||
|
||||
sys.stdout.flush()
|
||||
sys.stdout.write("%s %s\n" % (percent_str, c))
|
||||
|
||||
return subprocess.Popen(cmd)
|
||||
|
||||
process_functions = []
|
||||
for i, (c, cmd) in enumerate(check_commands):
|
||||
process_functions.append((my_process, (i, c, cmd)))
|
||||
|
||||
project_source_info.queue_processes(process_functions)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -1,86 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
CHECKER_IGNORE_PREFIX = [
|
||||
"extern",
|
||||
"intern/moto",
|
||||
]
|
||||
|
||||
CHECKER_BIN = "splint"
|
||||
|
||||
CHECKER_ARGS = [
|
||||
"-weak",
|
||||
"-posix-lib",
|
||||
"-linelen", "10000",
|
||||
"+ignorequals",
|
||||
"+relaxtypes",
|
||||
"-retvalother",
|
||||
"+matchanyintegral",
|
||||
"+longintegral",
|
||||
"+ignoresigns",
|
||||
"-nestcomment",
|
||||
"-predboolothers",
|
||||
"-ifempty",
|
||||
"-unrecogcomments",
|
||||
|
||||
# we may want to remove these later
|
||||
"-type",
|
||||
"-fixedformalarray",
|
||||
"-fullinitblock",
|
||||
"-fcnuse",
|
||||
"-initallelements",
|
||||
"-castfcnptr",
|
||||
# -forcehints,
|
||||
"-bufferoverflowhigh", # warns a lot about sprintf()
|
||||
|
||||
# re-definitions, rna causes most of these
|
||||
"-redef",
|
||||
"-syntax",
|
||||
|
||||
# dummy, without this splint complains with:
|
||||
# /usr/include/bits/confname.h:31:27: *** Internal Bug at cscannerHelp.c:2428: Unexpanded macro not function or constant: int _PC_MAX_CANON
|
||||
"-D_PC_MAX_CANON=0",
|
||||
]
|
||||
|
||||
|
||||
import project_source_info
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
|
||||
USE_QUIET = (os.environ.get("QUIET", None) is not None)
|
||||
|
||||
|
||||
def main():
|
||||
source_info = project_source_info.build_info(use_cxx=False, ignore_prefix_list=CHECKER_IGNORE_PREFIX)
|
||||
|
||||
check_commands = []
|
||||
for c, inc_dirs, defs in source_info:
|
||||
cmd = ([CHECKER_BIN] +
|
||||
CHECKER_ARGS +
|
||||
[c] +
|
||||
[("-I%s" % i) for i in inc_dirs] +
|
||||
[("-D%s" % d) for d in defs]
|
||||
)
|
||||
|
||||
check_commands.append((c, cmd))
|
||||
|
||||
def my_process(i, c, cmd):
|
||||
if not USE_QUIET:
|
||||
percent = 100.0 * (i / len(check_commands))
|
||||
percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"
|
||||
|
||||
sys.stdout.write("%s %s\n" % (percent_str, c))
|
||||
sys.stdout.flush()
|
||||
|
||||
return subprocess.Popen(cmd)
|
||||
|
||||
process_functions = []
|
||||
for i, (c, cmd) in enumerate(check_commands):
|
||||
process_functions.append((my_process, (i, c, cmd)))
|
||||
|
||||
project_source_info.queue_processes(process_functions)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -82,7 +82,7 @@ if(NOT APPLE)
|
||||
set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_HIP_BINARIES OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_DEVICE_ONEAPI ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_ONEAPI_BINARIES ON CACHE BOOL "" FORCE)
|
||||
endif()
|
||||
|
@@ -1384,4 +1384,3 @@ macro(windows_process_platform_bundled_libraries library_deps)
|
||||
endforeach()
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
|
@@ -174,7 +174,7 @@ if(SYSTEMSTUBS_LIBRARY)
|
||||
list(APPEND PLATFORM_LINKLIBS SystemStubs)
|
||||
endif()
|
||||
|
||||
string(APPEND PLATFORM_CFLAGS " -pipe -funsigned-char -fno-strict-aliasing")
|
||||
string(APPEND PLATFORM_CFLAGS " -pipe -funsigned-char -fno-strict-aliasing -ffp-contract=off")
|
||||
set(PLATFORM_LINKFLAGS
|
||||
"-fexceptions -framework CoreServices -framework Foundation -framework IOKit -framework AppKit -framework Cocoa -framework Carbon -framework AudioUnit -framework AudioToolbox -framework CoreAudio -framework Metal -framework QuartzCore"
|
||||
)
|
||||
|
@@ -803,8 +803,7 @@ if(CMAKE_COMPILER_IS_GNUCC)
|
||||
# Automatically turned on when building with "-march=native". This is
|
||||
# explicitly turned off here as it will make floating point math give a bit
|
||||
# different results. This will lead to automated test failures. So disable
|
||||
# this until we support it. Seems to default to off in clang and the intel
|
||||
# compiler.
|
||||
# this until we support it.
|
||||
set(PLATFORM_CFLAGS "-pipe -fPIC -funsigned-char -fno-strict-aliasing -ffp-contract=off")
|
||||
|
||||
# `maybe-uninitialized` is unreliable in release builds, but fine in debug builds.
|
||||
@@ -815,64 +814,49 @@ if(CMAKE_COMPILER_IS_GNUCC)
|
||||
string(PREPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO "${GCC_EXTRA_FLAGS_RELEASE} ")
|
||||
unset(GCC_EXTRA_FLAGS_RELEASE)
|
||||
|
||||
# NOTE(@campbellbarton): Eventually mold will be able to use `-fuse-ld=mold`,
|
||||
# however at the moment this only works for GCC 12.1+ (unreleased at time of writing).
|
||||
# So a workaround is used here "-B" which points to another path to find system commands
|
||||
# such as `ld`.
|
||||
if(WITH_LINKER_MOLD AND _IS_LINKER_DEFAULT)
|
||||
find_program(MOLD_BIN "mold")
|
||||
mark_as_advanced(MOLD_BIN)
|
||||
|
||||
if(NOT MOLD_BIN)
|
||||
message(STATUS "The \"mold\" binary could not be found, using system linker.")
|
||||
set(WITH_LINKER_MOLD OFF)
|
||||
elseif(CMAKE_C_COMPILER_VERSION VERSION_LESS 12.1)
|
||||
message(STATUS "GCC 12.1 or newer is required for th MOLD linker.")
|
||||
set(WITH_LINKER_MOLD OFF)
|
||||
else()
|
||||
# By default mold installs the binary to:
|
||||
# - `{PREFIX}/bin/mold` as well as a symbolic-link in...
|
||||
# - `{PREFIX}/lib/mold/ld`.
|
||||
# (where `PREFIX` is typically `/usr/`).
|
||||
#
|
||||
# This block of code finds `{PREFIX}/lib/mold` from the `mold` binary.
|
||||
# Other methods of searching for the path could also be made to work,
|
||||
# we could even make our own directory and symbolic-link, however it's more
|
||||
# convenient to use the one provided by mold.
|
||||
#
|
||||
# Use the binary path to "mold", to find the common prefix which contains "lib/mold".
|
||||
# The parent directory: e.g. `/usr/bin/mold` -> `/usr/bin/`.
|
||||
get_filename_component(MOLD_PREFIX "${MOLD_BIN}" DIRECTORY)
|
||||
# The common prefix path: e.g. `/usr/bin/` -> `/usr/` to use as a hint.
|
||||
get_filename_component(MOLD_PREFIX "${MOLD_PREFIX}" DIRECTORY)
|
||||
# Find `{PREFIX}/lib/mold/ld`, store the directory component (without the `ld`).
|
||||
# Then pass `-B {PREFIX}/lib/mold` to GCC so the `ld` located there overrides the default.
|
||||
find_path(
|
||||
MOLD_BIN_DIR "ld"
|
||||
HINTS "${MOLD_PREFIX}"
|
||||
# The default path is `libexec`, Arch Linux for e.g.
|
||||
# replaces this with `lib` so check both.
|
||||
PATH_SUFFIXES "libexec/mold" "lib/mold" "lib64/mold"
|
||||
NO_DEFAULT_PATH
|
||||
NO_CACHE
|
||||
get_filename_component(MOLD_BIN_DIR "${MOLD_BIN}" DIRECTORY)
|
||||
# Check if the `-B` argument is required.
|
||||
# This will happen when `MOLD_BIN` points to a non-standard location.
|
||||
# Keep this option as mold is not yet a standard system component and
|
||||
# users may have it installed in some unexpected place.
|
||||
set(_mold_args "-fuse-ld=mold")
|
||||
execute_process(
|
||||
COMMAND ${CMAKE_C_COMPILER} -B ${MOLD_BIN_DIR} ${_mold_args} -Wl,--version
|
||||
ERROR_QUIET OUTPUT_VARIABLE LD_VERSION_WITH_DIR
|
||||
)
|
||||
if(NOT MOLD_BIN_DIR)
|
||||
message(STATUS
|
||||
"The mold linker could not find the directory containing the linker command "
|
||||
"(typically "
|
||||
"\"${MOLD_PREFIX}/libexec/mold/ld\") or "
|
||||
"\"${MOLD_PREFIX}/lib/mold/ld\") using system linker."
|
||||
)
|
||||
set(WITH_LINKER_MOLD OFF)
|
||||
execute_process(
|
||||
COMMAND ${CMAKE_C_COMPILER} ${_mold_args} -Wl,--version
|
||||
ERROR_QUIET OUTPUT_VARIABLE LD_VERSION
|
||||
)
|
||||
if(NOT (LD_VERSION STREQUAL LD_VERSION_WITH_DIR))
|
||||
string(PREPEND _mold_args "-B \"${MOLD_BIN_DIR}\" ")
|
||||
set(LD_VERSION "${LD_VERSION_WITH_DIR}")
|
||||
endif()
|
||||
unset(MOLD_PREFIX)
|
||||
endif()
|
||||
|
||||
if(WITH_LINKER_MOLD)
|
||||
# GCC will search for `ld` in this directory first.
|
||||
string(APPEND CMAKE_EXE_LINKER_FLAGS " -B \"${MOLD_BIN_DIR}\"")
|
||||
string(APPEND CMAKE_SHARED_LINKER_FLAGS " -B \"${MOLD_BIN_DIR}\"")
|
||||
string(APPEND CMAKE_MODULE_LINKER_FLAGS " -B \"${MOLD_BIN_DIR}\"")
|
||||
set(_IS_LINKER_DEFAULT OFF)
|
||||
if("${LD_VERSION}" MATCHES "mold ")
|
||||
string(APPEND CMAKE_EXE_LINKER_FLAGS " ${_mold_args}")
|
||||
string(APPEND CMAKE_SHARED_LINKER_FLAGS " ${_mold_args}")
|
||||
string(APPEND CMAKE_MODULE_LINKER_FLAGS " ${_mold_args}")
|
||||
set(_IS_LINKER_DEFAULT OFF)
|
||||
else()
|
||||
message(STATUS "GNU mold linker isn't available, using the default system linker.")
|
||||
endif()
|
||||
unset(_mold_args)
|
||||
unset(MOLD_BIN_DIR)
|
||||
unset(LD_VERSION)
|
||||
endif()
|
||||
unset(MOLD_BIN)
|
||||
unset(MOLD_BIN_DIR)
|
||||
endif()
|
||||
|
||||
if(WITH_LINKER_GOLD AND _IS_LINKER_DEFAULT)
|
||||
@@ -907,7 +891,7 @@ if(CMAKE_COMPILER_IS_GNUCC)
|
||||
|
||||
# CLang is the same as GCC for now.
|
||||
elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
set(PLATFORM_CFLAGS "-pipe -fPIC -funsigned-char -fno-strict-aliasing")
|
||||
set(PLATFORM_CFLAGS "-pipe -fPIC -funsigned-char -fno-strict-aliasing -ffp-contract=off")
|
||||
|
||||
if(WITH_LINKER_MOLD AND _IS_LINKER_DEFAULT)
|
||||
find_program(MOLD_BIN "mold")
|
||||
|
@@ -9,7 +9,7 @@ buildbot:
|
||||
cuda11:
|
||||
version: '11.4.1'
|
||||
hip:
|
||||
version: '5.3.22480'
|
||||
version: '5.5.30571'
|
||||
optix:
|
||||
version: '7.3.0'
|
||||
ocloc:
|
||||
|
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
'''
|
||||
"""
|
||||
This script generates the blender.1 man page, embedding the help text
|
||||
from the Blender executable itself. Invoke it as follows:
|
||||
|
||||
@@ -9,7 +9,7 @@ from the Blender executable itself. Invoke it as follows:
|
||||
|
||||
where <path-to-blender> is the path to the Blender executable,
|
||||
and <output-filename> is where to write the generated man page.
|
||||
'''
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
@@ -87,29 +87,29 @@ def man_page_from_blender_help(fh: TextIO, blender_bin: str, verbose: bool) -> N
|
||||
(blender_info["date"], blender_info["version"].replace(".", "\\&."))
|
||||
)
|
||||
|
||||
fh.write(r'''
|
||||
fh.write(r"""
|
||||
.SH NAME
|
||||
blender \- a full-featured 3D application''')
|
||||
blender \- a full-featured 3D application""")
|
||||
|
||||
fh.write(r'''
|
||||
fh.write(r"""
|
||||
.SH SYNOPSIS
|
||||
.B blender [args ...] [file] [args ...]''')
|
||||
.B blender [args ...] [file] [args ...]""")
|
||||
|
||||
fh.write(r'''
|
||||
fh.write(r"""
|
||||
.br
|
||||
.SH DESCRIPTION
|
||||
.PP
|
||||
.B blender
|
||||
is a full-featured 3D application. It supports the entirety of the 3D pipeline - '''
|
||||
'''modeling, rigging, animation, simulation, rendering, compositing, motion tracking, and video editing.
|
||||
is a full-featured 3D application. It supports the entirety of the 3D pipeline - """
|
||||
"""modeling, rigging, animation, simulation, rendering, compositing, motion tracking, and video editing.
|
||||
|
||||
Use Blender to create 3D images and animations, films and commercials, content for games, '''
|
||||
r'''architectural and industrial visualizations, and scientific visualizations.
|
||||
Use Blender to create 3D images and animations, films and commercials, content for games, """
|
||||
r"""architectural and industrial visualizations, and scientific visualizations.
|
||||
|
||||
https://www.blender.org''')
|
||||
https://www.blender.org""")
|
||||
|
||||
fh.write(r'''
|
||||
.SH OPTIONS''')
|
||||
fh.write(r"""
|
||||
.SH OPTIONS""")
|
||||
|
||||
fh.write("\n\n")
|
||||
|
||||
@@ -152,7 +152,7 @@ https://www.blender.org''')
|
||||
|
||||
# Footer Content.
|
||||
|
||||
fh.write(r'''
|
||||
fh.write(r"""
|
||||
.br
|
||||
.SH SEE ALSO
|
||||
.B luxrender(1)
|
||||
@@ -162,7 +162,7 @@ https://www.blender.org''')
|
||||
This manpage was written for a Debian GNU/Linux system by Daniel Mester
|
||||
<mester@uni-bremen.de> and updated by Cyril Brulebois
|
||||
<cyril.brulebois@enst-bretagne.fr> and Dan Eicher <dan@trollwerks.org>.
|
||||
''')
|
||||
""")
|
||||
|
||||
|
||||
def create_argparse() -> argparse.ArgumentParser:
|
||||
|
@@ -865,29 +865,40 @@ Unfortunate Corner Cases
|
||||
Besides all expected cases listed above, there are a few others that should not be
|
||||
an issue but, due to internal implementation details, currently are:
|
||||
|
||||
- ``Object.hide_viewport``, ``Object.hide_select`` and ``Object.hide_render``:
|
||||
Setting any of those Booleans will trigger a rebuild of Collection caches,
|
||||
thus breaking any current iteration over ``Collection.all_objects``.
|
||||
|
||||
Collection Objects
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Changing: ``Object.hide_viewport``, ``Object.hide_select`` or ``Object.hide_render``
|
||||
will trigger a rebuild of Collection caches, thus breaking any current iteration over ``Collection.all_objects``.
|
||||
|
||||
.. rubric:: Do not:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
# `all_objects` is an iterator. Using it directly while performing operations on its members that will update
|
||||
# the memory accessed by the `all_objects` iterator will lead to invalid memory accesses and crashes.
|
||||
for object in bpy.data.collections["Collection"].all_objects:
|
||||
object.hide_viewport = True
|
||||
|
||||
|
||||
.. rubric:: Do not:
|
||||
.. rubric:: Do:
|
||||
|
||||
.. code-block:: python
|
||||
.. code-block:: python
|
||||
|
||||
# `all_objects` is an iterator. Using it directly while performing operations on its members that will update
|
||||
# the memory accessed by the `all_objects` iterator will lead to invalid memory accesses and crashes.
|
||||
for object in bpy.data.collections["Collection"].all_objects:
|
||||
object.hide_viewport = True
|
||||
# `all_objects[:]` is an independent list generated from the iterator. As long as no objects are deleted,
|
||||
# its content will remain valid even if the data accessed by the `all_objects` iterator is modified.
|
||||
for object in bpy.data.collections["Collection"].all_objects[:]:
|
||||
object.hide_viewport = True
|
||||
|
||||
|
||||
.. rubric:: Do:
|
||||
Data-Blocks Renaming During Iteration
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
# `all_objects[:]` is an independent list generated from the iterator. As long as no objects are deleted,
|
||||
# its content will remain valid even if the data accessed by the `all_objects` iterator is modified.
|
||||
for object in bpy.data.collections["Collection"].all_objects[:]:
|
||||
object.hide_viewport = True
|
||||
Data-blocks accessed from ``bpy.data`` are sorted when their name is set.
|
||||
Any loop that iterates over data such as ``bpy.data.objects`` for example,
|
||||
and sets the object's ``name`` must get all items from the iterator first (typically by converting to a list or tuple)
|
||||
to avoid missing some objects and iterating over others multiple times.
|
||||
|
||||
|
||||
sys.exit
|
||||
|
@@ -572,7 +572,7 @@ template<class T> inline bool cmpMinMax(T &minv, T &maxv, const T &val)
|
||||
}
|
||||
template<> inline bool cmpMinMax<Vec3>(Vec3 &minv, Vec3 &maxv, const Vec3 &val)
|
||||
{
|
||||
return (cmpMinMax(minv.x, maxv.x, val.x) | cmpMinMax(minv.y, maxv.y, val.y) |
|
||||
return (cmpMinMax(minv.x, maxv.x, val.x) || cmpMinMax(minv.y, maxv.y, val.y) ||
|
||||
cmpMinMax(minv.z, maxv.z, val.z));
|
||||
}
|
||||
|
||||
|
@@ -281,6 +281,9 @@ endif()
|
||||
|
||||
if(WITH_CYCLES_EMBREE)
|
||||
add_definitions(-DWITH_EMBREE)
|
||||
if(WITH_CYCLES_DEVICE_ONEAPI AND EMBREE_SYCL_SUPPORT)
|
||||
add_definitions(-DWITH_EMBREE_GPU)
|
||||
endif()
|
||||
add_definitions(-DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION})
|
||||
include_directories(
|
||||
SYSTEM
|
||||
|
@@ -106,7 +106,7 @@ class CyclesRender(bpy.types.RenderEngine):
|
||||
from . import osl
|
||||
osl.update_script_node(node, self.report)
|
||||
else:
|
||||
self.report({'ERROR'}, "OSL support disabled in this build.")
|
||||
self.report({'ERROR'}, "OSL support disabled in this build")
|
||||
|
||||
def update_render_passes(self, scene, srl):
|
||||
engine.register_passes(self, scene, srl)
|
||||
|
@@ -172,6 +172,8 @@ def system_info():
|
||||
|
||||
|
||||
def list_render_passes(scene, srl):
|
||||
import _cycles
|
||||
|
||||
crl = srl.cycles
|
||||
|
||||
# Combined pass.
|
||||
@@ -250,6 +252,12 @@ def list_render_passes(scene, srl):
|
||||
for lightgroup in srl.lightgroups:
|
||||
yield ("Combined_%s" % lightgroup.name, "RGB", 'COLOR')
|
||||
|
||||
# Path guiding debug passes.
|
||||
if _cycles.with_debug:
|
||||
yield ("Guiding Color", "RGB", 'COLOR')
|
||||
yield ("Guiding Probability", "X", 'VALUE')
|
||||
yield ("Guiding Average Roughness", "X", 'VALUE')
|
||||
|
||||
|
||||
def register_passes(engine, scene, view_layer):
|
||||
for name, channelids, channeltype in list_render_passes(scene, view_layer):
|
||||
|
@@ -1544,6 +1544,13 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
default=False,
|
||||
)
|
||||
|
||||
use_oneapirt: BoolProperty(
|
||||
name="Embree on GPU (Experimental)",
|
||||
description="Embree GPU execution will allow to use hardware ray tracing on Intel GPUs, which will provide better performance. "
|
||||
"However this support is experimental and some scenes may render incorrectly",
|
||||
default=False,
|
||||
)
|
||||
|
||||
kernel_optimization_level: EnumProperty(
|
||||
name="Kernel Optimization",
|
||||
description="Kernels can be optimized based on scene content. Optimized kernels are requested at the start of a render. "
|
||||
@@ -1676,16 +1683,16 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
col.label(text=iface_("and NVIDIA driver version %s or newer") % driver_version,
|
||||
icon='BLANK1', translate=False)
|
||||
elif device_type == 'HIP':
|
||||
if True:
|
||||
col.label(text="HIP temporarily disabled due to compiler bugs", icon='BLANK1')
|
||||
else:
|
||||
import sys
|
||||
if sys.platform[:3] == "win":
|
||||
driver_version = "21.Q4"
|
||||
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
|
||||
col.label(text=iface_("and AMD Radeon Pro %s driver or newer") % driver_version,
|
||||
icon='BLANK1', translate=False)
|
||||
elif sys.platform.startswith("linux"):
|
||||
import sys
|
||||
if sys.platform[:3] == "win":
|
||||
driver_version = "21.Q4"
|
||||
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
|
||||
col.label(text=iface_("and AMD Radeon Pro %s driver or newer") % driver_version,
|
||||
icon='BLANK1', translate=False)
|
||||
elif sys.platform.startswith("linux"):
|
||||
if True:
|
||||
col.label(text="HIP temporarily disabled due to compiler bugs", icon='BLANK1')
|
||||
else:
|
||||
driver_version = "22.10"
|
||||
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
|
||||
col.label(text=iface_("and AMD driver version %s or newer") % driver_version, icon='BLANK1',
|
||||
@@ -1763,6 +1770,11 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
col.prop(self, "kernel_optimization_level")
|
||||
col.prop(self, "use_metalrt")
|
||||
|
||||
if compute_device_type == 'ONEAPI' and _cycles.with_embree_gpu:
|
||||
row = layout.row()
|
||||
row.use_property_split = True
|
||||
row.prop(self, "use_oneapirt")
|
||||
|
||||
def draw(self, context):
|
||||
self.draw_impl(self.layout, context)
|
||||
|
||||
|
@@ -803,6 +803,16 @@ static void attr_create_generic(Scene *scene,
|
||||
num_curves, num_keys, data, element, [&](int i) { return float(src[i]); });
|
||||
break;
|
||||
}
|
||||
case BL::Attribute::data_type_INT32_2D: {
|
||||
BL::Int2Attribute b_int2_attribute{b_attribute};
|
||||
const int2 *src = static_cast<const int2 *>(b_int2_attribute.data[0].ptr.data);
|
||||
Attribute *attr = attributes.add(name, TypeFloat2, element);
|
||||
float2 *data = attr->data_float2();
|
||||
fill_generic_attribute(num_curves, num_keys, data, element, [&](int i) {
|
||||
return make_float2(float(src[i][0]), float(src[i][1]));
|
||||
});
|
||||
break;
|
||||
}
|
||||
case BL::Attribute::data_type_FLOAT_VECTOR: {
|
||||
BL::FloatVectorAttribute b_vector_attribute{b_attribute};
|
||||
const float(*src)[3] = static_cast<const float(*)[3]>(b_vector_attribute.data[0].ptr.data);
|
||||
|
@@ -112,9 +112,26 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences,
|
||||
device.has_peer_memory = false;
|
||||
}
|
||||
|
||||
if (get_boolean(cpreferences, "use_metalrt")) {
|
||||
device.use_metalrt = true;
|
||||
bool accumulated_use_hardware_raytracing = false;
|
||||
foreach (
|
||||
DeviceInfo &info,
|
||||
(device.multi_devices.size() != 0 ? device.multi_devices : vector<DeviceInfo>({device}))) {
|
||||
if (info.type == DEVICE_METAL && !get_boolean(cpreferences, "use_metalrt")) {
|
||||
info.use_hardware_raytracing = false;
|
||||
}
|
||||
|
||||
if (info.type == DEVICE_ONEAPI && !get_boolean(cpreferences, "use_oneapirt")) {
|
||||
info.use_hardware_raytracing = false;
|
||||
}
|
||||
|
||||
/* There is an accumulative logic here, because Multi-devices are support only for
|
||||
* the same backend + CPU in Blender right now, and both oneAPI and Metal have a
|
||||
* global boolean backend setting (see above) for enabling/disabling HW RT,
|
||||
* so all sub-devices in the multi-device should enable (or disable) HW RT
|
||||
* simultaneously (and CPU device are expected to ignore `use_hardware_raytracing` setting). */
|
||||
accumulated_use_hardware_raytracing |= info.use_hardware_raytracing;
|
||||
}
|
||||
device.use_hardware_raytracing = accumulated_use_hardware_raytracing;
|
||||
|
||||
if (preview) {
|
||||
/* Disable specialization for preview renders. */
|
||||
|
@@ -280,7 +280,7 @@ static void fill_generic_attribute(BL::Mesh &b_mesh,
|
||||
assert(0);
|
||||
}
|
||||
else {
|
||||
const MEdge *edges = static_cast<const MEdge *>(b_mesh.edges[0].ptr.data);
|
||||
const int2 *edges = static_cast<const int2 *>(b_mesh.edges[0].ptr.data);
|
||||
const size_t verts_num = b_mesh.vertices.length();
|
||||
vector<int> count(verts_num, 0);
|
||||
|
||||
@@ -288,11 +288,11 @@ static void fill_generic_attribute(BL::Mesh &b_mesh,
|
||||
for (int i = 0; i < edges_num; i++) {
|
||||
TypeInCycles value = get_value_at_index(i);
|
||||
|
||||
const MEdge &b_edge = edges[i];
|
||||
data[b_edge.v1] += value;
|
||||
data[b_edge.v2] += value;
|
||||
count[b_edge.v1]++;
|
||||
count[b_edge.v2]++;
|
||||
const int2 &b_edge = edges[i];
|
||||
data[b_edge[0]] += value;
|
||||
data[b_edge[1]] += value;
|
||||
count[b_edge[0]]++;
|
||||
count[b_edge[1]]++;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < verts_num; i++) {
|
||||
@@ -528,6 +528,19 @@ static void attr_create_generic(Scene *scene,
|
||||
});
|
||||
break;
|
||||
}
|
||||
case BL::Attribute::data_type_INT32_2D: {
|
||||
BL::Int2Attribute b_int2_attribute{b_attribute};
|
||||
if (b_int2_attribute.data.length() == 0) {
|
||||
continue;
|
||||
}
|
||||
const int2 *src = static_cast<const int2 *>(b_int2_attribute.data[0].ptr.data);
|
||||
Attribute *attr = attributes.add(name, TypeFloat2, element);
|
||||
float2 *data = attr->data_float2();
|
||||
fill_generic_attribute(b_mesh, data, b_domain, subdivision, [&](int i) {
|
||||
return make_float2(float(src[i][0]), float(src[i][1]));
|
||||
});
|
||||
break;
|
||||
}
|
||||
default:
|
||||
/* Not supported. */
|
||||
break;
|
||||
@@ -783,13 +796,13 @@ static void attr_create_pointiness(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, b
|
||||
EdgeMap visited_edges;
|
||||
memset(&counter[0], 0, sizeof(int) * counter.size());
|
||||
|
||||
const MEdge *edges = static_cast<MEdge *>(b_mesh.edges[0].ptr.data);
|
||||
const int2 *edges = static_cast<int2 *>(b_mesh.edges[0].ptr.data);
|
||||
const int edges_num = b_mesh.edges.length();
|
||||
|
||||
for (int i = 0; i < edges_num; i++) {
|
||||
const MEdge &b_edge = edges[i];
|
||||
const int v0 = vert_orig_index[b_edge.v1];
|
||||
const int v1 = vert_orig_index[b_edge.v2];
|
||||
const int2 &b_edge = edges[i];
|
||||
const int v0 = vert_orig_index[b_edge[0]];
|
||||
const int v1 = vert_orig_index[b_edge[1]];
|
||||
if (visited_edges.exists(v0, v1)) {
|
||||
continue;
|
||||
}
|
||||
@@ -825,9 +838,9 @@ static void attr_create_pointiness(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, b
|
||||
memset(&counter[0], 0, sizeof(int) * counter.size());
|
||||
visited_edges.clear();
|
||||
for (int i = 0; i < edges_num; i++) {
|
||||
const MEdge &b_edge = edges[i];
|
||||
const int v0 = vert_orig_index[b_edge.v1];
|
||||
const int v1 = vert_orig_index[b_edge.v2];
|
||||
const int2 &b_edge = edges[i];
|
||||
const int v0 = vert_orig_index[b_edge[0]];
|
||||
const int v1 = vert_orig_index[b_edge[1]];
|
||||
if (visited_edges.exists(v0, v1)) {
|
||||
continue;
|
||||
}
|
||||
@@ -894,12 +907,12 @@ static void attr_create_random_per_island(Scene *scene,
|
||||
|
||||
DisjointSet vertices_sets(number_of_vertices);
|
||||
|
||||
const MEdge *edges = static_cast<MEdge *>(b_mesh.edges[0].ptr.data);
|
||||
const int2 *edges = static_cast<int2 *>(b_mesh.edges[0].ptr.data);
|
||||
const int edges_num = b_mesh.edges.length();
|
||||
const int *corner_verts = find_corner_vert_attribute(b_mesh);
|
||||
|
||||
for (int i = 0; i < edges_num; i++) {
|
||||
vertices_sets.join(edges[i].v1, edges[i].v2);
|
||||
vertices_sets.join(edges[i][0], edges[i][1]);
|
||||
}
|
||||
|
||||
AttributeSet &attributes = (subdivision) ? mesh->subd_attributes : mesh->attributes;
|
||||
@@ -1221,12 +1234,12 @@ static void create_subd_mesh(Scene *scene,
|
||||
|
||||
mesh->reserve_subd_creases(num_creases);
|
||||
|
||||
const MEdge *edges = static_cast<MEdge *>(b_mesh.edges[0].ptr.data);
|
||||
const int2 *edges = static_cast<int2 *>(b_mesh.edges[0].ptr.data);
|
||||
for (int i = 0; i < edges_num; i++) {
|
||||
const float crease = creases[i];
|
||||
if (crease != 0.0f) {
|
||||
const MEdge &b_edge = edges[i];
|
||||
mesh->add_edge_crease(b_edge.v1, b_edge.v2, crease);
|
||||
const int2 &b_edge = edges[i];
|
||||
mesh->add_edge_crease(b_edge[0], b_edge[1], crease);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -102,6 +102,16 @@ static void copy_attributes(PointCloud *pointcloud,
|
||||
}
|
||||
break;
|
||||
}
|
||||
case BL::Attribute::data_type_INT32_2D: {
|
||||
BL::Int2Attribute b_int2_attribute{b_attribute};
|
||||
const int2 *src = static_cast<const int2 *>(b_int2_attribute.data[0].ptr.data);
|
||||
Attribute *attr = attributes.add(name, TypeFloat2, element);
|
||||
float2 *data = attr->data_float2();
|
||||
for (int i = 0; i < num_points; i++) {
|
||||
data[i] = make_float2(float(src[i][0]), float(src[i][1]));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case BL::Attribute::data_type_FLOAT_VECTOR: {
|
||||
BL::FloatVectorAttribute b_vector_attribute{b_attribute};
|
||||
const float(*src)[3] = static_cast<const float(*)[3]>(b_vector_attribute.data[0].ptr.data);
|
||||
|
@@ -1034,6 +1034,14 @@ void *CCL_python_module_init()
|
||||
Py_INCREF(Py_False);
|
||||
#endif /* WITH_EMBREE */
|
||||
|
||||
#ifdef WITH_EMBREE_GPU
|
||||
PyModule_AddObject(mod, "with_embree_gpu", Py_True);
|
||||
Py_INCREF(Py_True);
|
||||
#else /* WITH_EMBREE_GPU */
|
||||
PyModule_AddObject(mod, "with_embree_gpu", Py_False);
|
||||
Py_INCREF(Py_False);
|
||||
#endif /* WITH_EMBREE_GPU */
|
||||
|
||||
if (ccl::openimagedenoise_supported()) {
|
||||
PyModule_AddObject(mod, "with_openimagedenoise", Py_True);
|
||||
Py_INCREF(Py_True);
|
||||
|
@@ -1061,7 +1061,7 @@ void BlenderSession::ensure_display_driver_if_needed()
|
||||
unique_ptr<BlenderDisplayDriver> display_driver = make_unique<BlenderDisplayDriver>(
|
||||
b_engine, b_scene, background);
|
||||
display_driver_ = display_driver.get();
|
||||
session->set_display_driver(move(display_driver));
|
||||
session->set_display_driver(std::move(display_driver));
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -981,22 +981,8 @@ static ShaderNode *add_node(Scene *scene,
|
||||
sky->set_sun_disc(b_sky_node.sun_disc());
|
||||
sky->set_sun_size(b_sky_node.sun_size());
|
||||
sky->set_sun_intensity(b_sky_node.sun_intensity());
|
||||
/* Patch sun position to be able to animate daylight cycle while keeping the shading code
|
||||
* simple. */
|
||||
float sun_rotation = b_sky_node.sun_rotation();
|
||||
/* Wrap into [-2PI..2PI] range. */
|
||||
float sun_elevation = fmodf(b_sky_node.sun_elevation(), M_2PI_F);
|
||||
/* Wrap into [-PI..PI] range. */
|
||||
if (fabsf(sun_elevation) >= M_PI_F) {
|
||||
sun_elevation -= copysignf(2.0f, sun_elevation) * M_PI_F;
|
||||
}
|
||||
/* Wrap into [-PI/2..PI/2] range while keeping the same absolute position. */
|
||||
if (sun_elevation >= M_PI_2_F || sun_elevation <= -M_PI_2_F) {
|
||||
sun_elevation = copysignf(M_PI_F, sun_elevation) - sun_elevation;
|
||||
sun_rotation += M_PI_F;
|
||||
}
|
||||
sky->set_sun_elevation(sun_elevation);
|
||||
sky->set_sun_rotation(sun_rotation);
|
||||
sky->set_sun_elevation(b_sky_node.sun_elevation());
|
||||
sky->set_sun_rotation(b_sky_node.sun_rotation());
|
||||
sky->set_altitude(b_sky_node.altitude());
|
||||
sky->set_air_density(b_sky_node.air_density());
|
||||
sky->set_dust_density(b_sky_node.dust_density());
|
||||
|
@@ -634,6 +634,10 @@ static bool get_known_pass_type(BL::RenderPass &b_pass, PassType &type, PassMode
|
||||
MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER, false);
|
||||
MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT, false);
|
||||
|
||||
MAP_PASS("Guiding Color", PASS_GUIDING_COLOR, false);
|
||||
MAP_PASS("Guiding Probability", PASS_GUIDING_PROBABILITY, false);
|
||||
MAP_PASS("Guiding Average Roughness", PASS_GUIDING_AVG_ROUGHNESS, false);
|
||||
|
||||
if (string_startswith(name, cryptomatte_prefix)) {
|
||||
type = PASS_CRYPTOMATTE;
|
||||
mode = PassMode::DENOISED;
|
||||
@@ -684,18 +688,6 @@ void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_v
|
||||
}
|
||||
scene->film->set_cryptomatte_passes(cryptomatte_passes);
|
||||
|
||||
/* Path guiding debug passes. */
|
||||
#ifdef WITH_CYCLES_DEBUG
|
||||
b_engine.add_pass("Guiding Color", 3, "RGB", b_view_layer.name().c_str());
|
||||
pass_add(scene, PASS_GUIDING_COLOR, "Guiding Color", PassMode::NOISY);
|
||||
|
||||
b_engine.add_pass("Guiding Probability", 1, "X", b_view_layer.name().c_str());
|
||||
pass_add(scene, PASS_GUIDING_PROBABILITY, "Guiding Probability", PassMode::NOISY);
|
||||
|
||||
b_engine.add_pass("Guiding Average Roughness", 1, "X", b_view_layer.name().c_str());
|
||||
pass_add(scene, PASS_GUIDING_AVG_ROUGHNESS, "Guiding Average Roughness", PassMode::NOISY);
|
||||
#endif
|
||||
|
||||
unordered_set<string> expected_passes;
|
||||
|
||||
/* Custom AOV passes. */
|
||||
|
@@ -527,7 +527,7 @@ BVHNode *BVHBuild::run()
|
||||
if (progress.get_cancel()) {
|
||||
rootnode->deleteSubtree();
|
||||
rootnode = NULL;
|
||||
VLOG_WORK << "BVH build cancelled.";
|
||||
VLOG_WORK << "BVH build canceled.";
|
||||
}
|
||||
else {
|
||||
/*rotate(rootnode, 4, 5);*/
|
||||
|
@@ -606,7 +606,7 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
int4 *bvh_nodes = &bvh->pack.nodes[0];
|
||||
size_t bvh_nodes_size = bvh->pack.nodes.size();
|
||||
|
||||
for (size_t i = 0, j = 0; i < bvh_nodes_size; j++) {
|
||||
for (size_t i = 0; i < bvh_nodes_size;) {
|
||||
size_t nsize, nsize_bbox;
|
||||
if (bvh_nodes[i].x & PATH_RAY_NODE_UNALIGNED) {
|
||||
nsize = BVH_UNALIGNED_NODE_SIZE;
|
||||
|
@@ -111,9 +111,13 @@ BVHEmbree::~BVHEmbree()
|
||||
}
|
||||
}
|
||||
|
||||
void BVHEmbree::build(Progress &progress, Stats *stats, RTCDevice rtc_device_)
|
||||
void BVHEmbree::build(Progress &progress,
|
||||
Stats *stats,
|
||||
RTCDevice rtc_device_,
|
||||
const bool rtc_device_is_sycl_)
|
||||
{
|
||||
rtc_device = rtc_device_;
|
||||
rtc_device_is_sycl = rtc_device_is_sycl_;
|
||||
assert(rtc_device);
|
||||
|
||||
rtcSetDeviceErrorFunction(rtc_device, rtc_error_func, NULL);
|
||||
@@ -266,15 +270,29 @@ void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i)
|
||||
rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
|
||||
|
||||
const int *triangles = mesh->get_triangles().data();
|
||||
rtcSetSharedGeometryBuffer(geom_id,
|
||||
RTC_BUFFER_TYPE_INDEX,
|
||||
0,
|
||||
RTC_FORMAT_UINT3,
|
||||
triangles,
|
||||
0,
|
||||
sizeof(int) * 3,
|
||||
num_triangles);
|
||||
|
||||
if (!rtc_device_is_sycl) {
|
||||
rtcSetSharedGeometryBuffer(geom_id,
|
||||
RTC_BUFFER_TYPE_INDEX,
|
||||
0,
|
||||
RTC_FORMAT_UINT3,
|
||||
triangles,
|
||||
0,
|
||||
sizeof(int) * 3,
|
||||
num_triangles);
|
||||
}
|
||||
else {
|
||||
/* NOTE(sirgienko): If the Embree device is a SYCL device, then Embree execution will
|
||||
* happen on GPU, and we cannot use standard host pointers at this point. So instead
|
||||
* of making a shared geometry buffer - a new Embree buffer will be created and data
|
||||
* will be copied. */
|
||||
int *triangles_buffer = (int *)rtcSetNewGeometryBuffer(
|
||||
geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, sizeof(int) * 3, num_triangles);
|
||||
assert(triangles_buffer);
|
||||
if (triangles_buffer) {
|
||||
static_assert(sizeof(int) == sizeof(uint));
|
||||
std::memcpy(triangles_buffer, triangles, sizeof(int) * 3 * (num_triangles));
|
||||
}
|
||||
}
|
||||
set_tri_vertex_buffer(geom_id, mesh, false);
|
||||
|
||||
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
|
||||
@@ -323,14 +341,38 @@ void BVHEmbree::set_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh, con
|
||||
rtcUpdateGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t);
|
||||
}
|
||||
else {
|
||||
rtcSetSharedGeometryBuffer(geom_id,
|
||||
RTC_BUFFER_TYPE_VERTEX,
|
||||
t,
|
||||
RTC_FORMAT_FLOAT3,
|
||||
verts,
|
||||
0,
|
||||
sizeof(float3),
|
||||
num_verts + 1);
|
||||
if (!rtc_device_is_sycl) {
|
||||
rtcSetSharedGeometryBuffer(geom_id,
|
||||
RTC_BUFFER_TYPE_VERTEX,
|
||||
t,
|
||||
RTC_FORMAT_FLOAT3,
|
||||
verts,
|
||||
0,
|
||||
sizeof(float3),
|
||||
num_verts + 1);
|
||||
}
|
||||
else {
|
||||
/* NOTE(sirgienko): If the Embree device is a SYCL device, then Embree execution will
|
||||
* happen on GPU, and we cannot use standard host pointers at this point. So instead
|
||||
* of making a shared geometry buffer - a new Embree buffer will be created and data
|
||||
* will be copied. */
|
||||
/* As float3 is packed on GPU side, we map it to packed_float3. */
|
||||
packed_float3 *verts_buffer = (packed_float3 *)rtcSetNewGeometryBuffer(
|
||||
geom_id,
|
||||
RTC_BUFFER_TYPE_VERTEX,
|
||||
t,
|
||||
RTC_FORMAT_FLOAT3,
|
||||
sizeof(packed_float3),
|
||||
num_verts + 1);
|
||||
assert(verts_buffer);
|
||||
if (verts_buffer) {
|
||||
for (size_t i = (size_t)0; i < num_verts + 1; ++i) {
|
||||
verts_buffer[i].x = verts[i].x;
|
||||
verts_buffer[i].y = verts[i].y;
|
||||
verts_buffer[i].z = verts[i].z;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -29,7 +29,10 @@ class PointCloud;
|
||||
|
||||
class BVHEmbree : public BVH {
|
||||
public:
|
||||
void build(Progress &progress, Stats *stats, RTCDevice rtc_device);
|
||||
void build(Progress &progress,
|
||||
Stats *stats,
|
||||
RTCDevice rtc_device,
|
||||
const bool isSyclEmbreeDevice = false);
|
||||
void refit(Progress &progress);
|
||||
|
||||
RTCScene scene;
|
||||
@@ -55,6 +58,7 @@ class BVHEmbree : public BVH {
|
||||
const bool update);
|
||||
|
||||
RTCDevice rtc_device;
|
||||
bool rtc_device_is_sycl;
|
||||
enum RTCBuildQuality build_quality;
|
||||
};
|
||||
|
||||
|
@@ -42,15 +42,19 @@ endif()
|
||||
###########################################################################
|
||||
|
||||
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
||||
set(WITH_CYCLES_HIP_BINARIES OFF)
|
||||
message(STATUS "HIP temporarily disabled due to compiler bugs")
|
||||
if(UNIX)
|
||||
# Disabled until there is a HIP 5.5 release for Linux.
|
||||
set(WITH_CYCLES_HIP_BINARIES OFF)
|
||||
message(STATUS "HIP temporarily disabled due to compiler bugs")
|
||||
else()
|
||||
# Need at least HIP 5.5 to solve compiler bug affecting the kernel.
|
||||
find_package(HIP 5.5.0)
|
||||
set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
|
||||
|
||||
# find_package(HIP)
|
||||
# set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
|
||||
|
||||
# if(HIP_FOUND)
|
||||
# message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
|
||||
# endif()
|
||||
if(HIP_FOUND)
|
||||
message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT WITH_HIP_DYNLOAD)
|
||||
|
@@ -84,7 +84,7 @@ CPUDevice::~CPUDevice()
|
||||
texture_info.free();
|
||||
}
|
||||
|
||||
BVHLayoutMask CPUDevice::get_bvh_layout_mask() const
|
||||
BVHLayoutMask CPUDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
|
||||
{
|
||||
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
|
||||
#ifdef WITH_EMBREE
|
||||
|
@@ -56,7 +56,7 @@ class CPUDevice : public Device {
|
||||
CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_);
|
||||
~CPUDevice();
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
|
||||
|
||||
/* Returns true if the texture info was copied to the device (meaning, some more
|
||||
* re-initialization might be needed). */
|
||||
|
@@ -35,7 +35,7 @@ bool CUDADevice::have_precompiled_kernels()
|
||||
return path_exists(cubins_path);
|
||||
}
|
||||
|
||||
BVHLayoutMask CUDADevice::get_bvh_layout_mask() const
|
||||
BVHLayoutMask CUDADevice::get_bvh_layout_mask(uint /*kernel_features*/) const
|
||||
{
|
||||
return BVH_LAYOUT_BVH2;
|
||||
}
|
||||
|
@@ -38,7 +38,7 @@ class CUDADevice : public GPUDevice {
|
||||
|
||||
static bool have_precompiled_kernels();
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
|
||||
|
||||
void set_error(const string &error) override;
|
||||
|
||||
|
@@ -354,7 +354,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
|
||||
info.has_guiding = true;
|
||||
info.has_profiling = true;
|
||||
info.has_peer_memory = false;
|
||||
info.use_metalrt = false;
|
||||
info.use_hardware_raytracing = false;
|
||||
info.denoisers = DENOISER_ALL;
|
||||
|
||||
foreach (const DeviceInfo &device, subdevices) {
|
||||
@@ -403,7 +403,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
|
||||
info.has_guiding &= device.has_guiding;
|
||||
info.has_profiling &= device.has_profiling;
|
||||
info.has_peer_memory |= device.has_peer_memory;
|
||||
info.use_metalrt |= device.use_metalrt;
|
||||
info.use_hardware_raytracing |= device.use_hardware_raytracing;
|
||||
info.denoisers &= device.denoisers;
|
||||
}
|
||||
|
||||
|
@@ -71,15 +71,16 @@ class DeviceInfo {
|
||||
string description;
|
||||
string id; /* used for user preferences, should stay fixed with changing hardware config */
|
||||
int num;
|
||||
bool display_device; /* GPU is used as a display device. */
|
||||
bool has_nanovdb; /* Support NanoVDB volumes. */
|
||||
bool has_light_tree; /* Support light tree. */
|
||||
bool has_osl; /* Support Open Shading Language. */
|
||||
bool has_guiding; /* Support path guiding. */
|
||||
bool has_profiling; /* Supports runtime collection of profiling info. */
|
||||
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
|
||||
bool has_gpu_queue; /* Device supports GPU queue. */
|
||||
bool use_metalrt; /* Use MetalRT to accelerate ray queries (Metal only). */
|
||||
bool display_device; /* GPU is used as a display device. */
|
||||
bool has_nanovdb; /* Support NanoVDB volumes. */
|
||||
bool has_light_tree; /* Support light tree. */
|
||||
bool has_osl; /* Support Open Shading Language. */
|
||||
bool has_guiding; /* Support path guiding. */
|
||||
bool has_profiling; /* Supports runtime collection of profiling info. */
|
||||
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
|
||||
bool has_gpu_queue; /* Device supports GPU queue. */
|
||||
bool use_hardware_raytracing; /* Use hardware ray tracing to accelerate ray queries in a backend.
|
||||
*/
|
||||
KernelOptimizationLevel kernel_optimization_level; /* Optimization level applied to path tracing
|
||||
* kernels (Metal only). */
|
||||
DenoiserTypeMask denoisers; /* Supported denoiser types. */
|
||||
@@ -101,7 +102,7 @@ class DeviceInfo {
|
||||
has_profiling = false;
|
||||
has_peer_memory = false;
|
||||
has_gpu_queue = false;
|
||||
use_metalrt = false;
|
||||
use_hardware_raytracing = false;
|
||||
denoisers = DENOISER_NONE;
|
||||
}
|
||||
|
||||
@@ -157,7 +158,7 @@ class Device {
|
||||
fprintf(stderr, "%s\n", error.c_str());
|
||||
fflush(stderr);
|
||||
}
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const = 0;
|
||||
virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const = 0;
|
||||
|
||||
/* statistics */
|
||||
Stats &stats;
|
||||
|
@@ -20,7 +20,7 @@ class DummyDevice : public Device {
|
||||
|
||||
~DummyDevice() {}
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override
|
||||
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
@@ -137,7 +137,7 @@ void device_hip_info(vector<DeviceInfo> &devices)
|
||||
info.num = num;
|
||||
|
||||
info.has_nanovdb = true;
|
||||
info.has_light_tree = false;
|
||||
info.has_light_tree = true;
|
||||
info.denoisers = 0;
|
||||
|
||||
info.has_gpu_queue = true;
|
||||
|
@@ -35,7 +35,7 @@ bool HIPDevice::have_precompiled_kernels()
|
||||
return path_exists(fatbins_path);
|
||||
}
|
||||
|
||||
BVHLayoutMask HIPDevice::get_bvh_layout_mask() const
|
||||
BVHLayoutMask HIPDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
|
||||
{
|
||||
return BVH_LAYOUT_BVH2;
|
||||
}
|
||||
|
@@ -35,7 +35,7 @@ class HIPDevice : public GPUDevice {
|
||||
|
||||
static bool have_precompiled_kernels();
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
|
||||
|
||||
void set_error(const string &error) override;
|
||||
|
||||
|
@@ -3,7 +3,9 @@
|
||||
|
||||
#include "device/kernel.h"
|
||||
|
||||
#include "util/log.h"
|
||||
#ifndef __KERNEL_ONEAPI__
|
||||
# include "util/log.h"
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -153,10 +155,13 @@ const char *device_kernel_as_string(DeviceKernel kernel)
|
||||
case DEVICE_KERNEL_NUM:
|
||||
break;
|
||||
};
|
||||
#ifndef __KERNEL_ONEAPI__
|
||||
LOG(FATAL) << "Unhandled kernel " << static_cast<int>(kernel) << ", should never happen.";
|
||||
#endif
|
||||
return "UNKNOWN";
|
||||
}
|
||||
|
||||
#ifndef __KERNEL_ONEAPI__
|
||||
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel)
|
||||
{
|
||||
os << device_kernel_as_string(kernel);
|
||||
@@ -178,5 +183,6 @@ string device_kernel_mask_as_string(DeviceKernelMask mask)
|
||||
|
||||
return str;
|
||||
}
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -3,11 +3,13 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kernel/types.h"
|
||||
#ifndef __KERNEL_ONEAPI__
|
||||
# include "kernel/types.h"
|
||||
|
||||
#include "util/string.h"
|
||||
# include "util/string.h"
|
||||
|
||||
#include <ostream> // NOLINT
|
||||
# include <ostream> // NOLINT
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -15,9 +17,12 @@ bool device_kernel_has_shading(DeviceKernel kernel);
|
||||
bool device_kernel_has_intersection(DeviceKernel kernel);
|
||||
|
||||
const char *device_kernel_as_string(DeviceKernel kernel);
|
||||
|
||||
#ifndef __KERNEL_ONEAPI__
|
||||
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel);
|
||||
|
||||
typedef uint64_t DeviceKernelMask;
|
||||
string device_kernel_mask_as_string(DeviceKernelMask mask);
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -100,7 +100,7 @@ class MetalDevice : public Device {
|
||||
|
||||
virtual void cancel() override;
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
|
||||
|
||||
void set_error(const string &error) override;
|
||||
|
||||
|
@@ -39,7 +39,7 @@ bool MetalDevice::is_device_cancelled(int ID)
|
||||
return get_device_by_ID(ID, lock) == nullptr;
|
||||
}
|
||||
|
||||
BVHLayoutMask MetalDevice::get_bvh_layout_mask() const
|
||||
BVHLayoutMask MetalDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
|
||||
{
|
||||
return use_metalrt ? BVH_LAYOUT_METAL : BVH_LAYOUT_BVH2;
|
||||
}
|
||||
@@ -100,12 +100,12 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
||||
}
|
||||
case METAL_GPU_AMD: {
|
||||
max_threads_per_threadgroup = 128;
|
||||
use_metalrt = info.use_metalrt;
|
||||
use_metalrt = info.use_hardware_raytracing;
|
||||
break;
|
||||
}
|
||||
case METAL_GPU_APPLE: {
|
||||
max_threads_per_threadgroup = 512;
|
||||
use_metalrt = info.use_metalrt;
|
||||
use_metalrt = info.use_hardware_raytracing;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@@ -96,12 +96,13 @@ class MultiDevice : public Device {
|
||||
return error_msg;
|
||||
}
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override
|
||||
virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const override
|
||||
{
|
||||
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
|
||||
BVHLayoutMask bvh_layout_mask_all = BVH_LAYOUT_NONE;
|
||||
foreach (const SubDevice &sub_device, devices) {
|
||||
BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask();
|
||||
BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask(
|
||||
kernel_features);
|
||||
bvh_layout_mask &= device_bvh_layout_mask;
|
||||
bvh_layout_mask_all |= device_bvh_layout_mask;
|
||||
}
|
||||
|
@@ -40,12 +40,12 @@ bool device_oneapi_init()
|
||||
if (getenv("SYCL_CACHE_TRESHOLD") == nullptr) {
|
||||
_putenv_s("SYCL_CACHE_THRESHOLD", "0");
|
||||
}
|
||||
if (getenv("SYCL_DEVICE_FILTER") == nullptr) {
|
||||
if (getenv("ONEAPI_DEVICE_SELECTOR") == nullptr) {
|
||||
if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
|
||||
_putenv_s("SYCL_DEVICE_FILTER", "level_zero");
|
||||
_putenv_s("ONEAPI_DEVICE_SELECTOR", "level_zero:*");
|
||||
}
|
||||
else {
|
||||
_putenv_s("SYCL_DEVICE_FILTER", "level_zero,cuda,hip");
|
||||
_putenv_s("ONEAPI_DEVICE_SELECTOR", "!opencl:*");
|
||||
}
|
||||
}
|
||||
if (getenv("SYCL_ENABLE_PCI") == nullptr) {
|
||||
@@ -58,10 +58,10 @@ bool device_oneapi_init()
|
||||
setenv("SYCL_CACHE_PERSISTENT", "1", false);
|
||||
setenv("SYCL_CACHE_THRESHOLD", "0", false);
|
||||
if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
|
||||
setenv("SYCL_DEVICE_FILTER", "level_zero", false);
|
||||
setenv("ONEAPI_DEVICE_SELECTOR", "level_zero:*", false);
|
||||
}
|
||||
else {
|
||||
setenv("SYCL_DEVICE_FILTER", "level_zero,cuda,hip", false);
|
||||
setenv("ONEAPI_DEVICE_SELECTOR", "!opencl:*", false);
|
||||
}
|
||||
setenv("SYCL_ENABLE_PCI", "1", false);
|
||||
setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false);
|
||||
@@ -87,7 +87,8 @@ Device *device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &pro
|
||||
}
|
||||
|
||||
#ifdef WITH_ONEAPI
|
||||
static void device_iterator_cb(const char *id, const char *name, int num, void *user_ptr)
|
||||
static void device_iterator_cb(
|
||||
const char *id, const char *name, int num, bool hwrt_support, void *user_ptr)
|
||||
{
|
||||
vector<DeviceInfo> *devices = (vector<DeviceInfo> *)user_ptr;
|
||||
|
||||
@@ -112,6 +113,13 @@ static void device_iterator_cb(const char *id, const char *name, int num, void *
|
||||
/* NOTE(@nsirgien): Seems not possible to know from SYCL/oneAPI or Level0. */
|
||||
info.display_device = false;
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
info.use_hardware_raytracing = hwrt_support;
|
||||
# else
|
||||
info.use_hardware_raytracing = false;
|
||||
(void)hwrt_support;
|
||||
# endif
|
||||
|
||||
devices->push_back(info);
|
||||
VLOG_INFO << "Added device \"" << name << "\" with id \"" << info.id << "\".";
|
||||
}
|
||||
|
@@ -8,7 +8,19 @@
|
||||
# include "util/debug.h"
|
||||
# include "util/log.h"
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
# include "bvh/embree.h"
|
||||
# endif
|
||||
|
||||
# include "kernel/device/oneapi/globals.h"
|
||||
# include "kernel/device/oneapi/kernel.h"
|
||||
|
||||
# if defined(WITH_EMBREE_GPU) && defined(EMBREE_SYCL_SUPPORT) && !defined(SYCL_LANGUAGE_VERSION)
|
||||
/* These declarations are missing from embree headers when compiling from a compiler that doesn't
|
||||
* support SYCL. */
|
||||
extern "C" RTCDevice rtcNewSYCLDevice(sycl::context context, const char *config);
|
||||
extern "C" bool rtcIsSYCLDeviceSupported(const sycl::device sycl_device);
|
||||
# endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -22,16 +34,29 @@ static void queue_error_cb(const char *message, void *user_ptr)
|
||||
OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
: Device(info, stats, profiler),
|
||||
device_queue_(nullptr),
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
embree_device(nullptr),
|
||||
embree_scene(nullptr),
|
||||
# endif
|
||||
texture_info_(this, "texture_info", MEM_GLOBAL),
|
||||
kg_memory_(nullptr),
|
||||
kg_memory_device_(nullptr),
|
||||
kg_memory_size_(0)
|
||||
{
|
||||
need_texture_info_ = false;
|
||||
use_hardware_raytracing = info.use_hardware_raytracing;
|
||||
|
||||
oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);
|
||||
|
||||
bool is_finished_ok = create_queue(device_queue_, info.num);
|
||||
bool is_finished_ok = create_queue(device_queue_,
|
||||
info.num,
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
use_hardware_raytracing ? &embree_device : nullptr
|
||||
# else
|
||||
nullptr
|
||||
# endif
|
||||
);
|
||||
|
||||
if (is_finished_ok == false) {
|
||||
set_error("oneAPI queue initialization error: got runtime exception \"" +
|
||||
oneapi_error_string_ + "\"");
|
||||
@@ -42,6 +67,16 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi
|
||||
assert(device_queue_);
|
||||
}
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
use_hardware_raytracing = use_hardware_raytracing && (embree_device != nullptr);
|
||||
# else
|
||||
use_hardware_raytracing = false;
|
||||
# endif
|
||||
|
||||
if (use_hardware_raytracing) {
|
||||
VLOG_INFO << "oneAPI will use hardware ray tracing for intersection acceleration.";
|
||||
}
|
||||
|
||||
size_t globals_segment_size;
|
||||
is_finished_ok = kernel_globals_size(globals_segment_size);
|
||||
if (is_finished_ok == false) {
|
||||
@@ -64,6 +99,11 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi
|
||||
|
||||
OneapiDevice::~OneapiDevice()
|
||||
{
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
if (embree_device)
|
||||
rtcReleaseDevice(embree_device);
|
||||
# endif
|
||||
|
||||
texture_info_.free();
|
||||
usm_free(device_queue_, kg_memory_);
|
||||
usm_free(device_queue_, kg_memory_device_);
|
||||
@@ -80,15 +120,47 @@ bool OneapiDevice::check_peer_access(Device * /*peer_device*/)
|
||||
return false;
|
||||
}
|
||||
|
||||
BVHLayoutMask OneapiDevice::get_bvh_layout_mask() const
|
||||
bool OneapiDevice::can_use_hardware_raytracing_for_features(uint requested_features) const
|
||||
{
|
||||
return BVH_LAYOUT_BVH2;
|
||||
/* MNEE and Ray-trace kernels currently don't work correctly with HWRT. */
|
||||
return !(requested_features & (KERNEL_FEATURE_MNEE | KERNEL_FEATURE_NODE_RAYTRACE));
|
||||
}
|
||||
|
||||
/* Report which BVH layout this device uses for the given kernel features.
 *
 * Returns BVH_LAYOUT_EMBREE only when hardware ray tracing is enabled on this device
 * (use_hardware_raytracing) and can_use_hardware_raytracing_for_features() accepts
 * `requested_features`; in every other case it returns BVH_LAYOUT_BVH2. */
BVHLayoutMask OneapiDevice::get_bvh_layout_mask(uint requested_features) const
|
||||
{
|
||||
return (use_hardware_raytracing &&
|
||||
can_use_hardware_raytracing_for_features(requested_features)) ?
|
||||
BVH_LAYOUT_EMBREE :
|
||||
BVH_LAYOUT_BVH2;
|
||||
}
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
/* Build or refit `bvh` for this device.
 *
 * When an Embree device exists and the BVH uses BVH_LAYOUT_EMBREE, the BVH is handled as a
 * BVHEmbree: it is refit when `refit` is set and built otherwise. For a top-level BVH the
 * resulting scene handle is stored in embree_scene. Any other BVH is forwarded unchanged to
 * Device::build_bvh(). */
void OneapiDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
{
|
||||
if (embree_device && bvh->params.bvh_layout == BVH_LAYOUT_EMBREE) {
|
||||
BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
|
||||
if (refit) {
|
||||
bvh_embree->refit(progress);
|
||||
}
|
||||
else {
|
||||
/* NOTE(review): the trailing `true` presumably tells BVHEmbree::build() that
 * embree_device is a SYCL (GPU) Embree device -- confirm against its declaration. */
bvh_embree->build(progress, &stats, embree_device, true);
|
||||
}
|
||||
if (bvh->params.top_level) {
|
||||
/* Keep the top-level scene handle; const_copy_to() writes it to KernelData::device_bvh. */
embree_scene = bvh_embree->scene;
|
||||
}
|
||||
}
|
||||
else {
|
||||
Device::build_bvh(bvh, progress, refit);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
bool OneapiDevice::load_kernels(const uint requested_features)
|
||||
{
|
||||
assert(device_queue_);
|
||||
|
||||
kernel_features = requested_features;
|
||||
|
||||
bool is_finished_ok = oneapi_run_test_kernel(device_queue_);
|
||||
if (is_finished_ok == false) {
|
||||
set_error("oneAPI test kernel execution: got a runtime exception \"" + oneapi_error_string_ +
|
||||
@@ -100,7 +172,14 @@ bool OneapiDevice::load_kernels(const uint requested_features)
|
||||
assert(device_queue_);
|
||||
}
|
||||
|
||||
is_finished_ok = oneapi_load_kernels(device_queue_, (const unsigned int)requested_features);
|
||||
if (use_hardware_raytracing && !can_use_hardware_raytracing_for_features(requested_features)) {
|
||||
VLOG_INFO
|
||||
<< "Hardware ray tracing disabled, not supported yet by oneAPI for requested features.";
|
||||
use_hardware_raytracing = false;
|
||||
}
|
||||
|
||||
is_finished_ok = oneapi_load_kernels(
|
||||
device_queue_, (const unsigned int)requested_features, use_hardware_raytracing);
|
||||
if (is_finished_ok == false) {
|
||||
set_error("oneAPI kernels loading: got a runtime exception \"" + oneapi_error_string_ + "\"");
|
||||
}
|
||||
@@ -327,6 +406,16 @@ void OneapiDevice::const_copy_to(const char *name, void *host, size_t size)
|
||||
<< string_human_readable_number(size) << " bytes. ("
|
||||
<< string_human_readable_size(size) << ")";
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
if (strcmp(name, "data") == 0) {
|
||||
assert(size <= sizeof(KernelData));
|
||||
|
||||
/* Update the scene handle (it is different for each device in a multi-device setup). */
|
||||
KernelData *const data = (KernelData *)host;
|
||||
data->device_bvh = embree_scene;
|
||||
}
|
||||
# endif
|
||||
|
||||
ConstMemMap::iterator i = const_mem_map_.find(name);
|
||||
device_vector<uchar> *data;
|
||||
|
||||
@@ -446,7 +535,9 @@ void OneapiDevice::check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_
|
||||
# endif
|
||||
}
|
||||
|
||||
bool OneapiDevice::create_queue(SyclQueue *&external_queue, int device_index)
|
||||
bool OneapiDevice::create_queue(SyclQueue *&external_queue,
|
||||
int device_index,
|
||||
void *embree_device_pointer)
|
||||
{
|
||||
bool finished_correct = true;
|
||||
try {
|
||||
@@ -457,6 +548,13 @@ bool OneapiDevice::create_queue(SyclQueue *&external_queue, int device_index)
|
||||
sycl::queue *created_queue = new sycl::queue(devices[device_index],
|
||||
sycl::property::queue::in_order());
|
||||
external_queue = reinterpret_cast<SyclQueue *>(created_queue);
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
if (embree_device_pointer) {
|
||||
*((RTCDevice *)embree_device_pointer) = rtcNewSYCLDevice(created_queue->get_context(), "");
|
||||
}
|
||||
# else
|
||||
(void)embree_device_pointer;
|
||||
# endif
|
||||
}
|
||||
catch (sycl::exception const &e) {
|
||||
finished_correct = false;
|
||||
@@ -625,7 +723,8 @@ bool OneapiDevice::enqueue_kernel(KernelContext *kernel_context,
|
||||
size_t global_size,
|
||||
void **args)
|
||||
{
|
||||
return oneapi_enqueue_kernel(kernel_context, kernel, global_size, args);
|
||||
return oneapi_enqueue_kernel(
|
||||
kernel_context, kernel, global_size, kernel_features, use_hardware_raytracing, args);
|
||||
}
|
||||
|
||||
/* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows
|
||||
@@ -767,9 +866,9 @@ char *OneapiDevice::device_capabilities()
|
||||
|
||||
sycl::id<3> max_work_item_sizes =
|
||||
device.get_info<sycl::info::device::max_work_item_sizes<3>>();
|
||||
WRITE_ATTR("max_work_item_sizes_dim0", ((size_t)max_work_item_sizes.get(0)))
|
||||
WRITE_ATTR("max_work_item_sizes_dim1", ((size_t)max_work_item_sizes.get(1)))
|
||||
WRITE_ATTR("max_work_item_sizes_dim2", ((size_t)max_work_item_sizes.get(2)))
|
||||
WRITE_ATTR(max_work_item_sizes_dim0, ((size_t)max_work_item_sizes.get(0)))
|
||||
WRITE_ATTR(max_work_item_sizes_dim1, ((size_t)max_work_item_sizes.get(1)))
|
||||
WRITE_ATTR(max_work_item_sizes_dim2, ((size_t)max_work_item_sizes.get(2)))
|
||||
|
||||
GET_NUM_ATTR(max_work_group_size)
|
||||
GET_NUM_ATTR(max_num_sub_groups)
|
||||
@@ -792,7 +891,7 @@ char *OneapiDevice::device_capabilities()
|
||||
GET_NUM_ATTR(native_vector_width_half)
|
||||
|
||||
size_t max_clock_frequency = device.get_info<sycl::info::device::max_clock_frequency>();
|
||||
WRITE_ATTR("max_clock_frequency", max_clock_frequency)
|
||||
WRITE_ATTR(max_clock_frequency, max_clock_frequency)
|
||||
|
||||
GET_NUM_ATTR(address_bits)
|
||||
GET_NUM_ATTR(max_mem_alloc_size)
|
||||
@@ -801,7 +900,7 @@ char *OneapiDevice::device_capabilities()
|
||||
* supported so we always return false, even if device supports HW texture usage acceleration.
|
||||
*/
|
||||
bool image_support = false;
|
||||
WRITE_ATTR("image_support", (size_t)image_support)
|
||||
WRITE_ATTR(image_support, (size_t)image_support)
|
||||
|
||||
GET_NUM_ATTR(max_parameter_size)
|
||||
GET_NUM_ATTR(mem_base_addr_align)
|
||||
@@ -830,12 +929,17 @@ void OneapiDevice::iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_p
|
||||
std::string name = device.get_info<sycl::info::device::name>();
|
||||
# else
|
||||
std::string name = "SYCL Host Task (Debug)";
|
||||
# endif
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
bool hwrt_support = rtcIsSYCLDeviceSupported(device);
|
||||
# else
|
||||
bool hwrt_support = false;
|
||||
# endif
|
||||
std::string id = "ONEAPI_" + platform_name + "_" + name;
|
||||
if (device.has(sycl::aspect::ext_intel_pci_address)) {
|
||||
id.append("_" + device.get_info<sycl::ext::intel::info::device::pci_address>());
|
||||
}
|
||||
(cb)(id.c_str(), name.c_str(), num, user_ptr);
|
||||
(cb)(id.c_str(), name.c_str(), num, hwrt_support, user_ptr);
|
||||
num++;
|
||||
}
|
||||
}
|
||||
|
@@ -16,15 +16,16 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
class DeviceQueue;
|
||||
|
||||
typedef void (*OneAPIDeviceIteratorCallback)(const char *id,
|
||||
const char *name,
|
||||
int num,
|
||||
void *user_ptr);
|
||||
typedef void (*OneAPIDeviceIteratorCallback)(
|
||||
const char *id, const char *name, int num, bool hwrt_support, void *user_ptr);
|
||||
|
||||
class OneapiDevice : public Device {
|
||||
private:
|
||||
SyclQueue *device_queue_;
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
RTCDevice embree_device;
|
||||
RTCScene embree_scene;
|
||||
# endif
|
||||
using ConstMemMap = map<string, device_vector<uchar> *>;
|
||||
ConstMemMap const_mem_map_;
|
||||
device_vector<TextureInfo> texture_info_;
|
||||
@@ -34,17 +35,21 @@ class OneapiDevice : public Device {
|
||||
size_t kg_memory_size_ = (size_t)0;
|
||||
size_t max_memory_on_device_ = (size_t)0;
|
||||
std::string oneapi_error_string_;
|
||||
bool use_hardware_raytracing = false;
|
||||
unsigned int kernel_features = 0;
|
||||
|
||||
public:
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const override;
|
||||
|
||||
OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
|
||||
|
||||
virtual ~OneapiDevice();
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
|
||||
# endif
|
||||
bool check_peer_access(Device *peer_device) override;
|
||||
|
||||
bool load_kernels(const uint requested_features) override;
|
||||
bool load_kernels(const uint kernel_features) override;
|
||||
|
||||
void load_texture_info();
|
||||
|
||||
@@ -113,8 +118,9 @@ class OneapiDevice : public Device {
|
||||
SyclQueue *sycl_queue();
|
||||
|
||||
protected:
|
||||
bool can_use_hardware_raytracing_for_features(uint kernel_features) const;
|
||||
void check_usm(SyclQueue *queue, const void *usm_ptr, bool allow_host);
|
||||
bool create_queue(SyclQueue *&external_queue, int device_index);
|
||||
bool create_queue(SyclQueue *&external_queue, int device_index, void *embree_device);
|
||||
void free_queue(SyclQueue *queue);
|
||||
void *usm_aligned_alloc_host(SyclQueue *queue, size_t memory_size, size_t alignment);
|
||||
void *usm_alloc_device(SyclQueue *queue, size_t memory_size);
|
||||
|
@@ -151,7 +151,7 @@ unique_ptr<DeviceQueue> OptiXDevice::gpu_queue_create()
|
||||
return make_unique<OptiXDeviceQueue>(this);
|
||||
}
|
||||
|
||||
BVHLayoutMask OptiXDevice::get_bvh_layout_mask() const
|
||||
BVHLayoutMask OptiXDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
|
||||
{
|
||||
/* OptiX has its own internal acceleration structure format. */
|
||||
return BVH_LAYOUT_OPTIX;
|
||||
|
@@ -88,7 +88,7 @@ class OptiXDevice : public CUDADevice {
|
||||
OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
|
||||
~OptiXDevice();
|
||||
|
||||
BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
|
||||
|
||||
string compile_kernel_get_common_cflags(const uint kernel_features);
|
||||
|
||||
|
@@ -574,7 +574,7 @@ void PathTrace::denoise(const RenderWork &render_work)
|
||||
|
||||
void PathTrace::set_output_driver(unique_ptr<OutputDriver> driver)
|
||||
{
|
||||
output_driver_ = move(driver);
|
||||
output_driver_ = std::move(driver);
|
||||
}
|
||||
|
||||
void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
|
||||
@@ -585,7 +585,7 @@ void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
|
||||
destroy_gpu_resources();
|
||||
|
||||
if (driver) {
|
||||
display_ = make_unique<PathTraceDisplay>(move(driver));
|
||||
display_ = make_unique<PathTraceDisplay>(std::move(driver));
|
||||
}
|
||||
else {
|
||||
display_ = nullptr;
|
||||
|
@@ -9,7 +9,9 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
PathTraceDisplay::PathTraceDisplay(unique_ptr<DisplayDriver> driver) : driver_(move(driver)) {}
|
||||
PathTraceDisplay::PathTraceDisplay(unique_ptr<DisplayDriver> driver) : driver_(std::move(driver))
|
||||
{
|
||||
}
|
||||
|
||||
void PathTraceDisplay::reset(const BufferParams &buffer_params, const bool reset_rendering)
|
||||
{
|
||||
|
@@ -357,8 +357,12 @@ void PathTraceWorkCPU::guiding_push_sample_data_to_global_storage(
|
||||
# if PATH_GUIDING_LEVEL >= 2
|
||||
const bool use_direct_light = kernel_data.integrator.use_guiding_direct_light;
|
||||
const bool use_mis_weights = kernel_data.integrator.use_guiding_mis_weights;
|
||||
# if OPENPGL_VERSION_MINOR >= 5
|
||||
kg->opgl_path_segment_storage->PrepareSamples(use_mis_weights, use_direct_light, false);
|
||||
# else
|
||||
kg->opgl_path_segment_storage->PrepareSamples(
|
||||
false, nullptr, use_mis_weights, use_direct_light, false);
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# ifdef WITH_CYCLES_DEBUG
|
||||
|
@@ -28,6 +28,7 @@ static size_t estimate_single_state_size(const uint kernel_features)
|
||||
#define KERNEL_STRUCT_ARRAY_MEMBER(parent_struct, type, name, feature) \
|
||||
state_size += (kernel_features & (feature)) ? sizeof(type) : 0;
|
||||
#define KERNEL_STRUCT_END(name) \
|
||||
(void)array_index; \
|
||||
break; \
|
||||
}
|
||||
#define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
|
||||
@@ -139,6 +140,7 @@ void PathTraceWorkGPU::alloc_integrator_soa()
|
||||
integrator_state_gpu_.parent_struct[array_index].name = (type *)array->device_pointer; \
|
||||
}
|
||||
#define KERNEL_STRUCT_END(name) \
|
||||
(void)array_index; \
|
||||
break; \
|
||||
}
|
||||
#define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
|
||||
@@ -299,8 +301,8 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
|
||||
* become busy after adding new tiles). This is especially important for the shadow catcher which
|
||||
* schedules work in halves of available number of paths. */
|
||||
work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8);
|
||||
work_tile_scheduler_.set_accelerated_rt((device_->get_bvh_layout_mask() & BVH_LAYOUT_OPTIX) !=
|
||||
0);
|
||||
work_tile_scheduler_.set_accelerated_rt(
|
||||
(device_->get_bvh_layout_mask(device_scene_->data.kernel_features) & BVH_LAYOUT_OPTIX) != 0);
|
||||
work_tile_scheduler_.reset(effective_buffer_params_,
|
||||
start_sample,
|
||||
samples_num,
|
||||
|
@@ -55,21 +55,29 @@ void WorkTileScheduler::reset_scheduler_state()
|
||||
|
||||
VLOG_WORK << "Will schedule tiles of size " << tile_size_;
|
||||
|
||||
if (VLOG_IS_ON(3)) {
|
||||
/* The logging is based on multiple tiles scheduled, ignoring overhead of multi-tile scheduling
|
||||
* and purely focusing on the number of used path states. */
|
||||
const int num_path_states_in_tile = tile_size_.width * tile_size_.height *
|
||||
tile_size_.num_samples;
|
||||
const int num_tiles = max_num_path_states_ / num_path_states_in_tile;
|
||||
VLOG_WORK << "Number of unused path states: "
|
||||
<< max_num_path_states_ - num_tiles * num_path_states_in_tile;
|
||||
const int num_path_states_in_tile = tile_size_.width * tile_size_.height *
|
||||
tile_size_.num_samples;
|
||||
|
||||
if (num_path_states_in_tile == 0) {
|
||||
num_tiles_x_ = 0;
|
||||
num_tiles_y_ = 0;
|
||||
num_tiles_per_sample_range_ = 0;
|
||||
}
|
||||
else {
|
||||
if (VLOG_IS_ON(3)) {
|
||||
/* The logging is based on multiple tiles scheduled, ignoring overhead of multi-tile
|
||||
* scheduling and purely focusing on the number of used path states. */
|
||||
const int num_tiles = max_num_path_states_ / num_path_states_in_tile;
|
||||
VLOG_WORK << "Number of unused path states: "
|
||||
<< max_num_path_states_ - num_tiles * num_path_states_in_tile;
|
||||
}
|
||||
|
||||
num_tiles_x_ = divide_up(image_size_px_.x, tile_size_.width);
|
||||
num_tiles_y_ = divide_up(image_size_px_.y, tile_size_.height);
|
||||
num_tiles_per_sample_range_ = divide_up(samples_num_, tile_size_.num_samples);
|
||||
}
|
||||
|
||||
num_tiles_x_ = divide_up(image_size_px_.x, tile_size_.width);
|
||||
num_tiles_y_ = divide_up(image_size_px_.y, tile_size_.height);
|
||||
|
||||
total_tiles_num_ = num_tiles_x_ * num_tiles_y_;
|
||||
num_tiles_per_sample_range_ = divide_up(samples_num_, tile_size_.num_samples);
|
||||
|
||||
next_work_index_ = 0;
|
||||
total_work_size_ = total_tiles_num_ * num_tiles_per_sample_range_;
|
||||
|
@@ -96,10 +96,13 @@ set(SRC_KERNEL_DEVICE_ONEAPI_HEADERS
|
||||
device/oneapi/compat.h
|
||||
device/oneapi/context_begin.h
|
||||
device/oneapi/context_end.h
|
||||
device/oneapi/context_intersect_begin.h
|
||||
device/oneapi/context_intersect_end.h
|
||||
device/oneapi/globals.h
|
||||
device/oneapi/image.h
|
||||
device/oneapi/kernel.h
|
||||
device/oneapi/kernel_templates.h
|
||||
device/cpu/bvh.h
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_CLOSURE_HEADERS
|
||||
@@ -764,7 +767,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
|
||||
|
||||
# Set defaults for spir64 and spir64_gen options
|
||||
if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64)
|
||||
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-large-register-file -ze-opt-regular-grf-kernel integrator_intersect'")
|
||||
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-regular-grf-kernel integrator_intersect -ze-opt-large-grf-kernel shade -ze-opt-no-local-to-generic'")
|
||||
endif()
|
||||
if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen)
|
||||
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}" CACHE STRING "Extra build options for spir64_gen target")
|
||||
@@ -775,8 +778,6 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
|
||||
|
||||
# Host execution won't use GPU binaries, no need to compile them.
|
||||
if(WITH_CYCLES_ONEAPI_BINARIES AND NOT WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
|
||||
# AoT binaries aren't currently reused when calling sycl::build.
|
||||
list(APPEND sycl_compiler_flags -DSYCL_SKIP_KERNELS_PRELOAD)
|
||||
# Iterate over all targets and their options
|
||||
list(JOIN CYCLES_ONEAPI_SYCL_TARGETS "," targets_string)
|
||||
list(APPEND sycl_compiler_flags -fsycl-targets=${targets_string})
|
||||
@@ -798,6 +799,59 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
|
||||
-I"${NANOVDB_INCLUDE_DIR}")
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_EMBREE AND EMBREE_SYCL_SUPPORT)
|
||||
list(APPEND sycl_compiler_flags
|
||||
-DWITH_EMBREE
|
||||
-DWITH_EMBREE_GPU
|
||||
-DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION}
|
||||
-I"${EMBREE_INCLUDE_DIRS}")
|
||||
|
||||
if(WIN32)
|
||||
list(APPEND sycl_compiler_flags
|
||||
-ladvapi32.lib
|
||||
)
|
||||
endif()
|
||||
|
||||
set(next_library_mode "")
|
||||
foreach(library ${EMBREE_LIBRARIES})
|
||||
string(TOLOWER "${library}" library_lower)
|
||||
if(("${library_lower}" STREQUAL "optimized") OR
|
||||
("${library_lower}" STREQUAL "debug"))
|
||||
set(next_library_mode "${library_lower}")
|
||||
else()
|
||||
if(next_library_mode STREQUAL "")
|
||||
list(APPEND EMBREE_TBB_LIBRARIES_optimized ${library})
|
||||
list(APPEND EMBREE_TBB_LIBRARIES_debug ${library})
|
||||
else()
|
||||
list(APPEND EMBREE_TBB_LIBRARIES_${next_library_mode} ${library})
|
||||
endif()
|
||||
set(next_library_mode "")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
foreach(library ${TBB_LIBRARIES})
|
||||
string(TOLOWER "${library}" library_lower)
|
||||
if(("${library_lower}" STREQUAL "optimized") OR
|
||||
("${library_lower}" STREQUAL "debug"))
|
||||
set(next_library_mode "${library_lower}")
|
||||
else()
|
||||
if(next_library_mode STREQUAL "")
|
||||
list(APPEND EMBREE_TBB_LIBRARIES_optimized ${library})
|
||||
list(APPEND EMBREE_TBB_LIBRARIES_debug ${library})
|
||||
else()
|
||||
list(APPEND EMBREE_TBB_LIBRARIES_${next_library_mode} ${library})
|
||||
endif()
|
||||
set(next_library_mode "")
|
||||
endif()
|
||||
endforeach()
|
||||
list(APPEND sycl_compiler_flags
|
||||
"$<$<CONFIG:Release>:${EMBREE_TBB_LIBRARIES_optimized}>"
|
||||
"$<$<CONFIG:RelWithDebInfo>:${EMBREE_TBB_LIBRARIES_optimized}>"
|
||||
"$<$<CONFIG:MinSizeRel>:${EMBREE_TBB_LIBRARIES_optimized}>"
|
||||
"$<$<CONFIG:Debug>:${EMBREE_TBB_LIBRARIES_debug}>"
|
||||
)
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_DEBUG)
|
||||
list(APPEND sycl_compiler_flags -DWITH_CYCLES_DEBUG)
|
||||
endif()
|
||||
|
@@ -21,6 +21,28 @@
|
||||
# define __BVH2__
|
||||
#endif
|
||||
|
||||
#if defined(__KERNEL_ONEAPI__) && defined(WITH_EMBREE_GPU)
|
||||
/* bool is apparently not tested for specialization constants:
|
||||
* https://github.com/intel/llvm/blob/39d1c65272a786b2b13a6f094facfddf9408406d/sycl/test/basic_tests/SYCL-2020-spec-constants.cpp#L25-L27
|
||||
* Instead of adding one more bool specialization constant, we reuse existing embree_features one
|
||||
* and use RTC_FEATURE_FLAG_NONE as the value that means Embree must not be called on the GPU.
|
||||
*/
|
||||
/* We set it to RTC_FEATURE_FLAG_NONE by default so AoT binaries contain MNEE and ray-trace kernels
|
||||
* pre-compiled without Embree.
|
||||
* Changing this default value would require updating the logic in oneapi_load_kernels(). */
|
||||
static constexpr sycl::specialization_id<RTCFeatureFlags> oneapi_embree_features{
|
||||
RTC_FEATURE_FLAG_NONE};
|
||||
# define IF_USING_EMBREE \
|
||||
if (kernel_handler.get_specialization_constant<oneapi_embree_features>() != \
|
||||
RTC_FEATURE_FLAG_NONE)
|
||||
# define IF_NOT_USING_EMBREE \
|
||||
if (kernel_handler.get_specialization_constant<oneapi_embree_features>() == \
|
||||
RTC_FEATURE_FLAG_NONE)
|
||||
#else
|
||||
# define IF_USING_EMBREE
|
||||
# define IF_NOT_USING_EMBREE
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef __BVH2__
|
||||
@@ -74,30 +96,39 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect(kg, ray, visibility, isect);
|
||||
IF_USING_EMBREE
|
||||
{
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect(kg, ray, visibility, isect);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
IF_NOT_USING_EMBREE
|
||||
{
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_hair_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_hair_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
return bvh_intersect_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_hair(kg, ray, isect, visibility);
|
||||
}
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_hair(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect(kg, ray, isect, visibility);
|
||||
return bvh_intersect(kg, ray, isect, visibility);
|
||||
}
|
||||
|
||||
kernel_assert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Single object BVH traversal, for SSS/AO/bevel. */
|
||||
@@ -129,17 +160,27 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
IF_USING_EMBREE
|
||||
{
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_local(
|
||||
kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
IF_NOT_USING_EMBREE
|
||||
{
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
}
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
}
|
||||
|
||||
kernel_assert(false);
|
||||
return false;
|
||||
}
|
||||
# endif
|
||||
|
||||
@@ -184,35 +225,44 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_shadow_all(
|
||||
kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
|
||||
IF_USING_EMBREE
|
||||
{
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_shadow_all(
|
||||
kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
IF_NOT_USING_EMBREE
|
||||
{
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_shadow_all_hair_motion(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_shadow_all_hair_motion(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect_shadow_all_motion(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
return bvh_intersect_shadow_all_motion(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_shadow_all_hair(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_shadow_all_hair(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect_shadow_all(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
return bvh_intersect_shadow_all(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
|
||||
kernel_assert(false);
|
||||
return false;
|
||||
}
|
||||
# endif /* __SHADOW_RECORD_ALL__ */
|
||||
|
||||
@@ -239,13 +289,28 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
|
||||
return false;
|
||||
}
|
||||
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_volume_motion(kg, ray, isect, visibility);
|
||||
# ifdef __EMBREE__
|
||||
IF_USING_EMBREE
|
||||
{
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_volume(kg, ray, isect, visibility);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
IF_NOT_USING_EMBREE
|
||||
{
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_volume_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
return bvh_intersect_volume(kg, ray, isect, visibility);
|
||||
return bvh_intersect_volume(kg, ray, isect, visibility);
|
||||
}
|
||||
|
||||
kernel_assert(false);
|
||||
return false;
|
||||
}
|
||||
# endif /* defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__) */
|
||||
|
||||
@@ -275,18 +340,27 @@ ccl_device_intersect uint scene_intersect_volume(KernelGlobals kg,
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
|
||||
IF_USING_EMBREE
|
||||
{
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
IF_NOT_USING_EMBREE
|
||||
{
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
|
||||
}
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
|
||||
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
|
||||
}
|
||||
|
||||
kernel_assert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
# endif /* defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__) */
|
||||
|
@@ -51,8 +51,6 @@ ccl_device_inline
|
||||
int object = OBJECT_NONE;
|
||||
float isect_t = ray->tmax;
|
||||
|
||||
int num_hits_in_instance = 0;
|
||||
|
||||
uint num_hits = 0;
|
||||
isect_array->t = ray->tmax;
|
||||
|
||||
@@ -152,7 +150,6 @@ ccl_device_inline
|
||||
/* Move on to next entry in intersections array. */
|
||||
isect_array++;
|
||||
num_hits++;
|
||||
num_hits_in_instance++;
|
||||
isect_array->t = isect_t;
|
||||
if (num_hits == max_hits) {
|
||||
return num_hits;
|
||||
@@ -193,7 +190,6 @@ ccl_device_inline
|
||||
/* Move on to next entry in intersections array. */
|
||||
isect_array++;
|
||||
num_hits++;
|
||||
num_hits_in_instance++;
|
||||
isect_array->t = isect_t;
|
||||
if (num_hits == max_hits) {
|
||||
return num_hits;
|
||||
@@ -219,7 +215,6 @@ ccl_device_inline
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
#endif
|
||||
|
||||
num_hits_in_instance = 0;
|
||||
isect_array->t = isect_t;
|
||||
|
||||
++stack_ptr;
|
||||
|
@@ -64,6 +64,7 @@ KERNEL_DATA_ARRAY(float2, light_background_conditional_cdf)
|
||||
KERNEL_DATA_ARRAY(KernelLightTreeNode, light_tree_nodes)
|
||||
KERNEL_DATA_ARRAY(KernelLightTreeEmitter, light_tree_emitters)
|
||||
KERNEL_DATA_ARRAY(uint, light_to_tree)
|
||||
KERNEL_DATA_ARRAY(uint, object_to_tree)
|
||||
KERNEL_DATA_ARRAY(uint, object_lookup_offset)
|
||||
KERNEL_DATA_ARRAY(uint, triangle_to_tree)
|
||||
|
||||
|
@@ -20,6 +20,7 @@ KERNEL_STRUCT_BEGIN(KernelBackground, background)
|
||||
/* xyz store direction, w the angle. float4 instead of float3 is used
|
||||
* to ensure consistent padding/alignment across devices. */
|
||||
KERNEL_STRUCT_MEMBER(background, float4, sun)
|
||||
KERNEL_STRUCT_MEMBER(background, int, use_sun_guiding)
|
||||
/* Only shader index. */
|
||||
KERNEL_STRUCT_MEMBER(background, int, surface_shader)
|
||||
KERNEL_STRUCT_MEMBER(background, int, volume_shader)
|
||||
@@ -39,6 +40,10 @@ KERNEL_STRUCT_MEMBER(background, int, use_mis)
|
||||
KERNEL_STRUCT_MEMBER(background, int, lightgroup)
|
||||
/* Light Index. */
|
||||
KERNEL_STRUCT_MEMBER(background, int, light_index)
|
||||
/* Padding. */
|
||||
KERNEL_STRUCT_MEMBER(background, int, pad1)
|
||||
KERNEL_STRUCT_MEMBER(background, int, pad2)
|
||||
KERNEL_STRUCT_MEMBER(background, int, pad3)
|
||||
KERNEL_STRUCT_END(KernelBackground)
|
||||
|
||||
/* BVH: own BVH2 if no native device acceleration struct used. */
|
||||
|
@@ -13,8 +13,13 @@
|
||||
# include <embree3/rtcore_scene.h>
|
||||
#endif
|
||||
|
||||
#include "kernel/device/cpu/compat.h"
|
||||
#include "kernel/device/cpu/globals.h"
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
# include "kernel/device/oneapi/compat.h"
|
||||
# include "kernel/device/oneapi/globals.h"
|
||||
#else
|
||||
# include "kernel/device/cpu/compat.h"
|
||||
# include "kernel/device/cpu/globals.h"
|
||||
#endif
|
||||
|
||||
#include "kernel/bvh/types.h"
|
||||
#include "kernel/bvh/util.h"
|
||||
@@ -33,11 +38,16 @@ using numhit_t = uint8_t;
|
||||
using numhit_t = uint32_t;
|
||||
#endif
|
||||
|
||||
#define CYCLES_EMBREE_USED_FEATURES \
|
||||
(RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE | \
|
||||
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT | \
|
||||
RTC_FEATURE_FLAG_MOTION_BLUR | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE | \
|
||||
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE)
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
# define CYCLES_EMBREE_USED_FEATURES \
|
||||
(kernel_handler.get_specialization_constant<oneapi_embree_features>())
|
||||
#else
|
||||
# define CYCLES_EMBREE_USED_FEATURES \
|
||||
(RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE | \
|
||||
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT | \
|
||||
RTC_FEATURE_FLAG_MOTION_BLUR | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE | \
|
||||
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE)
|
||||
#endif
|
||||
|
||||
#define EMBREE_IS_HAIR(x) (x & 1)
|
||||
|
||||
@@ -99,7 +109,9 @@ struct CCLVolumeContext
|
||||
#if EMBREE_MAJOR_VERSION >= 4
|
||||
KernelGlobals kg;
|
||||
const Ray *ray;
|
||||
# ifdef __VOLUME_RECORD_ALL__
|
||||
numhit_t max_hits;
|
||||
# endif
|
||||
numhit_t num_hits;
|
||||
#endif
|
||||
Intersection *vol_isect;
|
||||
@@ -252,7 +264,8 @@ ccl_device_inline void kernel_embree_convert_sss_hit(KernelGlobals kg,
|
||||
* Things like recording subsurface or shadow hits for later evaluation
|
||||
* as well as filtering for volume objects happen here.
|
||||
* Cycles' own BVH does that directly inside the traversal calls. */
|
||||
ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNArguments *args)
|
||||
ccl_device_forceinline void kernel_embree_filter_intersection_func_impl(
|
||||
const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
||||
assert(args->N == 1);
|
||||
@@ -263,7 +276,11 @@ ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNA
|
||||
#else
|
||||
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
|
||||
#endif
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
KernelGlobalsGPU *kg = nullptr;
|
||||
#else
|
||||
const KernelGlobalsCPU *kg = ctx->kg;
|
||||
#endif
|
||||
const Ray *cray = ctx->ray;
|
||||
|
||||
if (kernel_embree_is_self_intersection(
|
||||
@@ -277,7 +294,7 @@ ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNA
|
||||
* as well as filtering for volume objects happen here.
|
||||
* Cycles' own BVH does that directly inside the traversal calls.
|
||||
*/
|
||||
ccl_device void kernel_embree_filter_occluded_shadow_all_func(
|
||||
ccl_device_forceinline void kernel_embree_filter_occluded_shadow_all_func_impl(
|
||||
const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
||||
@@ -290,7 +307,11 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
|
||||
#else
|
||||
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
|
||||
#endif
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
KernelGlobalsGPU *kg = nullptr;
|
||||
#else
|
||||
const KernelGlobalsCPU *kg = ctx->kg;
|
||||
#endif
|
||||
const Ray *cray = ctx->ray;
|
||||
|
||||
Intersection current_isect;
|
||||
@@ -326,7 +347,7 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
|
||||
}
|
||||
|
||||
/* Test if we need to record this transparent intersection. */
|
||||
const numhit_t max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
|
||||
const numhit_t max_record_hits = min(ctx->max_hits, numhit_t(INTEGRATOR_SHADOW_ISECT_SIZE));
|
||||
if (ctx->num_recorded_hits < max_record_hits) {
|
||||
/* If maximum number of hits was reached, replace the intersection with the
|
||||
* highest distance. We want to find the N closest intersections. */
|
||||
@@ -363,7 +384,7 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
|
||||
*args->valid = 0;
|
||||
}
|
||||
|
||||
ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
|
||||
ccl_device_forceinline void kernel_embree_filter_occluded_local_func_impl(
|
||||
const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
||||
@@ -376,7 +397,11 @@ ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
|
||||
#else
|
||||
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
|
||||
#endif
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
KernelGlobalsGPU *kg = nullptr;
|
||||
#else
|
||||
const KernelGlobalsCPU *kg = ctx->kg;
|
||||
#endif
|
||||
const Ray *cray = ctx->ray;
|
||||
|
||||
/* Check if it's hitting the correct object. */
|
||||
@@ -462,7 +487,7 @@ ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
|
||||
*args->valid = 0;
|
||||
}
|
||||
|
||||
ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
|
||||
ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func_impl(
|
||||
const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
||||
@@ -475,11 +500,17 @@ ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
|
||||
#else
|
||||
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
|
||||
#endif
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
KernelGlobalsGPU *kg = nullptr;
|
||||
#else
|
||||
const KernelGlobalsCPU *kg = ctx->kg;
|
||||
#endif
|
||||
const Ray *cray = ctx->ray;
|
||||
|
||||
#ifdef __VOLUME_RECORD_ALL__
|
||||
/* Append the intersection to the end of the array. */
|
||||
if (ctx->num_hits < ctx->max_hits) {
|
||||
#endif
|
||||
Intersection current_isect;
|
||||
kernel_embree_convert_hit(
|
||||
kg, ray, hit, &current_isect, reinterpret_cast<intptr_t>(args->geometryUserPtr));
|
||||
@@ -496,10 +527,17 @@ ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
|
||||
int object_flag = kernel_data_fetch(object_flag, tri_object);
|
||||
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
|
||||
--ctx->num_hits;
|
||||
#ifndef __VOLUME_RECORD_ALL__
|
||||
/* Without __VOLUME_RECORD_ALL__ we only need the first counted hit, so we
|
||||
* continue tracing only if the current hit is not counted. */
|
||||
*args->valid = 0;
|
||||
#endif
|
||||
}
|
||||
#ifdef __VOLUME_RECORD_ALL__
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if EMBREE_MAJOR_VERSION < 4
|
||||
@@ -513,14 +551,14 @@ ccl_device_forceinline void kernel_embree_filter_occluded_func(
|
||||
|
||||
switch (ctx->type) {
|
||||
case CCLIntersectContext::RAY_SHADOW_ALL:
|
||||
kernel_embree_filter_occluded_shadow_all_func(args);
|
||||
kernel_embree_filter_occluded_shadow_all_func_impl(args);
|
||||
break;
|
||||
case CCLIntersectContext::RAY_LOCAL:
|
||||
case CCLIntersectContext::RAY_SSS:
|
||||
kernel_embree_filter_occluded_local_func(args);
|
||||
kernel_embree_filter_occluded_local_func_impl(args);
|
||||
break;
|
||||
case CCLIntersectContext::RAY_VOLUME_ALL:
|
||||
kernel_embree_filter_occluded_volume_all_func(args);
|
||||
kernel_embree_filter_occluded_volume_all_func_impl(args);
|
||||
break;
|
||||
|
||||
case CCLIntersectContext::RAY_REGULAR:
|
||||
@@ -569,7 +607,63 @@ ccl_device void kernel_embree_filter_occluded_func_backface_cull(
|
||||
|
||||
kernel_embree_filter_occluded_func(args);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
/* Static wrappers so we can call the callbacks from outside the ONEAPIKernelContext class. */
|
||||
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
|
||||
kernel_embree_filter_intersection_func_static(const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
CCLFirstHitContext *ctx = (CCLFirstHitContext *)(args->context);
|
||||
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
|
||||
context->kernel_embree_filter_intersection_func_impl(args);
|
||||
}
|
||||
|
||||
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
|
||||
kernel_embree_filter_occluded_shadow_all_func_static(const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
CCLShadowContext *ctx = (CCLShadowContext *)(args->context);
|
||||
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
|
||||
context->kernel_embree_filter_occluded_shadow_all_func_impl(args);
|
||||
}
|
||||
|
||||
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
|
||||
kernel_embree_filter_occluded_local_func_static(const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
CCLLocalContext *ctx = (CCLLocalContext *)(args->context);
|
||||
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
|
||||
context->kernel_embree_filter_occluded_local_func_impl(args);
|
||||
}
|
||||
|
||||
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
|
||||
kernel_embree_filter_occluded_volume_all_func_static(const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
CCLVolumeContext *ctx = (CCLVolumeContext *)(args->context);
|
||||
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
|
||||
context->kernel_embree_filter_occluded_volume_all_func_impl(args);
|
||||
}
|
||||
|
||||
# define kernel_embree_filter_intersection_func \
|
||||
ONEAPIKernelContext::kernel_embree_filter_intersection_func_static
|
||||
# define kernel_embree_filter_occluded_shadow_all_func \
|
||||
ONEAPIKernelContext::kernel_embree_filter_occluded_shadow_all_func_static
|
||||
# define kernel_embree_filter_occluded_local_func \
|
||||
ONEAPIKernelContext::kernel_embree_filter_occluded_local_func_static
|
||||
# define kernel_embree_filter_occluded_volume_all_func \
|
||||
ONEAPIKernelContext::kernel_embree_filter_occluded_volume_all_func_static
|
||||
#else
|
||||
# define kernel_embree_filter_intersection_func kernel_embree_filter_intersection_func_impl
|
||||
# if EMBREE_MAJOR_VERSION >= 4
|
||||
# define kernel_embree_filter_occluded_shadow_all_func \
|
||||
kernel_embree_filter_occluded_shadow_all_func_impl
|
||||
# define kernel_embree_filter_occluded_local_func kernel_embree_filter_occluded_local_func_impl
|
||||
# define kernel_embree_filter_occluded_volume_all_func \
|
||||
kernel_embree_filter_occluded_volume_all_func_impl
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Scene intersection. */
|
||||
@@ -583,7 +677,15 @@ ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
|
||||
#if EMBREE_MAJOR_VERSION >= 4
|
||||
CCLFirstHitContext ctx;
|
||||
rtcInitRayQueryContext(&ctx);
|
||||
# ifdef __KERNEL_ONEAPI__
|
||||
/* NOTE(sirgienko): Cycles GPU back-ends pass NULL to KernelGlobals and
|
||||
* use global device allocation (CUDA, OptiX, HIP) or pass all needed data
|
||||
* as a class context (Metal, oneAPI). So we need to pass this context here
|
||||
* in order to have access to it later in Embree filter functions on GPU. */
|
||||
ctx.kg = (KernelGlobals)this;
|
||||
# else
|
||||
ctx.kg = kg;
|
||||
# endif
|
||||
#else
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
|
||||
rtcInitIntersectContext(&ctx);
|
||||
@@ -596,7 +698,7 @@ ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
|
||||
#if EMBREE_MAJOR_VERSION >= 4
|
||||
RTCIntersectArguments args;
|
||||
rtcInitIntersectArguments(&args);
|
||||
args.filter = (RTCFilterFunctionN)kernel_embree_filter_intersection_func;
|
||||
args.filter = reinterpret_cast<RTCFilterFunctionN>(kernel_embree_filter_intersection_func);
|
||||
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
|
||||
args.context = &ctx;
|
||||
rtcIntersect1(kernel_data.device_bvh, &ray_hit, &args);
|
||||
@@ -625,7 +727,15 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
|
||||
# if EMBREE_MAJOR_VERSION >= 4
|
||||
CCLLocalContext ctx;
|
||||
rtcInitRayQueryContext(&ctx);
|
||||
# ifdef __KERNEL_ONEAPI__
|
||||
/* NOTE(sirgienko): Cycles GPU back-ends pass NULL to KernelGlobals and
|
||||
* use global device allocation (CUDA, OptiX, HIP) or pass all needed data
|
||||
* as a class context (Metal, oneAPI). So we need to pass this context here
|
||||
* in order to have access to it later in Embree filter functions on GPU. */
|
||||
ctx.kg = (KernelGlobals)this;
|
||||
# else
|
||||
ctx.kg = kg;
|
||||
# endif
|
||||
# else
|
||||
CCLIntersectContext ctx(kg,
|
||||
has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL);
|
||||
@@ -646,7 +756,7 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
|
||||
# if EMBREE_MAJOR_VERSION >= 4
|
||||
RTCOccludedArguments args;
|
||||
rtcInitOccludedArguments(&args);
|
||||
args.filter = (RTCFilterFunctionN)(kernel_embree_filter_occluded_local_func);
|
||||
args.filter = reinterpret_cast<RTCFilterFunctionN>(kernel_embree_filter_occluded_local_func);
|
||||
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
|
||||
args.context = &ctx;
|
||||
# endif
|
||||
@@ -692,7 +802,7 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
|
||||
|
||||
#ifdef __SHADOW_RECORD_ALL__
|
||||
ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
|
||||
IntegratorShadowStateCPU *state,
|
||||
IntegratorShadowState state,
|
||||
ccl_private const Ray *ray,
|
||||
uint visibility,
|
||||
uint max_hits,
|
||||
@@ -702,7 +812,15 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
|
||||
# if EMBREE_MAJOR_VERSION >= 4
|
||||
CCLShadowContext ctx;
|
||||
rtcInitRayQueryContext(&ctx);
|
||||
# ifdef __KERNEL_ONEAPI__
|
||||
/* NOTE(sirgienko): Cycles GPU back-ends pass NULL to KernelGlobals and
|
||||
* use global device allocation (CUDA, OptiX, HIP) or pass all needed data
|
||||
* as a class context (Metal, oneAPI). So we need to pass this context here
|
||||
* in order to have access to it later in Embree filter functions on GPU. */
|
||||
ctx.kg = (KernelGlobals)this;
|
||||
# else
|
||||
ctx.kg = kg;
|
||||
# endif
|
||||
# else
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
|
||||
rtcInitIntersectContext(&ctx);
|
||||
@@ -718,7 +836,8 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
|
||||
# if EMBREE_MAJOR_VERSION >= 4
|
||||
RTCOccludedArguments args;
|
||||
rtcInitOccludedArguments(&args);
|
||||
args.filter = (RTCFilterFunctionN)kernel_embree_filter_occluded_shadow_all_func;
|
||||
args.filter = reinterpret_cast<RTCFilterFunctionN>(
|
||||
kernel_embree_filter_occluded_shadow_all_func);
|
||||
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
|
||||
args.context = &ctx;
|
||||
rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
|
||||
@@ -736,19 +855,31 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
|
||||
ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
|
||||
ccl_private const Ray *ray,
|
||||
ccl_private Intersection *isect,
|
||||
# ifdef __VOLUME_RECORD_ALL__
|
||||
const uint max_hits,
|
||||
# endif
|
||||
const uint visibility)
|
||||
{
|
||||
# if EMBREE_MAJOR_VERSION >= 4
|
||||
CCLVolumeContext ctx;
|
||||
rtcInitRayQueryContext(&ctx);
|
||||
# ifdef __KERNEL_ONEAPI__
|
||||
/* NOTE(sirgienko): Cycles GPU back-ends pass NULL to KernelGlobals and
|
||||
* use global device allocation (CUDA, OptiX, HIP) or pass all needed data
|
||||
* as a class context (Metal, oneAPI). So we need to pass this context here
|
||||
* in order to have access to it later in Embree filter functions on GPU. */
|
||||
ctx.kg = (KernelGlobals)this;
|
||||
# else
|
||||
ctx.kg = kg;
|
||||
# endif
|
||||
# else
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
|
||||
rtcInitIntersectContext(&ctx);
|
||||
# endif
|
||||
ctx.vol_isect = isect;
|
||||
# ifdef __VOLUME_RECORD_ALL__
|
||||
ctx.max_hits = numhit_t(max_hits);
|
||||
# endif
|
||||
ctx.num_hits = numhit_t(0);
|
||||
ctx.ray = ray;
|
||||
RTCRay rtc_ray;
|
||||
@@ -756,7 +887,8 @@ ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
|
||||
# if EMBREE_MAJOR_VERSION >= 4
|
||||
RTCOccludedArguments args;
|
||||
rtcInitOccludedArguments(&args);
|
||||
args.filter = (RTCFilterFunctionN)kernel_embree_filter_occluded_volume_all_func;
|
||||
args.filter = reinterpret_cast<RTCFilterFunctionN>(
|
||||
kernel_embree_filter_occluded_volume_all_func);
|
||||
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
|
||||
args.context = &ctx;
|
||||
rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
|
||||
|
@@ -128,6 +128,12 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
}
|
||||
ccl_gpu_kernel_postfix
|
||||
|
||||
/* Intersection kernels need access to the kernel handler for specialization constants to work
|
||||
* properly. */
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
# include "kernel/device/oneapi/context_intersect_begin.h"
|
||||
#endif
|
||||
|
||||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
ccl_gpu_kernel_signature(integrator_intersect_closest,
|
||||
ccl_global const int *path_index_array,
|
||||
@@ -185,6 +191,10 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
}
|
||||
ccl_gpu_kernel_postfix
|
||||
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
# include "kernel/device/oneapi/context_intersect_end.h"
|
||||
#endif
|
||||
|
||||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
ccl_gpu_kernel_signature(integrator_shade_background,
|
||||
ccl_global const int *path_index_array,
|
||||
@@ -249,6 +259,12 @@ ccl_gpu_kernel_postfix
|
||||
constant int __dummy_constant [[function_constant(Kernel_DummyConstant)]];
|
||||
#endif
|
||||
|
||||
/* Kernels using intersections need access to the kernel handler for specialization constants to
|
||||
* work properly. */
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
# include "kernel/device/oneapi/context_intersect_begin.h"
|
||||
#endif
|
||||
|
||||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
ccl_gpu_kernel_signature(integrator_shade_surface_raytrace,
|
||||
ccl_global const int *path_index_array,
|
||||
@@ -287,6 +303,9 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
}
|
||||
}
|
||||
ccl_gpu_kernel_postfix
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
# include "kernel/device/oneapi/context_intersect_end.h"
|
||||
#endif
|
||||
|
||||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
ccl_gpu_kernel_signature(integrator_shade_volume,
|
||||
|
@@ -5,6 +5,11 @@
|
||||
|
||||
#define __KERNEL_GPU__
|
||||
#define __KERNEL_ONEAPI__
|
||||
#define __KERNEL_64_BIT__
|
||||
|
||||
#ifdef WITH_EMBREE_GPU
|
||||
# define __KERNEL_GPU_RAYTRACING__
|
||||
#endif
|
||||
|
||||
#define CCL_NAMESPACE_BEGIN
|
||||
#define CCL_NAMESPACE_END
|
||||
@@ -57,17 +62,19 @@
|
||||
#define ccl_gpu_kernel_threads(block_num_threads)
|
||||
|
||||
#ifndef WITH_ONEAPI_SYCL_HOST_TASK
|
||||
# define ccl_gpu_kernel_signature(name, ...) \
|
||||
# define __ccl_gpu_kernel_signature(name, ...) \
|
||||
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
|
||||
size_t kernel_global_size, \
|
||||
size_t kernel_local_size, \
|
||||
sycl::handler &cgh, \
|
||||
__VA_ARGS__) { \
|
||||
(kg); \
|
||||
cgh.parallel_for<class kernel_##name>( \
|
||||
cgh.parallel_for( \
|
||||
sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
|
||||
[=](sycl::nd_item<1> item) {
|
||||
|
||||
# define ccl_gpu_kernel_signature __ccl_gpu_kernel_signature
|
||||
|
||||
# define ccl_gpu_kernel_postfix \
|
||||
}); \
|
||||
}
|
||||
|
18
intern/cycles/kernel/device/oneapi/context_intersect_begin.h
Normal file
18
intern/cycles/kernel/device/oneapi/context_intersect_begin.h
Normal file
@@ -0,0 +1,18 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2023 Intel Corporation */
|
||||
|
||||
#if !defined(WITH_ONEAPI_SYCL_HOST_TASK) && defined(WITH_EMBREE_GPU)
|
||||
# undef ccl_gpu_kernel_signature
|
||||
# define ccl_gpu_kernel_signature(name, ...) \
|
||||
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
|
||||
size_t kernel_global_size, \
|
||||
size_t kernel_local_size, \
|
||||
sycl::handler &cgh, \
|
||||
__VA_ARGS__) \
|
||||
{ \
|
||||
(kg); \
|
||||
cgh.parallel_for( \
|
||||
sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
|
||||
[=](sycl::nd_item<1> item, sycl::kernel_handler oneapi_kernel_handler) { \
|
||||
((ONEAPIKernelContext*)kg)->kernel_handler = oneapi_kernel_handler;
|
||||
#endif
|
@@ -0,0 +1,7 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2023 Intel Corporation */
|
||||
|
||||
#if !defined(WITH_ONEAPI_SYCL_HOST_TASK) && defined(WITH_EMBREE_GPU)
|
||||
# undef ccl_gpu_kernel_signature
|
||||
# define ccl_gpu_kernel_signature __ccl_gpu_kernel_signature
|
||||
#endif
|
@@ -31,6 +31,8 @@ typedef struct KernelGlobalsGPU {
|
||||
size_t nd_item_group_range_0;
|
||||
size_t nd_item_global_id_0;
|
||||
size_t nd_item_global_range_0;
|
||||
#else
|
||||
sycl::kernel_handler kernel_handler;
|
||||
#endif
|
||||
} KernelGlobalsGPU;
|
||||
|
||||
|
@@ -16,9 +16,22 @@
|
||||
|
||||
# include "kernel/device/gpu/kernel.h"
|
||||
|
||||
# include "device/kernel.cpp"
|
||||
|
||||
static OneAPIErrorCallback s_error_cb = nullptr;
|
||||
static void *s_error_user_ptr = nullptr;
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_BASIC_FEATURES =
|
||||
(const RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE |
|
||||
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS |
|
||||
RTC_FEATURE_FLAG_POINT | RTC_FEATURE_FLAG_MOTION_BLUR);
|
||||
static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_ALL_FEATURES =
|
||||
(const RTCFeatureFlags)(CYCLES_ONEAPI_EMBREE_BASIC_FEATURES |
|
||||
RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE |
|
||||
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE);
|
||||
# endif
|
||||
|
||||
void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr)
|
||||
{
|
||||
s_error_cb = cb;
|
||||
@@ -142,15 +155,99 @@ size_t oneapi_kernel_preferred_local_size(SyclQueue *queue,
|
||||
return std::min(limit_work_group_size, preferred_work_group_size);
|
||||
}
|
||||
|
||||
bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
|
||||
bool oneapi_kernel_is_required_for_features(const std::string &kernel_name,
|
||||
const uint kernel_features)
|
||||
{
|
||||
if ((kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0 &&
|
||||
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE)) !=
|
||||
std::string::npos)
|
||||
return false;
|
||||
if ((kernel_features & KERNEL_FEATURE_MNEE) == 0 &&
|
||||
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE)) !=
|
||||
std::string::npos)
|
||||
return false;
|
||||
if ((kernel_features & KERNEL_FEATURE_VOLUME) == 0 &&
|
||||
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK)) !=
|
||||
std::string::npos)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool oneapi_kernel_is_raytrace_or_mnee(const std::string &kernel_name)
|
||||
{
|
||||
return (kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE)) !=
|
||||
std::string::npos) ||
|
||||
(kernel_name.find(device_kernel_as_string(
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE)) != std::string::npos);
|
||||
}
|
||||
|
||||
bool oneapi_kernel_is_using_embree(const std::string &kernel_name)
|
||||
{
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
/* MNEE and Ray-trace kernels aren't yet enabled to use Embree. */
|
||||
for (int i = 0; i < (int)DEVICE_KERNEL_NUM; i++) {
|
||||
DeviceKernel kernel = (DeviceKernel)i;
|
||||
if (device_kernel_has_intersection(kernel)) {
|
||||
if (kernel_name.find(device_kernel_as_string(kernel)) != std::string::npos) {
|
||||
return !oneapi_kernel_is_raytrace_or_mnee(kernel_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
# endif
|
||||
return false;
|
||||
}
|
||||
|
||||
bool oneapi_load_kernels(SyclQueue *queue_,
|
||||
const uint kernel_features,
|
||||
bool use_hardware_raytracing)
|
||||
{
|
||||
# ifdef SYCL_SKIP_KERNELS_PRELOAD
|
||||
(void)queue_;
|
||||
(void)requested_features;
|
||||
# else
|
||||
assert(queue_);
|
||||
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
/* For best performance, we always JIT compile the kernels that are using Embree. */
|
||||
if (use_hardware_raytracing) {
|
||||
try {
|
||||
sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
|
||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
|
||||
{queue->get_device()});
|
||||
|
||||
for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
|
||||
const std::string &kernel_name = kernel_id.get_name();
|
||||
|
||||
if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
|
||||
!oneapi_kernel_is_using_embree(kernel_name)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
|
||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
|
||||
|
||||
/* Hair requires embree curves support. */
|
||||
if (kernel_features & KERNEL_FEATURE_HAIR) {
|
||||
one_kernel_bundle_input
|
||||
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
|
||||
CYCLES_ONEAPI_EMBREE_ALL_FEATURES);
|
||||
sycl::build(one_kernel_bundle_input);
|
||||
}
|
||||
else {
|
||||
one_kernel_bundle_input
|
||||
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
|
||||
CYCLES_ONEAPI_EMBREE_BASIC_FEATURES);
|
||||
sycl::build(one_kernel_bundle_input);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (sycl::exception const &e) {
|
||||
if (s_error_cb) {
|
||||
s_error_cb(e.what(), s_error_user_ptr);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
try {
|
||||
sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
|
||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
|
||||
@@ -159,27 +256,29 @@ bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
|
||||
for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
|
||||
const std::string &kernel_name = kernel_id.get_name();
|
||||
|
||||
/* NOTE(@nsirgien): Names in this conditions below should match names from
|
||||
* oneapi_call macro in oneapi_enqueue_kernel below */
|
||||
if (((requested_features & KERNEL_FEATURE_VOLUME) == 0) &&
|
||||
kernel_name.find("oneapi_kernel_integrator_shade_volume") != std::string::npos) {
|
||||
/* In case HWRT is on, compilation of kernels using Embree is already handled in previous
|
||||
* block. */
|
||||
if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
|
||||
(use_hardware_raytracing && oneapi_kernel_is_using_embree(kernel_name))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (((requested_features & KERNEL_FEATURE_MNEE) == 0) &&
|
||||
kernel_name.find("oneapi_kernel_integrator_shade_surface_mnee") != std::string::npos) {
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
if (oneapi_kernel_is_using_embree(kernel_name) ||
|
||||
oneapi_kernel_is_raytrace_or_mnee(kernel_name)) {
|
||||
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
|
||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
|
||||
one_kernel_bundle_input
|
||||
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
|
||||
RTC_FEATURE_FLAG_NONE);
|
||||
sycl::build(one_kernel_bundle_input);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (((requested_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0) &&
|
||||
kernel_name.find("oneapi_kernel_integrator_shade_surface_raytrace") !=
|
||||
std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
|
||||
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle =
|
||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
|
||||
sycl::build(one_kernel_bundle);
|
||||
# endif
|
||||
/* This call will ensure that AoT or cached JIT binaries are available
|
||||
* for execution. It will trigger compilation if it is not already the case. */
|
||||
(void)sycl::get_kernel_bundle<sycl::bundle_state::executable>(queue->get_context(),
|
||||
{kernel_id});
|
||||
}
|
||||
}
|
||||
catch (sycl::exception const &e) {
|
||||
@@ -188,13 +287,14 @@ bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
|
||||
}
|
||||
return false;
|
||||
}
|
||||
# endif
|
||||
return true;
|
||||
}
|
||||
|
||||
bool oneapi_enqueue_kernel(KernelContext *kernel_context,
|
||||
int kernel,
|
||||
size_t global_size,
|
||||
const uint kernel_features,
|
||||
bool use_hardware_raytracing,
|
||||
void **args)
|
||||
{
|
||||
bool success = true;
|
||||
@@ -248,6 +348,21 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
|
||||
|
||||
try {
|
||||
queue->submit([&](sycl::handler &cgh) {
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
/* Spec says it has no effect if the called kernel doesn't support the below specialization
|
||||
* constant but it can still trigger a recompilation, so we set it only if needed. */
|
||||
if (device_kernel_has_intersection(device_kernel)) {
|
||||
const RTCFeatureFlags used_embree_features = !use_hardware_raytracing ?
|
||||
RTC_FEATURE_FLAG_NONE :
|
||||
!(kernel_features & KERNEL_FEATURE_HAIR) ?
|
||||
CYCLES_ONEAPI_EMBREE_BASIC_FEATURES :
|
||||
CYCLES_ONEAPI_EMBREE_ALL_FEATURES;
|
||||
cgh.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
|
||||
used_embree_features);
|
||||
}
|
||||
# else
|
||||
(void)kernel_features;
|
||||
# endif
|
||||
switch (device_kernel) {
|
||||
case DEVICE_KERNEL_INTEGRATOR_RESET: {
|
||||
oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_reset);
|
||||
@@ -549,4 +664,5 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
|
||||
# endif
|
||||
return success;
|
||||
}
|
||||
|
||||
#endif /* WITH_ONEAPI */
|
||||
|
@@ -47,10 +47,14 @@ CYCLES_KERNEL_ONEAPI_EXPORT size_t oneapi_kernel_preferred_local_size(
|
||||
CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_enqueue_kernel(KernelContext *context,
|
||||
int kernel,
|
||||
size_t global_size,
|
||||
const unsigned int kernel_features,
|
||||
bool use_hardware_raytracing,
|
||||
void **args);
|
||||
CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_load_kernels(SyclQueue *queue,
|
||||
const unsigned int requested_features);
|
||||
const unsigned int kernel_features,
|
||||
bool use_hardware_raytracing);
|
||||
# ifdef __cplusplus
|
||||
}
|
||||
|
||||
# endif
|
||||
#endif /* WITH_ONEAPI */
|
||||
|
@@ -454,8 +454,13 @@ ccl_device_forceinline bool guiding_bsdf_init(KernelGlobals kg,
|
||||
ccl_private float &rand)
|
||||
{
|
||||
#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
|
||||
# if OPENPGL_VERSION_MINOR >= 5
|
||||
if (kg->opgl_surface_sampling_distribution->Init(
|
||||
kg->opgl_guiding_field, guiding_point3f(P), rand)) {
|
||||
# else
|
||||
if (kg->opgl_surface_sampling_distribution->Init(
|
||||
kg->opgl_guiding_field, guiding_point3f(P), rand, true)) {
|
||||
# endif
|
||||
kg->opgl_surface_sampling_distribution->ApplyCosineProduct(guiding_point3f(N));
|
||||
return true;
|
||||
}
|
||||
@@ -506,8 +511,13 @@ ccl_device_forceinline bool guiding_phase_init(KernelGlobals kg,
|
||||
return false;
|
||||
}
|
||||
|
||||
# if OPENPGL_VERSION_MINOR >= 5
|
||||
if (kg->opgl_volume_sampling_distribution->Init(
|
||||
kg->opgl_guiding_field, guiding_point3f(P), rand)) {
|
||||
# else
|
||||
if (kg->opgl_volume_sampling_distribution->Init(
|
||||
kg->opgl_guiding_field, guiding_point3f(P), rand, true)) {
|
||||
# endif
|
||||
kg->opgl_volume_sampling_distribution->ApplySingleLobeHenyeyGreensteinProduct(guiding_vec3f(D),
|
||||
g);
|
||||
return true;
|
||||
|
@@ -342,7 +342,7 @@ ccl_device_forceinline void area_light_update_position(const ccl_global KernelLi
|
||||
ls->D = normalize_len(ls->P - P, &ls->t);
|
||||
ls->pdf = invarea;
|
||||
|
||||
if (klight->area.tan_half_spread > 0) {
|
||||
if (klight->area.normalize_spread > 0) {
|
||||
ls->eval_fac = 0.25f * invarea;
|
||||
ls->eval_fac *= area_light_spread_attenuation(
|
||||
ls->D, ls->Ng, klight->area.tan_half_spread, klight->area.normalize_spread);
|
||||
|
@@ -56,7 +56,7 @@ ccl_device_noinline bool light_distribution_sample(KernelGlobals kg,
|
||||
const int index = light_distribution_sample(kg, randn);
|
||||
const float pdf_selection = kernel_data.integrator.distribution_pdf_lights;
|
||||
return light_sample<in_volume_segment>(
|
||||
kg, randu, randv, time, P, bounce, path_flag, index, pdf_selection, ls);
|
||||
kg, randu, randv, time, P, bounce, path_flag, index, 0, pdf_selection, ls);
|
||||
}
|
||||
|
||||
ccl_device_inline float light_distribution_pdf_lamp(KernelGlobals kg)
|
||||
|
@@ -108,6 +108,7 @@ ccl_device_noinline bool light_sample(KernelGlobals kg,
|
||||
const int bounce,
|
||||
const uint32_t path_flag,
|
||||
const int emitter_index,
|
||||
const int object_id,
|
||||
const float pdf_selection,
|
||||
ccl_private LightSample *ls)
|
||||
{
|
||||
@@ -117,8 +118,9 @@ ccl_device_noinline bool light_sample(KernelGlobals kg,
|
||||
if (kernel_data.integrator.use_light_tree) {
|
||||
ccl_global const KernelLightTreeEmitter *kemitter = &kernel_data_fetch(light_tree_emitters,
|
||||
emitter_index);
|
||||
prim = kemitter->prim_id;
|
||||
mesh_light = kemitter->mesh_light;
|
||||
prim = kemitter->light.id;
|
||||
mesh_light.shader_flag = kemitter->mesh_light.shader_flag;
|
||||
mesh_light.object_id = object_id;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
@@ -438,7 +438,9 @@ ccl_device_inline float light_sample_mis_weight_forward_surface(KernelGlobals kg
|
||||
const float3 N = INTEGRATOR_STATE(state, path, mis_origin_n);
|
||||
uint lookup_offset = kernel_data_fetch(object_lookup_offset, sd->object);
|
||||
uint prim_offset = kernel_data_fetch(object_prim_offset, sd->object);
|
||||
pdf *= light_tree_pdf(kg, ray_P, N, path_flag, sd->prim - prim_offset + lookup_offset);
|
||||
uint triangle = kernel_data_fetch(triangle_to_tree, sd->prim - prim_offset + lookup_offset);
|
||||
|
||||
pdf *= light_tree_pdf(kg, ray_P, N, path_flag, sd->object, triangle);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@@ -462,7 +464,7 @@ ccl_device_inline float light_sample_mis_weight_forward_lamp(KernelGlobals kg,
|
||||
#ifdef __LIGHT_TREE__
|
||||
if (kernel_data.integrator.use_light_tree) {
|
||||
const float3 N = INTEGRATOR_STATE(state, path, mis_origin_n);
|
||||
pdf *= light_tree_pdf(kg, P, N, path_flag, ~ls->lamp);
|
||||
pdf *= light_tree_pdf(kg, P, N, path_flag, 0, kernel_data_fetch(light_to_tree, ls->lamp));
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@@ -496,7 +498,8 @@ ccl_device_inline float light_sample_mis_weight_forward_background(KernelGlobals
|
||||
#ifdef __LIGHT_TREE__
|
||||
if (kernel_data.integrator.use_light_tree) {
|
||||
const float3 N = INTEGRATOR_STATE(state, path, mis_origin_n);
|
||||
pdf *= light_tree_pdf(kg, ray_P, N, path_flag, ~kernel_data.background.light_index);
|
||||
uint light = kernel_data_fetch(light_to_tree, kernel_data.background.light_index);
|
||||
pdf *= light_tree_pdf(kg, ray_P, N, path_flag, 0, light);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
@@ -69,6 +69,59 @@ ccl_device float3 compute_v(
|
||||
cos_phi0 * o0 + dot_o1_a * inv_len * o1;
|
||||
}
|
||||
|
||||
ccl_device_inline bool is_light(const ccl_global KernelLightTreeEmitter *kemitter)
|
||||
{
|
||||
return kemitter->light.id < 0;
|
||||
}
|
||||
|
||||
ccl_device_inline bool is_mesh(const ccl_global KernelLightTreeEmitter *kemitter)
|
||||
{
|
||||
return !is_light(kemitter) && kemitter->mesh_light.object_id == OBJECT_NONE;
|
||||
}
|
||||
|
||||
ccl_device_inline bool is_triangle(const ccl_global KernelLightTreeEmitter *kemitter)
|
||||
{
|
||||
return !is_light(kemitter) && kemitter->mesh_light.object_id != OBJECT_NONE;
|
||||
}
|
||||
|
||||
ccl_device_inline bool is_leaf(const ccl_global KernelLightTreeNode *knode)
|
||||
{
|
||||
/* The distant node is also considered a leaf node. */
|
||||
return knode->type >= LIGHT_TREE_LEAF;
|
||||
}
|
||||
|
||||
template<bool in_volume_segment>
|
||||
ccl_device void light_tree_to_local_space(KernelGlobals kg,
|
||||
const int object_id,
|
||||
ccl_private float3 &P,
|
||||
ccl_private float3 &N_or_D,
|
||||
ccl_private float &t)
|
||||
{
|
||||
const int object_flag = kernel_data_fetch(object_flag, object_id);
|
||||
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
#ifdef __OBJECT_MOTION__
|
||||
Transform itfm;
|
||||
object_fetch_transform_motion_test(kg, object_id, 0.5f, &itfm);
|
||||
#else
|
||||
const Transform itfm = object_fetch_transform(kg, object_id, OBJECT_INVERSE_TRANSFORM);
|
||||
#endif
|
||||
P = transform_point(&itfm, P);
|
||||
if (in_volume_segment) {
|
||||
/* Transform direction. */
|
||||
float3 D_local = transform_direction(&itfm, N_or_D);
|
||||
float scale;
|
||||
N_or_D = normalize_len(D_local, &scale);
|
||||
|
||||
t *= scale;
|
||||
}
|
||||
else if (!is_zero(N_or_D)) {
|
||||
/* Transform normal. */
|
||||
const Transform tfm = object_fetch_transform(kg, object_id, OBJECT_TRANSFORM);
|
||||
N_or_D = normalize(transform_direction_transposed(&tfm, N_or_D));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* This is the general function for calculating the importance of either a cluster or an emitter.
|
||||
* Both of the specialized functions obtain the necessary data before calling this function. */
|
||||
template<bool in_volume_segment>
|
||||
@@ -184,9 +237,8 @@ ccl_device bool compute_emitter_centroid_and_dir(KernelGlobals kg,
|
||||
ccl_private float3 &centroid,
|
||||
ccl_private packed_float3 &dir)
|
||||
{
|
||||
const int prim_id = kemitter->prim_id;
|
||||
if (prim_id < 0) {
|
||||
const ccl_global KernelLight *klight = &kernel_data_fetch(lights, ~prim_id);
|
||||
if (is_light(kemitter)) {
|
||||
const ccl_global KernelLight *klight = &kernel_data_fetch(lights, ~(kemitter->light.id));
|
||||
centroid = klight->co;
|
||||
|
||||
switch (klight->type) {
|
||||
@@ -213,19 +265,22 @@ ccl_device bool compute_emitter_centroid_and_dir(KernelGlobals kg,
|
||||
}
|
||||
}
|
||||
else {
|
||||
kernel_assert(is_triangle(kemitter));
|
||||
const int object = kemitter->mesh_light.object_id;
|
||||
float3 vertices[3];
|
||||
triangle_world_space_vertices(kg, object, prim_id, -1.0f, vertices);
|
||||
triangle_vertices(kg, kemitter->triangle.id, vertices);
|
||||
centroid = (vertices[0] + vertices[1] + vertices[2]) / 3.0f;
|
||||
|
||||
const bool is_front_only = (kemitter->emission_sampling == EMISSION_SAMPLING_FRONT);
|
||||
const bool is_back_only = (kemitter->emission_sampling == EMISSION_SAMPLING_BACK);
|
||||
const bool is_front_only = (kemitter->triangle.emission_sampling == EMISSION_SAMPLING_FRONT);
|
||||
const bool is_back_only = (kemitter->triangle.emission_sampling == EMISSION_SAMPLING_BACK);
|
||||
if (is_front_only || is_back_only) {
|
||||
dir = safe_normalize(cross(vertices[1] - vertices[0], vertices[2] - vertices[0]));
|
||||
if (is_back_only) {
|
||||
dir = -dir;
|
||||
}
|
||||
if (kernel_data_fetch(object_flag, object) & SD_OBJECT_NEGATIVE_SCALE) {
|
||||
const int object_flag = kernel_data_fetch(object_flag, object);
|
||||
if ((object_flag & SD_OBJECT_TRANSFORM_APPLIED) &&
|
||||
(object_flag & SD_OBJECT_NEGATIVE_SCALE)) {
|
||||
dir = -dir;
|
||||
}
|
||||
}
|
||||
@@ -237,6 +292,75 @@ ccl_device bool compute_emitter_centroid_and_dir(KernelGlobals kg,
|
||||
return true;
|
||||
}
|
||||
|
||||
template<bool in_volume_segment>
|
||||
ccl_device void light_tree_node_importance(KernelGlobals kg,
|
||||
const float3 P,
|
||||
const float3 N_or_D,
|
||||
const float t,
|
||||
const bool has_transmission,
|
||||
const ccl_global KernelLightTreeNode *knode,
|
||||
ccl_private float &max_importance,
|
||||
ccl_private float &min_importance)
|
||||
{
|
||||
const BoundingCone bcone = knode->bcone;
|
||||
const BoundingBox bbox = knode->bbox;
|
||||
|
||||
float3 point_to_centroid;
|
||||
float cos_theta_u;
|
||||
float distance;
|
||||
if (knode->type == LIGHT_TREE_DISTANT) {
|
||||
if (in_volume_segment) {
|
||||
return;
|
||||
}
|
||||
point_to_centroid = -bcone.axis;
|
||||
cos_theta_u = fast_cosf(bcone.theta_o);
|
||||
distance = 1.0f;
|
||||
}
|
||||
else {
|
||||
const float3 centroid = 0.5f * (bbox.min + bbox.max);
|
||||
|
||||
if (in_volume_segment) {
|
||||
const float3 D = N_or_D;
|
||||
const float3 closest_point = P + dot(centroid - P, D) * D;
|
||||
/* Minimal distance of the ray to the cluster. */
|
||||
distance = len(centroid - closest_point);
|
||||
point_to_centroid = -compute_v(centroid, P, D, bcone.axis, t);
|
||||
cos_theta_u = light_tree_cos_bounding_box_angle(bbox, closest_point, point_to_centroid);
|
||||
}
|
||||
else {
|
||||
const float3 N = N_or_D;
|
||||
const float3 bbox_extent = bbox.max - centroid;
|
||||
const bool bbox_is_visible = has_transmission |
|
||||
(dot(N, centroid - P) + dot(fabs(N), fabs(bbox_extent)) > 0);
|
||||
|
||||
/* If the node is guaranteed to be behind the surface we're sampling, and the surface is
|
||||
* opaque, then we can give the node an importance of 0 as it contributes nothing to the
|
||||
* surface. */
|
||||
if (!bbox_is_visible) {
|
||||
return;
|
||||
}
|
||||
|
||||
point_to_centroid = normalize_len(centroid - P, &distance);
|
||||
cos_theta_u = light_tree_cos_bounding_box_angle(bbox, P, point_to_centroid);
|
||||
}
|
||||
/* Clamp distance to half the radius of the cluster when splitting is disabled. */
|
||||
distance = fmaxf(0.5f * len(centroid - bbox.max), distance);
|
||||
}
|
||||
/* TODO: currently max_distance = min_distance, max_importance = min_importance for the
|
||||
* nodes. Do we need better weights for complex scenes? */
|
||||
light_tree_importance<in_volume_segment>(N_or_D,
|
||||
has_transmission,
|
||||
point_to_centroid,
|
||||
cos_theta_u,
|
||||
bcone,
|
||||
distance,
|
||||
distance,
|
||||
t,
|
||||
knode->energy,
|
||||
max_importance,
|
||||
min_importance);
|
||||
}
|
||||
|
||||
template<bool in_volume_segment>
|
||||
ccl_device void light_tree_emitter_importance(KernelGlobals kg,
|
||||
const float3 P,
|
||||
@@ -247,11 +371,21 @@ ccl_device void light_tree_emitter_importance(KernelGlobals kg,
|
||||
ccl_private float &max_importance,
|
||||
ccl_private float &min_importance)
|
||||
{
|
||||
max_importance = 0.0f;
|
||||
min_importance = 0.0f;
|
||||
|
||||
const ccl_global KernelLightTreeEmitter *kemitter = &kernel_data_fetch(light_tree_emitters,
|
||||
emitter_index);
|
||||
|
||||
max_importance = 0.0f;
|
||||
min_importance = 0.0f;
|
||||
if (is_mesh(kemitter)) {
|
||||
const ccl_global KernelLightTreeNode *knode = &kernel_data_fetch(light_tree_nodes,
|
||||
kemitter->mesh.node_id);
|
||||
|
||||
light_tree_node_importance<in_volume_segment>(
|
||||
kg, P, N_or_D, t, has_transmission, knode, max_importance, min_importance);
|
||||
return;
|
||||
}
|
||||
|
||||
BoundingCone bcone;
|
||||
bcone.theta_o = kemitter->theta_o;
|
||||
bcone.theta_e = kemitter->theta_e;
|
||||
@@ -264,8 +398,6 @@ ccl_device void light_tree_emitter_importance(KernelGlobals kg,
|
||||
return;
|
||||
}
|
||||
|
||||
const int prim_id = kemitter->prim_id;
|
||||
|
||||
if (in_volume_segment) {
|
||||
const float3 D = N_or_D;
|
||||
/* Closest point. */
|
||||
@@ -279,9 +411,15 @@ ccl_device void light_tree_emitter_importance(KernelGlobals kg,
|
||||
P_c = P;
|
||||
}
|
||||
|
||||
/* Early out if the emitter is guaranteed to be invisible. */
|
||||
bool is_visible;
|
||||
if (prim_id < 0) {
|
||||
const ccl_global KernelLight *klight = &kernel_data_fetch(lights, ~prim_id);
|
||||
if (is_triangle(kemitter)) {
|
||||
is_visible = triangle_light_tree_parameters<in_volume_segment>(
|
||||
kg, kemitter, centroid, P_c, N_or_D, bcone, cos_theta_u, distance, point_to_centroid);
|
||||
}
|
||||
else {
|
||||
kernel_assert(is_light(kemitter));
|
||||
const ccl_global KernelLight *klight = &kernel_data_fetch(lights, ~(kemitter->light.id));
|
||||
switch (klight->type) {
|
||||
/* Function templates only modify cos_theta_u when in_volume_segment = true. */
|
||||
case LIGHT_SPOT:
|
||||
@@ -309,10 +447,6 @@ ccl_device void light_tree_emitter_importance(KernelGlobals kg,
|
||||
return;
|
||||
}
|
||||
}
|
||||
else { /* Mesh light. */
|
||||
is_visible = triangle_light_tree_parameters<in_volume_segment>(
|
||||
kg, kemitter, centroid, P_c, N_or_D, bcone, cos_theta_u, distance, point_to_centroid);
|
||||
}
|
||||
|
||||
is_visible |= has_transmission;
|
||||
if (!is_visible) {
|
||||
@@ -333,81 +467,31 @@ ccl_device void light_tree_emitter_importance(KernelGlobals kg,
|
||||
}
|
||||
|
||||
template<bool in_volume_segment>
|
||||
ccl_device void light_tree_node_importance(KernelGlobals kg,
|
||||
const float3 P,
|
||||
const float3 N_or_D,
|
||||
const float t,
|
||||
const bool has_transmission,
|
||||
const ccl_global KernelLightTreeNode *knode,
|
||||
ccl_private float &max_importance,
|
||||
ccl_private float &min_importance)
|
||||
ccl_device void light_tree_child_importance(KernelGlobals kg,
|
||||
const float3 P,
|
||||
const float3 N_or_D,
|
||||
const float t,
|
||||
const bool has_transmission,
|
||||
const ccl_global KernelLightTreeNode *knode,
|
||||
ccl_private float &max_importance,
|
||||
ccl_private float &min_importance)
|
||||
{
|
||||
max_importance = 0.0f;
|
||||
min_importance = 0.0f;
|
||||
|
||||
if (knode->num_emitters == 1) {
|
||||
/* At a leaf node with only one emitter. */
|
||||
light_tree_emitter_importance<in_volume_segment>(
|
||||
kg, P, N_or_D, t, has_transmission, -knode->child_index, max_importance, min_importance);
|
||||
light_tree_emitter_importance<in_volume_segment>(kg,
|
||||
P,
|
||||
N_or_D,
|
||||
t,
|
||||
has_transmission,
|
||||
knode->leaf.first_emitter,
|
||||
max_importance,
|
||||
min_importance);
|
||||
}
|
||||
else if (knode->num_emitters != 0) {
|
||||
const BoundingCone bcone = knode->bcone;
|
||||
const BoundingBox bbox = knode->bbox;
|
||||
|
||||
float3 point_to_centroid;
|
||||
float cos_theta_u;
|
||||
float distance;
|
||||
if (knode->bit_trail == 1) {
|
||||
/* Distant light node. */
|
||||
if (in_volume_segment) {
|
||||
return;
|
||||
}
|
||||
point_to_centroid = -bcone.axis;
|
||||
cos_theta_u = fast_cosf(bcone.theta_o);
|
||||
distance = 1.0f;
|
||||
}
|
||||
else {
|
||||
const float3 centroid = 0.5f * (bbox.min + bbox.max);
|
||||
|
||||
if (in_volume_segment) {
|
||||
const float3 D = N_or_D;
|
||||
const float3 closest_point = P + dot(centroid - P, D) * D;
|
||||
/* Minimal distance of the ray to the cluster. */
|
||||
distance = len(centroid - closest_point);
|
||||
point_to_centroid = -compute_v(centroid, P, D, bcone.axis, t);
|
||||
cos_theta_u = light_tree_cos_bounding_box_angle(bbox, closest_point, point_to_centroid);
|
||||
}
|
||||
else {
|
||||
const float3 N = N_or_D;
|
||||
const float3 bbox_extent = bbox.max - centroid;
|
||||
const bool bbox_is_visible = has_transmission |
|
||||
(dot(N, centroid - P) + dot(fabs(N), fabs(bbox_extent)) > 0);
|
||||
|
||||
/* If the node is guaranteed to be behind the surface we're sampling, and the surface is
|
||||
* opaque, then we can give the node an importance of 0 as it contributes nothing to the
|
||||
* surface. */
|
||||
if (!bbox_is_visible) {
|
||||
return;
|
||||
}
|
||||
|
||||
point_to_centroid = normalize_len(centroid - P, &distance);
|
||||
cos_theta_u = light_tree_cos_bounding_box_angle(bbox, P, point_to_centroid);
|
||||
}
|
||||
/* Clamp distance to half the radius of the cluster when splitting is disabled. */
|
||||
distance = fmaxf(0.5f * len(centroid - bbox.max), distance);
|
||||
}
|
||||
/* TODO: currently max_distance = min_distance, max_importance = min_importance for the
|
||||
* nodes. Do we need better weights for complex scenes? */
|
||||
light_tree_importance<in_volume_segment>(N_or_D,
|
||||
has_transmission,
|
||||
point_to_centroid,
|
||||
cos_theta_u,
|
||||
bcone,
|
||||
distance,
|
||||
distance,
|
||||
t,
|
||||
knode->energy,
|
||||
max_importance,
|
||||
min_importance);
|
||||
light_tree_node_importance<in_volume_segment>(
|
||||
kg, P, N_or_D, t, has_transmission, knode, max_importance, min_importance);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -440,26 +524,30 @@ ccl_device void sample_resevoir(const int current_index,
|
||||
template<bool in_volume_segment>
|
||||
ccl_device int light_tree_cluster_select_emitter(KernelGlobals kg,
|
||||
ccl_private float &rand,
|
||||
const float3 P,
|
||||
const float3 N_or_D,
|
||||
const float t,
|
||||
ccl_private float3 &P,
|
||||
ccl_private float3 &N_or_D,
|
||||
ccl_private float &t,
|
||||
const bool has_transmission,
|
||||
const ccl_global KernelLightTreeNode *knode,
|
||||
ccl_private int *node_index,
|
||||
ccl_private float *pdf_factor)
|
||||
{
|
||||
float selected_importance[2] = {0.0f, 0.0f};
|
||||
float total_importance[2] = {0.0f, 0.0f};
|
||||
int selected_index = -1;
|
||||
const ccl_global KernelLightTreeNode *knode = &kernel_data_fetch(light_tree_nodes, *node_index);
|
||||
*node_index = -1;
|
||||
|
||||
/* Mark emitters with zero importance. Used for reservoir when total minimum importance = 0. */
|
||||
kernel_assert(knode->num_emitters <= sizeof(uint) * 8);
|
||||
uint has_importance = 0;
|
||||
|
||||
const bool sample_max = (rand > 0.5f); /* Sampling using the maximum importance. */
|
||||
rand = rand * 2.0f - float(sample_max);
|
||||
if (knode->num_emitters > 1) {
|
||||
rand = rand * 2.0f - float(sample_max);
|
||||
}
|
||||
|
||||
for (int i = 0; i < knode->num_emitters; i++) {
|
||||
int current_index = -knode->child_index + i;
|
||||
int current_index = knode->leaf.first_emitter + i;
|
||||
/* maximum importance = importance[0], minimum importance = importance[1] */
|
||||
float importance[2];
|
||||
light_tree_emitter_importance<in_volume_segment>(
|
||||
@@ -492,7 +580,7 @@ ccl_device int light_tree_cluster_select_emitter(KernelGlobals kg,
|
||||
else {
|
||||
selected_index = -1;
|
||||
for (int i = 0; i < knode->num_emitters; i++) {
|
||||
int current_index = -knode->child_index + i;
|
||||
int current_index = knode->inner.right_child + i;
|
||||
sample_resevoir(current_index,
|
||||
float(has_importance & 1),
|
||||
selected_index,
|
||||
@@ -508,8 +596,24 @@ ccl_device int light_tree_cluster_select_emitter(KernelGlobals kg,
|
||||
}
|
||||
}
|
||||
|
||||
*pdf_factor = 0.5f * (selected_importance[0] / total_importance[0] +
|
||||
selected_importance[1] / total_importance[1]);
|
||||
*pdf_factor *= 0.5f * (selected_importance[0] / total_importance[0] +
|
||||
selected_importance[1] / total_importance[1]);
|
||||
|
||||
const ccl_global KernelLightTreeEmitter *kemitter = &kernel_data_fetch(light_tree_emitters,
|
||||
selected_index);
|
||||
|
||||
if (is_mesh(kemitter)) {
|
||||
/* Transform ray from world to local space. */
|
||||
light_tree_to_local_space<in_volume_segment>(kg, kemitter->mesh.object_id, P, N_or_D, t);
|
||||
|
||||
*node_index = kemitter->mesh.node_id;
|
||||
const ccl_global KernelLightTreeNode *knode = &kernel_data_fetch(light_tree_nodes,
|
||||
*node_index);
|
||||
if (knode->type == LIGHT_TREE_INSTANCE) {
|
||||
/* Switch to the node with the subtree. */
|
||||
*node_index = knode->instance.reference;
|
||||
}
|
||||
}
|
||||
|
||||
return selected_index;
|
||||
}
|
||||
@@ -528,9 +632,9 @@ ccl_device bool get_left_probability(KernelGlobals kg,
|
||||
const ccl_global KernelLightTreeNode *right = &kernel_data_fetch(light_tree_nodes, right_index);
|
||||
|
||||
float min_left_importance, max_left_importance, min_right_importance, max_right_importance;
|
||||
light_tree_node_importance<in_volume_segment>(
|
||||
light_tree_child_importance<in_volume_segment>(
|
||||
kg, P, N_or_D, t, has_transmission, left, max_left_importance, min_left_importance);
|
||||
light_tree_node_importance<in_volume_segment>(
|
||||
light_tree_child_importance<in_volume_segment>(
|
||||
kg, P, N_or_D, t, has_transmission, right, max_right_importance, min_right_importance);
|
||||
|
||||
const float total_max_importance = max_left_importance + max_right_importance;
|
||||
@@ -556,8 +660,8 @@ ccl_device_noinline bool light_tree_sample(KernelGlobals kg,
|
||||
const float randv,
|
||||
const float time,
|
||||
const float3 P,
|
||||
const float3 N_or_D,
|
||||
const float t,
|
||||
float3 N_or_D,
|
||||
float t,
|
||||
const int shader_flags,
|
||||
const int bounce,
|
||||
const uint32_t path_flag,
|
||||
@@ -571,28 +675,38 @@ ccl_device_noinline bool light_tree_sample(KernelGlobals kg,
|
||||
float pdf_leaf = 1.0f;
|
||||
float pdf_selection = 1.0f;
|
||||
int selected_emitter = -1;
|
||||
|
||||
int object = 0;
|
||||
int node_index = 0; /* Root node. */
|
||||
|
||||
float3 local_P = P;
|
||||
|
||||
/* Traverse the light tree until a leaf node is reached. */
|
||||
while (true) {
|
||||
const ccl_global KernelLightTreeNode *knode = &kernel_data_fetch(light_tree_nodes, node_index);
|
||||
|
||||
if (knode->child_index <= 0) {
|
||||
if (is_leaf(knode)) {
|
||||
/* At a leaf node, we pick an emitter. */
|
||||
selected_emitter = light_tree_cluster_select_emitter<in_volume_segment>(
|
||||
kg, randn, P, N_or_D, t, has_transmission, knode, &pdf_selection);
|
||||
break;
|
||||
kg, randn, local_P, N_or_D, t, has_transmission, &node_index, &pdf_selection);
|
||||
|
||||
if (node_index < 0) {
|
||||
break;
|
||||
}
|
||||
else {
|
||||
/* Continue with the picked mesh light. */
|
||||
object = kernel_data_fetch(light_tree_emitters, selected_emitter).mesh.object_id;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* At an interior node, the left child is directly after the parent, while the right child is
|
||||
* stored as the child index. */
|
||||
const int left_index = node_index + 1;
|
||||
const int right_index = knode->child_index;
|
||||
const int right_index = knode->inner.right_child;
|
||||
|
||||
float left_prob;
|
||||
if (!get_left_probability<in_volume_segment>(
|
||||
kg, P, N_or_D, t, has_transmission, left_index, right_index, left_prob)) {
|
||||
kg, local_P, N_or_D, t, has_transmission, left_index, right_index, left_prob)) {
|
||||
return false; /* Both child nodes have zero importance. */
|
||||
}
|
||||
|
||||
@@ -610,38 +724,104 @@ ccl_device_noinline bool light_tree_sample(KernelGlobals kg,
|
||||
pdf_selection *= pdf_leaf;
|
||||
|
||||
return light_sample<in_volume_segment>(
|
||||
kg, randu, randv, time, P, bounce, path_flag, selected_emitter, pdf_selection, ls);
|
||||
kg, randu, randv, time, P, bounce, path_flag, selected_emitter, object, pdf_selection, ls);
|
||||
}
|
||||
|
||||
/* We need to be able to find the probability of selecting a given light for MIS. */
|
||||
ccl_device float light_tree_pdf(
|
||||
KernelGlobals kg, const float3 P, const float3 N, const int path_flag, const int emitter)
|
||||
KernelGlobals kg, float3 P, float3 N, const int path_flag, const int object, const uint target)
|
||||
{
|
||||
const bool has_transmission = (path_flag & PATH_RAY_MIS_HAD_TRANSMISSION);
|
||||
/* Target emitter info. */
|
||||
const int target_emitter = (emitter >= 0) ? kernel_data_fetch(triangle_to_tree, emitter) :
|
||||
kernel_data_fetch(light_to_tree, ~emitter);
|
||||
ccl_global const KernelLightTreeEmitter *kemitter = &kernel_data_fetch(light_tree_emitters,
|
||||
target_emitter);
|
||||
const int target_leaf = kemitter->parent_index;
|
||||
ccl_global const KernelLightTreeNode *kleaf = &kernel_data_fetch(light_tree_nodes, target_leaf);
|
||||
uint bit_trail = kleaf->bit_trail;
|
||||
|
||||
int node_index = 0; /* Root node. */
|
||||
ccl_global const KernelLightTreeEmitter *kemitter = &kernel_data_fetch(light_tree_emitters,
|
||||
target);
|
||||
int root_index, target_leaf;
|
||||
uint bit_trail, target_emitter;
|
||||
|
||||
if (is_triangle(kemitter)) {
|
||||
/* If the target is an emissive triangle, first traverse the top level tree to find the mesh
|
||||
* light emitter, then traverse the subtree. */
|
||||
target_emitter = kernel_data_fetch(object_to_tree, object);
|
||||
ccl_global const KernelLightTreeEmitter *kmesh = &kernel_data_fetch(light_tree_emitters,
|
||||
target_emitter);
|
||||
target_leaf = kmesh->parent_index;
|
||||
root_index = kmesh->mesh.node_id;
|
||||
ccl_global const KernelLightTreeNode *kroot = &kernel_data_fetch(light_tree_nodes, root_index);
|
||||
bit_trail = kroot->bit_trail;
|
||||
|
||||
if (kroot->type == LIGHT_TREE_INSTANCE) {
|
||||
root_index = kroot->instance.reference;
|
||||
}
|
||||
}
|
||||
else {
|
||||
root_index = 0;
|
||||
target_leaf = kemitter->parent_index;
|
||||
bit_trail = kernel_data_fetch(light_tree_nodes, target_leaf).bit_trail;
|
||||
target_emitter = target;
|
||||
}
|
||||
|
||||
float pdf = 1.0f;
|
||||
int node_index = 0;
|
||||
|
||||
/* Traverse the light tree until we reach the target leaf node. */
|
||||
while (true) {
|
||||
const ccl_global KernelLightTreeNode *knode = &kernel_data_fetch(light_tree_nodes, node_index);
|
||||
|
||||
if (knode->child_index <= 0) {
|
||||
break;
|
||||
if (is_leaf(knode)) {
|
||||
kernel_assert(node_index == target_leaf);
|
||||
ccl_global const KernelLightTreeNode *kleaf = &kernel_data_fetch(light_tree_nodes,
|
||||
target_leaf);
|
||||
|
||||
/* Iterate through leaf node to find the probability of sampling the target emitter. */
|
||||
float target_max_importance = 0.0f;
|
||||
float target_min_importance = 0.0f;
|
||||
float total_max_importance = 0.0f;
|
||||
float total_min_importance = 0.0f;
|
||||
int num_has_importance = 0;
|
||||
for (int i = 0; i < kleaf->num_emitters; i++) {
|
||||
const int emitter = kleaf->leaf.first_emitter + i;
|
||||
float max_importance, min_importance;
|
||||
light_tree_emitter_importance<false>(
|
||||
kg, P, N, 0, has_transmission, emitter, max_importance, min_importance);
|
||||
num_has_importance += (max_importance > 0);
|
||||
if (emitter == target_emitter) {
|
||||
target_max_importance = max_importance;
|
||||
target_min_importance = min_importance;
|
||||
}
|
||||
total_max_importance += max_importance;
|
||||
total_min_importance += min_importance;
|
||||
}
|
||||
|
||||
if (target_max_importance > 0.0f) {
|
||||
pdf *= 0.5f * (target_max_importance / total_max_importance +
|
||||
(total_min_importance > 0 ? target_min_importance / total_min_importance :
|
||||
1.0f / num_has_importance));
|
||||
}
|
||||
else {
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
if (root_index) {
|
||||
/* Arrived at the mesh light. Continue with the subtree. */
|
||||
float unused;
|
||||
light_tree_to_local_space<false>(kg, object, P, N, unused);
|
||||
|
||||
node_index = root_index;
|
||||
root_index = 0;
|
||||
target_emitter = target;
|
||||
target_leaf = kemitter->parent_index;
|
||||
bit_trail = kernel_data_fetch(light_tree_nodes, target_leaf).bit_trail;
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
kernel_assert(node_index == target_leaf);
|
||||
return pdf;
|
||||
}
|
||||
}
|
||||
|
||||
/* Interior node. */
|
||||
const int left_index = node_index + 1;
|
||||
const int right_index = knode->child_index;
|
||||
const int right_index = knode->inner.right_child;
|
||||
|
||||
float left_prob;
|
||||
if (!get_left_probability<false>(
|
||||
@@ -658,36 +838,6 @@ ccl_device float light_tree_pdf(
|
||||
return 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
kernel_assert(node_index == target_leaf);
|
||||
|
||||
/* Iterate through leaf node to find the probability of sampling the target emitter. */
|
||||
float target_max_importance = 0.0f;
|
||||
float target_min_importance = 0.0f;
|
||||
float total_max_importance = 0.0f;
|
||||
float total_min_importance = 0.0f;
|
||||
int num_has_importance = 0;
|
||||
for (int i = 0; i < kleaf->num_emitters; i++) {
|
||||
const int emitter = -kleaf->child_index + i;
|
||||
float max_importance, min_importance;
|
||||
light_tree_emitter_importance<false>(
|
||||
kg, P, N, 0, has_transmission, emitter, max_importance, min_importance);
|
||||
num_has_importance += (max_importance > 0);
|
||||
if (emitter == target_emitter) {
|
||||
target_max_importance = max_importance;
|
||||
target_min_importance = min_importance;
|
||||
}
|
||||
total_max_importance += max_importance;
|
||||
total_min_importance += min_importance;
|
||||
}
|
||||
|
||||
if (target_max_importance > 0.0f) {
|
||||
return pdf * 0.5f *
|
||||
(target_max_importance / total_max_importance +
|
||||
(total_min_importance > 0 ? target_min_importance / total_min_importance :
|
||||
1.0f / num_has_importance));
|
||||
}
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -304,9 +304,8 @@ ccl_device_forceinline bool triangle_light_tree_parameters(
|
||||
|
||||
cos_theta_u = FLT_MAX;
|
||||
|
||||
const int object = kemitter->mesh_light.object_id;
|
||||
float3 vertices[3];
|
||||
triangle_world_space_vertices(kg, object, kemitter->prim_id, -1.0f, vertices);
|
||||
triangle_vertices(kg, kemitter->triangle.id, vertices);
|
||||
|
||||
bool shape_above_surface = false;
|
||||
for (int i = 0; i < 3; i++) {
|
||||
|
@@ -1390,19 +1390,128 @@ ccl_device_extern void osl_noiseparams_set_impulses(ccl_private OSLNoiseOptions
|
||||
res->y = n; \
|
||||
res->z = n; \
|
||||
} \
|
||||
ccl_device_extern void name##_vv(ccl_private float3 *res, const float3 *v) \
|
||||
ccl_device_extern void name##_vv(ccl_private float3 *res, ccl_private const float3 *v) \
|
||||
{ \
|
||||
const float n = name##_fv(v); \
|
||||
res->x = n; \
|
||||
res->y = n; \
|
||||
res->z = n; \
|
||||
} \
|
||||
ccl_device_extern void name##_vvf(ccl_private float3 *res, const float3 *v, float w) \
|
||||
ccl_device_extern void name##_vvf( \
|
||||
ccl_private float3 *res, ccl_private const float3 *v, float w) \
|
||||
{ \
|
||||
const float n = name##_fvf(v, w); \
|
||||
res->x = n; \
|
||||
res->y = n; \
|
||||
res->z = n; \
|
||||
} \
|
||||
ccl_device_extern void name##_dfdf(ccl_private float *res, ccl_private const float *x) \
|
||||
{ \
|
||||
res[0] = name##_ff(x[0]); \
|
||||
res[1] = name##_ff(x[1]); \
|
||||
res[2] = name##_ff(x[2]); \
|
||||
} \
|
||||
ccl_device_extern void name##_dfdff( \
|
||||
ccl_private float *res, ccl_private const float *x, float y) \
|
||||
{ \
|
||||
res[0] = name##_fff(x[0], y); \
|
||||
res[1] = name##_fff(x[1], y); \
|
||||
res[2] = name##_fff(x[2], y); \
|
||||
} \
|
||||
ccl_device_extern void name##_dffdf( \
|
||||
ccl_private float *res, float x, ccl_private const float *y) \
|
||||
{ \
|
||||
res[0] = name##_fff(x, y[0]); \
|
||||
res[1] = name##_fff(x, y[1]); \
|
||||
res[2] = name##_fff(x, y[2]); \
|
||||
} \
|
||||
ccl_device_extern void name##_dfdfdf( \
|
||||
ccl_private float *res, ccl_private const float *x, ccl_private const float *y) \
|
||||
{ \
|
||||
res[0] = name##_fff(x[0], y[0]); \
|
||||
res[1] = name##_fff(x[1], y[1]); \
|
||||
res[2] = name##_fff(x[2], y[2]); \
|
||||
} \
|
||||
ccl_device_extern void name##_dfdv(ccl_private float *res, ccl_private const float3 *v) \
|
||||
{ \
|
||||
res[0] = name##_fv(&v[0]); \
|
||||
res[1] = name##_fv(&v[1]); \
|
||||
res[2] = name##_fv(&v[2]); \
|
||||
} \
|
||||
ccl_device_extern void name##_dfdvf( \
|
||||
ccl_private float *res, ccl_private const float3 *v, float w) \
|
||||
{ \
|
||||
res[0] = name##_fvf(&v[0], w); \
|
||||
res[1] = name##_fvf(&v[1], w); \
|
||||
res[2] = name##_fvf(&v[2], w); \
|
||||
} \
|
||||
ccl_device_extern void name##_dfvdf( \
|
||||
ccl_private float *res, ccl_private const float3 *v, ccl_private const float *w) \
|
||||
{ \
|
||||
res[0] = name##_fvf(v, w[0]); \
|
||||
res[1] = name##_fvf(v, w[1]); \
|
||||
res[2] = name##_fvf(v, w[2]); \
|
||||
} \
|
||||
ccl_device_extern void name##_dfdvdf( \
|
||||
ccl_private float *res, ccl_private const float3 *v, ccl_private const float *w) \
|
||||
{ \
|
||||
res[0] = name##_fvf(&v[0], w[0]); \
|
||||
res[1] = name##_fvf(&v[1], w[1]); \
|
||||
res[2] = name##_fvf(&v[2], w[2]); \
|
||||
} \
|
||||
ccl_device_extern void name##_dvdf(ccl_private float3 *res, ccl_private const float *x) \
|
||||
{ \
|
||||
name##_vf(&res[0], x[0]); \
|
||||
name##_vf(&res[1], x[1]); \
|
||||
name##_vf(&res[2], x[2]); \
|
||||
} \
|
||||
ccl_device_extern void name##_dvdff( \
|
||||
ccl_private float3 *res, ccl_private const float *x, float y) \
|
||||
{ \
|
||||
name##_vff(&res[0], x[0], y); \
|
||||
name##_vff(&res[1], x[1], y); \
|
||||
name##_vff(&res[2], x[2], y); \
|
||||
} \
|
||||
ccl_device_extern void name##_dvfdf( \
|
||||
ccl_private float3 *res, float x, ccl_private const float *y) \
|
||||
{ \
|
||||
name##_vff(&res[0], x, y[0]); \
|
||||
name##_vff(&res[1], x, y[1]); \
|
||||
name##_vff(&res[2], x, y[2]); \
|
||||
} \
|
||||
ccl_device_extern void name##_dvdfdf( \
|
||||
ccl_private float3 *res, ccl_private const float *x, ccl_private const float *y) \
|
||||
{ \
|
||||
name##_vff(&res[0], x[0], y[0]); \
|
||||
name##_vff(&res[1], x[1], y[1]); \
|
||||
name##_vff(&res[2], x[2], y[2]); \
|
||||
} \
|
||||
ccl_device_extern void name##_dvdv(ccl_private float3 *res, ccl_private const float3 *v) \
|
||||
{ \
|
||||
name##_vv(&res[0], &v[0]); \
|
||||
name##_vv(&res[1], &v[1]); \
|
||||
name##_vv(&res[2], &v[2]); \
|
||||
} \
|
||||
ccl_device_extern void name##_dvdvf( \
|
||||
ccl_private float3 *res, ccl_private const float3 *v, float w) \
|
||||
{ \
|
||||
name##_vvf(&res[0], &v[0], w); \
|
||||
name##_vvf(&res[1], &v[1], w); \
|
||||
name##_vvf(&res[2], &v[2], w); \
|
||||
} \
|
||||
ccl_device_extern void name##_dvvdf( \
|
||||
ccl_private float3 *res, ccl_private const float3 *v, ccl_private const float *w) \
|
||||
{ \
|
||||
name##_vvf(&res[0], v, w[0]); \
|
||||
name##_vvf(&res[1], v, w[1]); \
|
||||
name##_vvf(&res[2], v, w[2]); \
|
||||
} \
|
||||
ccl_device_extern void name##_dvdvdf( \
|
||||
ccl_private float3 *res, ccl_private const float3 *v, ccl_private const float *w) \
|
||||
{ \
|
||||
name##_vvf(&res[0], &v[0], w[0]); \
|
||||
name##_vvf(&res[1], &v[1], w[1]); \
|
||||
name##_vvf(&res[2], &v[2], w[2]); \
|
||||
}
|
||||
|
||||
ccl_device_forceinline float hashnoise_1d(float p)
|
||||
|
@@ -132,11 +132,11 @@ color sky_radiance_nishita(vector dir, float nishita_data[10], string filename)
|
||||
/* definitions */
|
||||
vector sun_dir = geographical_to_direction(sun_elevation, sun_rotation + M_PI_2);
|
||||
float sun_dir_angle = precise_angle(dir, sun_dir);
|
||||
float half_angular = angular_diameter / 2.0;
|
||||
float half_angular = angular_diameter * 0.5;
|
||||
float dir_elevation = M_PI_2 - direction[0];
|
||||
|
||||
/* if ray inside sun disc render it, otherwise render sky.
|
||||
* alternatively, ignore the sun if we're evaluating the background texture. */
|
||||
/* If the ray is inside the sun disc, render it, otherwise render the sky.
|
||||
* Alternatively, ignore the sun if we're evaluating the background texture. */
|
||||
if (sun_dir_angle < half_angular && sun_disc == 1 && raytype("importance_bake") != 1) {
|
||||
/* get 2 pixels data */
|
||||
color pixel_bottom = color(nishita_data[0], nishita_data[1], nishita_data[2]);
|
||||
|
@@ -84,8 +84,8 @@ ccl_device_inline void sample_uniform_cone(const float3 N,
|
||||
ccl_device_inline float pdf_uniform_cone(const float3 N, float3 D, float angle)
|
||||
{
|
||||
float zMin = cosf(angle);
|
||||
float z = dot(N, D);
|
||||
if (z > zMin) {
|
||||
float z = precise_angle(N, D);
|
||||
if (z < angle) {
|
||||
return M_1_2PI_F / (1.0f - zMin);
|
||||
}
|
||||
return 0.0f;
|
||||
|
@@ -138,12 +138,13 @@ ccl_device float3 sky_radiance_nishita(KernelGlobals kg,
|
||||
/* definitions */
|
||||
float3 sun_dir = geographical_to_direction(sun_elevation, sun_rotation + M_PI_2_F);
|
||||
float sun_dir_angle = precise_angle(dir, sun_dir);
|
||||
float half_angular = angular_diameter / 2.0f;
|
||||
float half_angular = angular_diameter * 0.5f;
|
||||
float dir_elevation = M_PI_2_F - direction.x;
|
||||
|
||||
/* if ray inside sun disc render it, otherwise render sky.
|
||||
* alternatively, ignore the sun if we're evaluating the background texture. */
|
||||
if (sun_disc && sun_dir_angle < half_angular && !(path_flag & PATH_RAY_IMPORTANCE_BAKE)) {
|
||||
/* If the ray is inside the sun disc, render it, otherwise render the sky.
|
||||
* Alternatively, ignore the sun if we're evaluating the background texture. */
|
||||
if (sun_disc && sun_dir_angle < half_angular &&
|
||||
!((path_flag & PATH_RAY_IMPORTANCE_BAKE) && kernel_data.background.use_sun_guiding)) {
|
||||
/* get 2 pixels data */
|
||||
float y;
|
||||
|
||||
|
@@ -3,8 +3,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#if !defined(__KERNEL_GPU__) && defined(WITH_EMBREE)
|
||||
# if EMBREE_MAJOR_VERSION >= 4
|
||||
#if (!defined(__KERNEL_GPU__) || (defined(__KERNEL_ONEAPI__) && defined(WITH_EMBREE_GPU))) && \
|
||||
defined(WITH_EMBREE)
|
||||
# if EMBREE_MAJOR_VERSION == 4
|
||||
# include <embree4/rtcore.h>
|
||||
# include <embree4/rtcore_scene.h>
|
||||
# else
|
||||
@@ -78,9 +79,8 @@ CCL_NAMESPACE_BEGIN
|
||||
#define __VISIBILITY_FLAG__
|
||||
#define __VOLUME__
|
||||
|
||||
/* TODO: solve internal compiler errors and enable light tree on HIP. */
|
||||
/* TODO: solve internal compiler perf issue and enable light tree on Metal/AMD. */
|
||||
#if defined(__KERNEL_HIP__) || defined(__KERNEL_METAL_AMD__)
|
||||
#if defined(__KERNEL_METAL_AMD__)
|
||||
# undef __LIGHT_TREE__
|
||||
#endif
|
||||
|
||||
@@ -1370,6 +1370,13 @@ using BoundingCone = struct BoundingCone {
|
||||
float theta_e;
|
||||
};
|
||||
|
||||
/* Kind of a light tree node. Every kind occupies its own bit of the
 * 8-bit underlying type. */
enum LightTreeNodeType : uint8_t {
  LIGHT_TREE_INSTANCE = 1 << 0,
  LIGHT_TREE_INNER = 1 << 1,
  LIGHT_TREE_LEAF = 1 << 2,
  LIGHT_TREE_DISTANT = 1 << 3,
};
|
||||
|
||||
typedef struct KernelLightTreeNode {
|
||||
/* Bounding box. */
|
||||
BoundingBox bbox;
|
||||
@@ -1380,17 +1387,25 @@ typedef struct KernelLightTreeNode {
|
||||
/* Energy. */
|
||||
float energy;
|
||||
|
||||
/* If this is 0 or less, we're at a leaf node
|
||||
* and the negative value indexes into the first child of the light array.
|
||||
* Otherwise, it's an index to the node's second child. */
|
||||
int child_index;
|
||||
int num_emitters; /* leaf nodes need to know the number of emitters stored. */
|
||||
LightTreeNodeType type;
|
||||
|
||||
/* Leaf nodes need to know the number of emitters stored. */
|
||||
int num_emitters;
|
||||
|
||||
union {
|
||||
struct {
|
||||
int first_emitter; /* The index of the first emitter. */
|
||||
} leaf;
|
||||
struct {
|
||||
int right_child; /* The index of the right child. */
|
||||
} inner;
|
||||
struct {
|
||||
int reference; /* A reference to the node with the subtree. */
|
||||
} instance;
|
||||
};
|
||||
|
||||
/* Bit trail. */
|
||||
uint bit_trail;
|
||||
|
||||
/* Padding. */
|
||||
int pad;
|
||||
} KernelLightTreeNode;
|
||||
static_assert_align(KernelLightTreeNode, 16);
|
||||
|
||||
@@ -1402,10 +1417,23 @@ typedef struct KernelLightTreeEmitter {
|
||||
/* Energy. */
|
||||
float energy;
|
||||
|
||||
/* The location in the lights or triangles array. */
|
||||
int prim_id;
|
||||
union {
|
||||
struct {
|
||||
int id; /* The location in the triangles array. */
|
||||
EmissionSampling emission_sampling;
|
||||
} triangle;
|
||||
|
||||
struct {
|
||||
int id; /* The location in the lights array. */
|
||||
} light;
|
||||
|
||||
struct {
|
||||
int object_id;
|
||||
int node_id;
|
||||
} mesh;
|
||||
};
|
||||
|
||||
MeshLight mesh_light;
|
||||
EmissionSampling emission_sampling;
|
||||
|
||||
/* Parent. */
|
||||
int parent_index;
|
||||
|
@@ -15,8 +15,12 @@ set(SRC
|
||||
camera.cpp
|
||||
colorspace.cpp
|
||||
constant_fold.cpp
|
||||
devicescene.cpp
|
||||
film.cpp
|
||||
geometry.cpp
|
||||
geometry_attributes.cpp
|
||||
geometry_bvh.cpp
|
||||
geometry_mesh.cpp
|
||||
hair.cpp
|
||||
image.cpp
|
||||
image_oiio.cpp
|
||||
@@ -55,6 +59,7 @@ set(SRC_HEADERS
|
||||
camera.h
|
||||
colorspace.h
|
||||
constant_fold.h
|
||||
devicescene.h
|
||||
film.h
|
||||
geometry.h
|
||||
hair.h
|
||||
|
64
intern/cycles/scene/devicescene.cpp
Normal file
64
intern/cycles/scene/devicescene.cpp
Normal file
@@ -0,0 +1,64 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#include "scene/devicescene.h"
|
||||
#include "device/device.h"
|
||||
#include "device/memory.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Construct every device array with the given device, a name string and the
 * MEM_GLOBAL memory type, then zero-fill the kernel data block.
 *
 * The initializers are listed in the same order as the members are declared
 * in the class. */
DeviceScene::DeviceScene(Device *device)
    : /* BVH */
      bvh_nodes(device, "bvh_nodes", MEM_GLOBAL),
      bvh_leaf_nodes(device, "bvh_leaf_nodes", MEM_GLOBAL),
      object_node(device, "object_node", MEM_GLOBAL),
      prim_type(device, "prim_type", MEM_GLOBAL),
      prim_visibility(device, "prim_visibility", MEM_GLOBAL),
      prim_index(device, "prim_index", MEM_GLOBAL),
      prim_object(device, "prim_object", MEM_GLOBAL),
      prim_time(device, "prim_time", MEM_GLOBAL),
      /* mesh */
      tri_verts(device, "tri_verts", MEM_GLOBAL),
      tri_shader(device, "tri_shader", MEM_GLOBAL),
      tri_vnormal(device, "tri_vnormal", MEM_GLOBAL),
      tri_vindex(device, "tri_vindex", MEM_GLOBAL),
      tri_patch(device, "tri_patch", MEM_GLOBAL),
      tri_patch_uv(device, "tri_patch_uv", MEM_GLOBAL),
      curves(device, "curves", MEM_GLOBAL),
      curve_keys(device, "curve_keys", MEM_GLOBAL),
      curve_segments(device, "curve_segments", MEM_GLOBAL),
      patches(device, "patches", MEM_GLOBAL),
      /* point-cloud */
      points(device, "points", MEM_GLOBAL),
      points_shader(device, "points_shader", MEM_GLOBAL),
      /* objects */
      objects(device, "objects", MEM_GLOBAL),
      object_motion_pass(device, "object_motion_pass", MEM_GLOBAL),
      object_motion(device, "object_motion", MEM_GLOBAL),
      object_flag(device, "object_flag", MEM_GLOBAL),
      object_volume_step(device, "object_volume_step", MEM_GLOBAL),
      object_prim_offset(device, "object_prim_offset", MEM_GLOBAL),
      /* cameras */
      camera_motion(device, "camera_motion", MEM_GLOBAL),
      /* attributes */
      attributes_map(device, "attributes_map", MEM_GLOBAL),
      attributes_float(device, "attributes_float", MEM_GLOBAL),
      attributes_float2(device, "attributes_float2", MEM_GLOBAL),
      attributes_float3(device, "attributes_float3", MEM_GLOBAL),
      attributes_float4(device, "attributes_float4", MEM_GLOBAL),
      attributes_uchar4(device, "attributes_uchar4", MEM_GLOBAL),
      /* lights */
      light_distribution(device, "light_distribution", MEM_GLOBAL),
      lights(device, "lights", MEM_GLOBAL),
      light_background_marginal_cdf(device, "light_background_marginal_cdf", MEM_GLOBAL),
      light_background_conditional_cdf(device, "light_background_conditional_cdf", MEM_GLOBAL),
      /* light tree */
      light_tree_nodes(device, "light_tree_nodes", MEM_GLOBAL),
      light_tree_emitters(device, "light_tree_emitters", MEM_GLOBAL),
      light_to_tree(device, "light_to_tree", MEM_GLOBAL),
      object_to_tree(device, "object_to_tree", MEM_GLOBAL),
      object_lookup_offset(device, "object_lookup_offset", MEM_GLOBAL),
      triangle_to_tree(device, "triangle_to_tree", MEM_GLOBAL),
      /* particles */
      particles(device, "particles", MEM_GLOBAL),
      /* shaders */
      svm_nodes(device, "svm_nodes", MEM_GLOBAL),
      shaders(device, "shaders", MEM_GLOBAL),
      /* lookup tables */
      lookup_table(device, "lookup_table", MEM_GLOBAL),
      /* integrator */
      sample_pattern_lut(device, "sample_pattern_lut", MEM_GLOBAL),
      /* IES lights (note: registered under the name "ies") */
      ies_lights(device, "ies", MEM_GLOBAL)
{
  /* Start from an all-zero kernel data block. */
  memset(static_cast<void *>(&data), 0, sizeof(data));
}
|
||||
|
||||
CCL_NAMESPACE_END
|
101
intern/cycles/scene/devicescene.h
Normal file
101
intern/cycles/scene/devicescene.h
Normal file
@@ -0,0 +1,101 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#ifndef __DEVICESCENE_H__
|
||||
#define __DEVICESCENE_H__
|
||||
|
||||
#include "device/device.h"
|
||||
#include "device/memory.h"
|
||||
|
||||
#include "util/types.h"
|
||||
#include "util/vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class DeviceScene {
|
||||
public:
|
||||
/* BVH */
|
||||
device_vector<int4> bvh_nodes;
|
||||
device_vector<int4> bvh_leaf_nodes;
|
||||
device_vector<int> object_node;
|
||||
device_vector<int> prim_type;
|
||||
device_vector<uint> prim_visibility;
|
||||
device_vector<int> prim_index;
|
||||
device_vector<int> prim_object;
|
||||
device_vector<float2> prim_time;
|
||||
|
||||
/* mesh */
|
||||
device_vector<packed_float3> tri_verts;
|
||||
device_vector<uint> tri_shader;
|
||||
device_vector<packed_float3> tri_vnormal;
|
||||
device_vector<packed_uint3> tri_vindex;
|
||||
device_vector<uint> tri_patch;
|
||||
device_vector<float2> tri_patch_uv;
|
||||
|
||||
device_vector<KernelCurve> curves;
|
||||
device_vector<float4> curve_keys;
|
||||
device_vector<KernelCurveSegment> curve_segments;
|
||||
|
||||
device_vector<uint> patches;
|
||||
|
||||
/* point-cloud */
|
||||
device_vector<float4> points;
|
||||
device_vector<uint> points_shader;
|
||||
|
||||
/* objects */
|
||||
device_vector<KernelObject> objects;
|
||||
device_vector<Transform> object_motion_pass;
|
||||
device_vector<DecomposedTransform> object_motion;
|
||||
device_vector<uint> object_flag;
|
||||
device_vector<float> object_volume_step;
|
||||
device_vector<uint> object_prim_offset;
|
||||
|
||||
/* cameras */
|
||||
device_vector<DecomposedTransform> camera_motion;
|
||||
|
||||
/* attributes */
|
||||
device_vector<AttributeMap> attributes_map;
|
||||
device_vector<float> attributes_float;
|
||||
device_vector<float2> attributes_float2;
|
||||
device_vector<packed_float3> attributes_float3;
|
||||
device_vector<float4> attributes_float4;
|
||||
device_vector<uchar4> attributes_uchar4;
|
||||
|
||||
/* lights */
|
||||
device_vector<KernelLightDistribution> light_distribution;
|
||||
device_vector<KernelLight> lights;
|
||||
device_vector<float2> light_background_marginal_cdf;
|
||||
device_vector<float2> light_background_conditional_cdf;
|
||||
|
||||
/* light tree */
|
||||
device_vector<KernelLightTreeNode> light_tree_nodes;
|
||||
device_vector<KernelLightTreeEmitter> light_tree_emitters;
|
||||
device_vector<uint> light_to_tree;
|
||||
device_vector<uint> object_to_tree;
|
||||
device_vector<uint> object_lookup_offset;
|
||||
device_vector<uint> triangle_to_tree;
|
||||
|
||||
/* particles */
|
||||
device_vector<KernelParticle> particles;
|
||||
|
||||
/* shaders */
|
||||
device_vector<int4> svm_nodes;
|
||||
device_vector<KernelShader> shaders;
|
||||
|
||||
/* lookup tables */
|
||||
device_vector<float> lookup_table;
|
||||
|
||||
/* integrator */
|
||||
device_vector<float> sample_pattern_lut;
|
||||
|
||||
/* IES lights */
|
||||
device_vector<float> ies_lights;
|
||||
|
||||
KernelData data;
|
||||
|
||||
DeviceScene(Device *device);
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __DEVICESCENE_H__ */
|
File diff suppressed because it is too large
Load Diff
@@ -30,6 +30,38 @@ class Shader;
|
||||
class Volume;
|
||||
struct PackedBVH;
|
||||
|
||||
/* Bit flags recording which data has been modified and which data needs to be
 * reallocated, used to decide which device data to free or update. */
enum {
  /* Geometry data modified. */
  DEVICE_CURVE_DATA_MODIFIED = 1 << 0,
  DEVICE_MESH_DATA_MODIFIED = 1 << 1,
  DEVICE_POINT_DATA_MODIFIED = 1 << 2,

  /* Attribute arrays modified, one flag per storage type. */
  ATTR_FLOAT_MODIFIED = 1 << 3,
  ATTR_FLOAT2_MODIFIED = 1 << 4,
  ATTR_FLOAT3_MODIFIED = 1 << 5,
  ATTR_FLOAT4_MODIFIED = 1 << 6,
  ATTR_UCHAR4_MODIFIED = 1 << 7,

  /* Geometry data needing reallocation. */
  CURVE_DATA_NEED_REALLOC = 1 << 8,
  MESH_DATA_NEED_REALLOC = 1 << 9,
  POINT_DATA_NEED_REALLOC = 1 << 10,

  /* Attribute arrays needing reallocation, one flag per storage type. */
  ATTR_FLOAT_NEEDS_REALLOC = 1 << 11,
  ATTR_FLOAT2_NEEDS_REALLOC = 1 << 12,
  ATTR_FLOAT3_NEEDS_REALLOC = 1 << 13,
  ATTR_FLOAT4_NEEDS_REALLOC = 1 << 14,
  ATTR_UCHAR4_NEEDS_REALLOC = 1 << 15,

  /* Union of all attribute reallocation flags. */
  ATTRS_NEED_REALLOC = ATTR_FLOAT_NEEDS_REALLOC | ATTR_FLOAT2_NEEDS_REALLOC |
                       ATTR_FLOAT3_NEEDS_REALLOC | ATTR_FLOAT4_NEEDS_REALLOC |
                       ATTR_UCHAR4_NEEDS_REALLOC,

  /* Per geometry kind: its own data plus every attribute array. */
  DEVICE_MESH_DATA_NEEDS_REALLOC = MESH_DATA_NEED_REALLOC | ATTRS_NEED_REALLOC,
  DEVICE_POINT_DATA_NEEDS_REALLOC = POINT_DATA_NEED_REALLOC | ATTRS_NEED_REALLOC,
  DEVICE_CURVE_DATA_NEEDS_REALLOC = CURVE_DATA_NEED_REALLOC | ATTRS_NEED_REALLOC,
};
|
||||
|
||||
/* Geometry
|
||||
*
|
||||
* Base class for geometric types like Mesh and Hair. */
|
||||
|
722
intern/cycles/scene/geometry_attributes.cpp
Normal file
722
intern/cycles/scene/geometry_attributes.cpp
Normal file
@@ -0,0 +1,722 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#include "bvh/bvh.h"
|
||||
#include "bvh/bvh2.h"
|
||||
|
||||
#include "device/device.h"
|
||||
|
||||
#include "scene/attribute.h"
|
||||
#include "scene/camera.h"
|
||||
#include "scene/geometry.h"
|
||||
#include "scene/hair.h"
|
||||
#include "scene/light.h"
|
||||
#include "scene/mesh.h"
|
||||
#include "scene/object.h"
|
||||
#include "scene/pointcloud.h"
|
||||
#include "scene/scene.h"
|
||||
#include "scene/shader.h"
|
||||
#include "scene/shader_nodes.h"
|
||||
#include "scene/stats.h"
|
||||
#include "scene/volume.h"
|
||||
|
||||
#include "subd/patch_table.h"
|
||||
#include "subd/split.h"
|
||||
|
||||
#include "kernel/osl/globals.h"
|
||||
|
||||
#include "util/foreach.h"
|
||||
#include "util/log.h"
|
||||
#include "util/progress.h"
|
||||
#include "util/task.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Return true when the standard attribute `std` is requested, either globally
 * by the scene or by any shader in `used_shaders`. ATTR_STD_NONE is never
 * needed. */
bool Geometry::need_attribute(Scene *scene, AttributeStandard std)
{
  if (std == ATTR_STD_NONE) {
    return false;
  }

  if (scene->need_global_attribute(std)) {
    return true;
  }

  for (Node *shader_node : used_shaders) {
    Shader *shader = static_cast<Shader *>(shader_node);
    if (shader->attributes.find(std)) {
      return true;
    }
  }

  return false;
}
|
||||
|
||||
/* Return true when any shader in `used_shaders` requests the attribute called
 * `name`. An empty name is never needed; the scene argument is unused. */
bool Geometry::need_attribute(Scene * /*scene*/, ustring name)
{
  if (name == ustring()) {
    return false;
  }

  for (Node *shader_node : used_shaders) {
    Shader *shader = static_cast<Shader *>(shader_node);
    if (shader->attributes.find(name)) {
      return true;
    }
  }

  return false;
}
|
||||
|
||||
/* Collect the attribute requests of every shader in `used_shaders` into a
 * single request set. */
AttributeRequestSet Geometry::needed_attributes()
{
  AttributeRequestSet merged;

  for (Node *shader_node : used_shaders) {
    Shader *shader = static_cast<Shader *>(shader_node);
    merged.add(shader->attributes);
  }

  return merged;
}
|
||||
|
||||
bool Geometry::has_voxel_attributes() const
|
||||
{
|
||||
foreach (const Attribute &attr, attributes.attributes) {
|
||||
if (attr.element == ATTR_ELEMENT_VOXEL) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Generate a normal attribute map entry from an attribute descriptor. */
|
||||
static void emit_attribute_map_entry(AttributeMap *attr_map,
|
||||
size_t index,
|
||||
uint64_t id,
|
||||
TypeDesc type,
|
||||
const AttributeDescriptor &desc)
|
||||
{
|
||||
attr_map[index].id = id;
|
||||
attr_map[index].element = desc.element;
|
||||
attr_map[index].offset = as_uint(desc.offset);
|
||||
|
||||
if (type == TypeDesc::TypeFloat)
|
||||
attr_map[index].type = NODE_ATTR_FLOAT;
|
||||
else if (type == TypeDesc::TypeMatrix)
|
||||
attr_map[index].type = NODE_ATTR_MATRIX;
|
||||
else if (type == TypeFloat2)
|
||||
attr_map[index].type = NODE_ATTR_FLOAT2;
|
||||
else if (type == TypeFloat4)
|
||||
attr_map[index].type = NODE_ATTR_FLOAT4;
|
||||
else if (type == TypeRGBA)
|
||||
attr_map[index].type = NODE_ATTR_RGBA;
|
||||
else
|
||||
attr_map[index].type = NODE_ATTR_FLOAT3;
|
||||
|
||||
attr_map[index].flags = desc.flags;
|
||||
}
|
||||
|
||||
/* Generate an attribute map end marker, optionally including a link to another map.
|
||||
* Links are used to connect object attribute maps to mesh attribute maps. */
|
||||
static void emit_attribute_map_terminator(AttributeMap *attr_map,
|
||||
size_t index,
|
||||
bool chain,
|
||||
uint chain_link)
|
||||
{
|
||||
for (int j = 0; j < ATTR_PRIM_TYPES; j++) {
|
||||
attr_map[index + j].id = ATTR_STD_NONE;
|
||||
attr_map[index + j].element = chain; /* link is valid flag */
|
||||
attr_map[index + j].offset = chain ? chain_link + j : 0; /* link to the correct sub-entry */
|
||||
attr_map[index + j].type = 0;
|
||||
attr_map[index + j].flags = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Generate all necessary attribute map entries from the attribute request. */
|
||||
static void emit_attribute_mapping(
|
||||
AttributeMap *attr_map, size_t index, uint64_t id, AttributeRequest &req, Geometry *geom)
|
||||
{
|
||||
emit_attribute_map_entry(attr_map, index, id, req.type, req.desc);
|
||||
|
||||
if (geom->is_mesh()) {
|
||||
Mesh *mesh = static_cast<Mesh *>(geom);
|
||||
if (mesh->get_num_subd_faces()) {
|
||||
emit_attribute_map_entry(attr_map, index + 1, id, req.subd_type, req.subd_desc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Build the `attributes_map` table and copy it to the device.
 *
 * For SVM, this table is used to look up the offset of an attribute based on
 * a unique shader attribute id. Each geometry gets a section of
 * `(request count + 1) * ATTR_PRIM_TYPES` entries, the extra slot being the
 * terminator. Objects only get a section when they have attribute requests;
 * their terminator links to the section of their geometry.
 *
 * The `attr_map_offset` of every geometry and object is always assigned. The
 * table itself is only rebuilt when it is non-empty and
 * `attributes_map.need_realloc()` reports true. */
void GeometryManager::update_svm_attributes(Device *,
                                            DeviceScene *dscene,
                                            Scene *scene,
                                            vector<AttributeRequestSet> &geom_attributes,
                                            vector<AttributeRequestSet> &object_attributes)
{
  /* Shader attribute id of a request: looked up by name for non-standard
   * attributes, by standard enum otherwise. */
  auto request_id = [scene](AttributeRequest &req) -> uint64_t {
    if (req.std == ATTR_STD_NONE) {
      return scene->shader_manager->get_attribute_id(req.name);
    }
    return scene->shader_manager->get_attribute_id(req.std);
  };

  /* First pass: hand out section offsets and accumulate the table size. */
  size_t total_entries = 0;

  for (size_t g = 0; g < scene->geometry.size(); g++) {
    Geometry *geom = scene->geometry[g];
    geom->attr_map_offset = total_entries;

#ifdef WITH_OSL
    /* A standard attribute whose id differs from its enum value is emitted
     * twice (see the second pass), so it needs two slots. */
    size_t num_requests = 0;
    for (AttributeRequest &req : geom_attributes[g].requests) {
      const bool needs_std_alias = req.std != ATTR_STD_NONE &&
                                   scene->shader_manager->get_attribute_id(req.std) !=
                                       static_cast<uint64_t>(req.std);
      num_requests += needs_std_alias ? 2 : 1;
    }
#else
    const size_t num_requests = geom_attributes[g].size();
#endif

    total_entries += (num_requests + 1) * ATTR_PRIM_TYPES;
  }

  for (size_t o = 0; o < scene->objects.size(); o++) {
    Object *object = scene->objects[o];

    /* Only allocate a table for the object if it actually has attributes. */
    if (object_attributes[o].size() == 0) {
      object->attr_map_offset = 0;
      continue;
    }

    object->attr_map_offset = total_entries;
    total_entries += (object_attributes[o].size() + 1) * ATTR_PRIM_TYPES;
  }

  /* Nothing to build, or the existing table does not need rebuilding. */
  if (total_entries == 0 || !dscene->attributes_map.need_realloc()) {
    return;
  }

  /* Second pass: allocate the zero-initialized table and fill it. */
  AttributeMap *attr_map = dscene->attributes_map.alloc(total_entries);
  memset(attr_map, 0, dscene->attributes_map.size() * sizeof(*attr_map));

  for (size_t g = 0; g < scene->geometry.size(); g++) {
    Geometry *geom = scene->geometry[g];
    size_t cursor = geom->attr_map_offset;

    for (AttributeRequest &req : geom_attributes[g].requests) {
      const uint64_t id = request_id(req);

      emit_attribute_mapping(attr_map, cursor, id, req, geom);
      cursor += ATTR_PRIM_TYPES;

#ifdef WITH_OSL
      /* Some standard attributes are explicitly referenced via their standard ID, so add those
       * again in case they were added under a different attribute ID. */
      if (req.std != ATTR_STD_NONE && id != static_cast<uint64_t>(req.std)) {
        emit_attribute_mapping(attr_map, cursor, static_cast<uint64_t>(req.std), req, geom);
        cursor += ATTR_PRIM_TYPES;
      }
#endif
    }

    emit_attribute_map_terminator(attr_map, cursor, false, 0);
  }

  for (size_t o = 0; o < scene->objects.size(); o++) {
    Object *object = scene->objects[o];
    AttributeRequestSet &requests = object_attributes[o];

    if (requests.size() == 0) {
      continue;
    }

    size_t cursor = object->attr_map_offset;

    for (AttributeRequest &req : requests.requests) {
      emit_attribute_mapping(attr_map, cursor, request_id(req), req, object->geometry);
      cursor += ATTR_PRIM_TYPES;
    }

    /* Chain the object map to the map of its geometry. */
    emit_attribute_map_terminator(attr_map, cursor, true, object->geometry->attr_map_offset);
  }

  /* Copy to device. */
  dscene->attributes_map.copy_to_device();
}
|
||||
|
||||
void GeometryManager::update_attribute_element_offset(Geometry *geom,
|
||||
device_vector<float> &attr_float,
|
||||
size_t &attr_float_offset,
|
||||
device_vector<float2> &attr_float2,
|
||||
size_t &attr_float2_offset,
|
||||
device_vector<packed_float3> &attr_float3,
|
||||
size_t &attr_float3_offset,
|
||||
device_vector<float4> &attr_float4,
|
||||
size_t &attr_float4_offset,
|
||||
device_vector<uchar4> &attr_uchar4,
|
||||
size_t &attr_uchar4_offset,
|
||||
Attribute *mattr,
|
||||
AttributePrimitive prim,
|
||||
TypeDesc &type,
|
||||
AttributeDescriptor &desc)
|
||||
{
|
||||
if (mattr) {
|
||||
/* store element and type */
|
||||
desc.element = mattr->element;
|
||||
desc.flags = mattr->flags;
|
||||
type = mattr->type;
|
||||
|
||||
/* store attribute data in arrays */
|
||||
size_t size = mattr->element_size(geom, prim);
|
||||
|
||||
AttributeElement &element = desc.element;
|
||||
int &offset = desc.offset;
|
||||
|
||||
if (mattr->element == ATTR_ELEMENT_VOXEL) {
|
||||
/* store slot in offset value */
|
||||
ImageHandle &handle = mattr->data_voxel();
|
||||
offset = handle.svm_slot();
|
||||
}
|
||||
else if (mattr->element == ATTR_ELEMENT_CORNER_BYTE) {
|
||||
uchar4 *data = mattr->data_uchar4();
|
||||
offset = attr_uchar4_offset;
|
||||
|
||||
assert(attr_uchar4.size() >= offset + size);
|
||||
if (mattr->modified) {
|
||||
for (size_t k = 0; k < size; k++) {
|
||||
attr_uchar4[offset + k] = data[k];
|
||||
}
|
||||
attr_uchar4.tag_modified();
|
||||
}
|
||||
attr_uchar4_offset += size;
|
||||
}
|
||||
else if (mattr->type == TypeDesc::TypeFloat) {
|
||||
float *data = mattr->data_float();
|
||||
offset = attr_float_offset;
|
||||
|
||||
assert(attr_float.size() >= offset + size);
|
||||
if (mattr->modified) {
|
||||
for (size_t k = 0; k < size; k++) {
|
||||
attr_float[offset + k] = data[k];
|
||||
}
|
||||
attr_float.tag_modified();
|
||||
}
|
||||
attr_float_offset += size;
|
||||
}
|
||||
else if (mattr->type == TypeFloat2) {
|
||||
float2 *data = mattr->data_float2();
|
||||
offset = attr_float2_offset;
|
||||
|
||||
assert(attr_float2.size() >= offset + size);
|
||||
if (mattr->modified) {
|
||||
for (size_t k = 0; k < size; k++) {
|
||||
attr_float2[offset + k] = data[k];
|
||||
}
|
||||
attr_float2.tag_modified();
|
||||
}
|
||||
attr_float2_offset += size;
|
||||
}
|
||||
else if (mattr->type == TypeDesc::TypeMatrix) {
|
||||
Transform *tfm = mattr->data_transform();
|
||||
offset = attr_float4_offset;
|
||||
|
||||
assert(attr_float4.size() >= offset + size * 3);
|
||||
if (mattr->modified) {
|
||||
for (size_t k = 0; k < size * 3; k++) {
|
||||
attr_float4[offset + k] = (&tfm->x)[k];
|
||||
}
|
||||
attr_float4.tag_modified();
|
||||
}
|
||||
attr_float4_offset += size * 3;
|
||||
}
|
||||
else if (mattr->type == TypeFloat4 || mattr->type == TypeRGBA) {
|
||||
float4 *data = mattr->data_float4();
|
||||
offset = attr_float4_offset;
|
||||
|
||||
assert(attr_float4.size() >= offset + size);
|
||||
if (mattr->modified) {
|
||||
for (size_t k = 0; k < size; k++) {
|
||||
attr_float4[offset + k] = data[k];
|
||||
}
|
||||
attr_float4.tag_modified();
|
||||
}
|
||||
attr_float4_offset += size;
|
||||
}
|
||||
else {
|
||||
float3 *data = mattr->data_float3();
|
||||
offset = attr_float3_offset;
|
||||
|
||||
assert(attr_float3.size() >= offset + size);
|
||||
if (mattr->modified) {
|
||||
for (size_t k = 0; k < size; k++) {
|
||||
attr_float3[offset + k] = data[k];
|
||||
}
|
||||
attr_float3.tag_modified();
|
||||
}
|
||||
attr_float3_offset += size;
|
||||
}
|
||||
|
||||
/* mesh vertex/curve index is global, not per object, so we sneak
|
||||
* a correction for that in here */
|
||||
if (geom->is_mesh()) {
|
||||
Mesh *mesh = static_cast<Mesh *>(geom);
|
||||
if (mesh->subdivision_type == Mesh::SUBDIVISION_CATMULL_CLARK &&
|
||||
desc.flags & ATTR_SUBDIVIDED) {
|
||||
/* Indices for subdivided attributes are retrieved
|
||||
* from patch table so no need for correction here. */
|
||||
}
|
||||
else if (element == ATTR_ELEMENT_VERTEX)
|
||||
offset -= mesh->vert_offset;
|
||||
else if (element == ATTR_ELEMENT_VERTEX_MOTION)
|
||||
offset -= mesh->vert_offset;
|
||||
else if (element == ATTR_ELEMENT_FACE) {
|
||||
if (prim == ATTR_PRIM_GEOMETRY)
|
||||
offset -= mesh->prim_offset;
|
||||
else
|
||||
offset -= mesh->face_offset;
|
||||
}
|
||||
else if (element == ATTR_ELEMENT_CORNER || element == ATTR_ELEMENT_CORNER_BYTE) {
|
||||
if (prim == ATTR_PRIM_GEOMETRY)
|
||||
offset -= 3 * mesh->prim_offset;
|
||||
else
|
||||
offset -= mesh->corner_offset;
|
||||
}
|
||||
}
|
||||
else if (geom->is_hair()) {
|
||||
Hair *hair = static_cast<Hair *>(geom);
|
||||
if (element == ATTR_ELEMENT_CURVE)
|
||||
offset -= hair->prim_offset;
|
||||
else if (element == ATTR_ELEMENT_CURVE_KEY)
|
||||
offset -= hair->curve_key_offset;
|
||||
else if (element == ATTR_ELEMENT_CURVE_KEY_MOTION)
|
||||
offset -= hair->curve_key_offset;
|
||||
}
|
||||
else if (geom->is_pointcloud()) {
|
||||
if (element == ATTR_ELEMENT_VERTEX)
|
||||
offset -= geom->prim_offset;
|
||||
else if (element == ATTR_ELEMENT_VERTEX_MOTION)
|
||||
offset -= geom->prim_offset;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* attribute not found */
|
||||
desc.element = ATTR_ELEMENT_NONE;
|
||||
desc.offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Add the number of array elements attribute `mattr` needs to the size counter
 * of the array it is stored in. The array is selected by the same element/type
 * tests, in the same order, as when the data is written. A null attribute and
 * voxel attributes contribute nothing. */
static void update_attribute_element_size(Geometry *geom,
                                          Attribute *mattr,
                                          AttributePrimitive prim,
                                          size_t *attr_float_size,
                                          size_t *attr_float2_size,
                                          size_t *attr_float3_size,
                                          size_t *attr_float4_size,
                                          size_t *attr_uchar4_size)
{
  if (!mattr) {
    return;
  }

  const size_t num_elements = mattr->element_size(geom, prim);

  if (mattr->element == ATTR_ELEMENT_VOXEL) {
    /* Voxel data takes no space in the attribute arrays. */
    return;
  }

  if (mattr->element == ATTR_ELEMENT_CORNER_BYTE) {
    *attr_uchar4_size += num_elements;
  }
  else if (mattr->type == TypeDesc::TypeFloat) {
    *attr_float_size += num_elements;
  }
  else if (mattr->type == TypeFloat2) {
    *attr_float2_size += num_elements;
  }
  else if (mattr->type == TypeDesc::TypeMatrix) {
    /* NOTE(review): four float4 slots are reserved per matrix here, while
     * update_attribute_element_offset() consumes three per matrix. This looks
     * like a harmless over-allocation; confirm before changing. */
    *attr_float4_size += num_elements * 4;
  }
  else if (mattr->type == TypeFloat4 || mattr->type == TypeRGBA) {
    *attr_float4_size += num_elements;
  }
  else {
    *attr_float3_size += num_elements;
  }
}
|
||||
|
||||
void GeometryManager::device_update_attributes(Device *device,
|
||||
DeviceScene *dscene,
|
||||
Scene *scene,
|
||||
Progress &progress)
|
||||
{
|
||||
progress.set_status("Updating Mesh", "Computing attributes");
|
||||
|
||||
/* gather per mesh requested attributes. as meshes may have multiple
|
||||
* shaders assigned, this merges the requested attributes that have
|
||||
* been set per shader by the shader manager */
|
||||
vector<AttributeRequestSet> geom_attributes(scene->geometry.size());
|
||||
|
||||
for (size_t i = 0; i < scene->geometry.size(); i++) {
|
||||
Geometry *geom = scene->geometry[i];
|
||||
|
||||
geom->index = i;
|
||||
scene->need_global_attributes(geom_attributes[i]);
|
||||
|
||||
foreach (Node *node, geom->get_used_shaders()) {
|
||||
Shader *shader = static_cast<Shader *>(node);
|
||||
geom_attributes[i].add(shader->attributes);
|
||||
}
|
||||
|
||||
if (geom->is_hair() && static_cast<Hair *>(geom)->need_shadow_transparency()) {
|
||||
geom_attributes[i].add(ATTR_STD_SHADOW_TRANSPARENCY);
|
||||
}
|
||||
}
|
||||
|
||||
/* convert object attributes to use the same data structures as geometry ones */
|
||||
vector<AttributeRequestSet> object_attributes(scene->objects.size());
|
||||
vector<AttributeSet> object_attribute_values;
|
||||
|
||||
object_attribute_values.reserve(scene->objects.size());
|
||||
|
||||
for (size_t i = 0; i < scene->objects.size(); i++) {
|
||||
Object *object = scene->objects[i];
|
||||
Geometry *geom = object->geometry;
|
||||
size_t geom_idx = geom->index;
|
||||
|
||||
assert(geom_idx < scene->geometry.size() && scene->geometry[geom_idx] == geom);
|
||||
|
||||
object_attribute_values.push_back(AttributeSet(geom, ATTR_PRIM_GEOMETRY));
|
||||
|
||||
AttributeRequestSet &geom_requests = geom_attributes[geom_idx];
|
||||
AttributeRequestSet &attributes = object_attributes[i];
|
||||
AttributeSet &values = object_attribute_values[i];
|
||||
|
||||
for (size_t j = 0; j < object->attributes.size(); j++) {
|
||||
ParamValue ¶m = object->attributes[j];
|
||||
|
||||
/* add attributes that are requested and not already handled by the mesh */
|
||||
if (geom_requests.find(param.name()) && !geom->attributes.find(param.name())) {
|
||||
attributes.add(param.name());
|
||||
|
||||
Attribute *attr = values.add(param.name(), param.type(), ATTR_ELEMENT_OBJECT);
|
||||
assert(param.datasize() == attr->buffer.size());
|
||||
memcpy(attr->buffer.data(), param.data(), param.datasize());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* mesh attribute are stored in a single array per data type. here we fill
|
||||
* those arrays, and set the offset and element type to create attribute
|
||||
* maps next */
|
||||
|
||||
/* Pre-allocate attributes to avoid arrays re-allocation which would
|
||||
* take 2x of overall attribute memory usage.
|
||||
*/
|
||||
size_t attr_float_size = 0;
|
||||
size_t attr_float2_size = 0;
|
||||
size_t attr_float3_size = 0;
|
||||
size_t attr_float4_size = 0;
|
||||
size_t attr_uchar4_size = 0;
|
||||
|
||||
for (size_t i = 0; i < scene->geometry.size(); i++) {
|
||||
Geometry *geom = scene->geometry[i];
|
||||
AttributeRequestSet &attributes = geom_attributes[i];
|
||||
foreach (AttributeRequest &req, attributes.requests) {
|
||||
Attribute *attr = geom->attributes.find(req);
|
||||
|
||||
update_attribute_element_size(geom,
|
||||
attr,
|
||||
ATTR_PRIM_GEOMETRY,
|
||||
&attr_float_size,
|
||||
&attr_float2_size,
|
||||
&attr_float3_size,
|
||||
&attr_float4_size,
|
||||
&attr_uchar4_size);
|
||||
|
||||
if (geom->is_mesh()) {
|
||||
Mesh *mesh = static_cast<Mesh *>(geom);
|
||||
Attribute *subd_attr = mesh->subd_attributes.find(req);
|
||||
|
||||
update_attribute_element_size(mesh,
|
||||
subd_attr,
|
||||
ATTR_PRIM_SUBD,
|
||||
&attr_float_size,
|
||||
&attr_float2_size,
|
||||
&attr_float3_size,
|
||||
&attr_float4_size,
|
||||
&attr_uchar4_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < scene->objects.size(); i++) {
|
||||
Object *object = scene->objects[i];
|
||||
|
||||
foreach (Attribute &attr, object_attribute_values[i].attributes) {
|
||||
update_attribute_element_size(object->geometry,
|
||||
&attr,
|
||||
ATTR_PRIM_GEOMETRY,
|
||||
&attr_float_size,
|
||||
&attr_float2_size,
|
||||
&attr_float3_size,
|
||||
&attr_float4_size,
|
||||
&attr_uchar4_size);
|
||||
}
|
||||
}
|
||||
|
||||
dscene->attributes_float.alloc(attr_float_size);
|
||||
dscene->attributes_float2.alloc(attr_float2_size);
|
||||
dscene->attributes_float3.alloc(attr_float3_size);
|
||||
dscene->attributes_float4.alloc(attr_float4_size);
|
||||
dscene->attributes_uchar4.alloc(attr_uchar4_size);
|
||||
|
||||
/* The order of those flags needs to match that of AttrKernelDataType. */
|
||||
const bool attributes_need_realloc[AttrKernelDataType::NUM] = {
|
||||
dscene->attributes_float.need_realloc(),
|
||||
dscene->attributes_float2.need_realloc(),
|
||||
dscene->attributes_float3.need_realloc(),
|
||||
dscene->attributes_float4.need_realloc(),
|
||||
dscene->attributes_uchar4.need_realloc(),
|
||||
};
|
||||
|
||||
size_t attr_float_offset = 0;
|
||||
size_t attr_float2_offset = 0;
|
||||
size_t attr_float3_offset = 0;
|
||||
size_t attr_float4_offset = 0;
|
||||
size_t attr_uchar4_offset = 0;
|
||||
|
||||
/* Fill in attributes. */
|
||||
for (size_t i = 0; i < scene->geometry.size(); i++) {
|
||||
Geometry *geom = scene->geometry[i];
|
||||
AttributeRequestSet &attributes = geom_attributes[i];
|
||||
|
||||
/* todo: we now store std and name attributes from requests even if
|
||||
* they actually refer to the same mesh attributes, optimize */
|
||||
foreach (AttributeRequest &req, attributes.requests) {
|
||||
Attribute *attr = geom->attributes.find(req);
|
||||
|
||||
if (attr) {
|
||||
/* force a copy if we need to reallocate all the data */
|
||||
attr->modified |= attributes_need_realloc[Attribute::kernel_type(*attr)];
|
||||
}
|
||||
|
||||
update_attribute_element_offset(geom,
|
||||
dscene->attributes_float,
|
||||
attr_float_offset,
|
||||
dscene->attributes_float2,
|
||||
attr_float2_offset,
|
||||
dscene->attributes_float3,
|
||||
attr_float3_offset,
|
||||
dscene->attributes_float4,
|
||||
attr_float4_offset,
|
||||
dscene->attributes_uchar4,
|
||||
attr_uchar4_offset,
|
||||
attr,
|
||||
ATTR_PRIM_GEOMETRY,
|
||||
req.type,
|
||||
req.desc);
|
||||
|
||||
if (geom->is_mesh()) {
|
||||
Mesh *mesh = static_cast<Mesh *>(geom);
|
||||
Attribute *subd_attr = mesh->subd_attributes.find(req);
|
||||
|
||||
if (subd_attr) {
|
||||
/* force a copy if we need to reallocate all the data */
|
||||
subd_attr->modified |= attributes_need_realloc[Attribute::kernel_type(*subd_attr)];
|
||||
}
|
||||
|
||||
update_attribute_element_offset(mesh,
|
||||
dscene->attributes_float,
|
||||
attr_float_offset,
|
||||
dscene->attributes_float2,
|
||||
attr_float2_offset,
|
||||
dscene->attributes_float3,
|
||||
attr_float3_offset,
|
||||
dscene->attributes_float4,
|
||||
attr_float4_offset,
|
||||
dscene->attributes_uchar4,
|
||||
attr_uchar4_offset,
|
||||
subd_attr,
|
||||
ATTR_PRIM_SUBD,
|
||||
req.subd_type,
|
||||
req.subd_desc);
|
||||
}
|
||||
|
||||
if (progress.get_cancel())
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < scene->objects.size(); i++) {
|
||||
Object *object = scene->objects[i];
|
||||
AttributeRequestSet &attributes = object_attributes[i];
|
||||
AttributeSet &values = object_attribute_values[i];
|
||||
|
||||
foreach (AttributeRequest &req, attributes.requests) {
|
||||
Attribute *attr = values.find(req);
|
||||
|
||||
if (attr) {
|
||||
attr->modified |= attributes_need_realloc[Attribute::kernel_type(*attr)];
|
||||
}
|
||||
|
||||
update_attribute_element_offset(object->geometry,
|
||||
dscene->attributes_float,
|
||||
attr_float_offset,
|
||||
dscene->attributes_float2,
|
||||
attr_float2_offset,
|
||||
dscene->attributes_float3,
|
||||
attr_float3_offset,
|
||||
dscene->attributes_float4,
|
||||
attr_float4_offset,
|
||||
dscene->attributes_uchar4,
|
||||
attr_uchar4_offset,
|
||||
attr,
|
||||
ATTR_PRIM_GEOMETRY,
|
||||
req.type,
|
||||
req.desc);
|
||||
|
||||
/* object attributes don't care about subdivision */
|
||||
req.subd_type = req.type;
|
||||
req.subd_desc = req.desc;
|
||||
|
||||
if (progress.get_cancel())
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* create attribute lookup maps */
|
||||
if (scene->shader_manager->use_osl())
|
||||
update_osl_globals(device, scene);
|
||||
|
||||
update_svm_attributes(device, dscene, scene, geom_attributes, object_attributes);
|
||||
|
||||
if (progress.get_cancel())
|
||||
return;
|
||||
|
||||
/* copy to device */
|
||||
progress.set_status("Updating Mesh", "Copying Attributes to device");
|
||||
|
||||
dscene->attributes_float.copy_to_device_if_modified();
|
||||
dscene->attributes_float2.copy_to_device_if_modified();
|
||||
dscene->attributes_float3.copy_to_device_if_modified();
|
||||
dscene->attributes_float4.copy_to_device_if_modified();
|
||||
dscene->attributes_uchar4.copy_to_device_if_modified();
|
||||
|
||||
if (progress.get_cancel())
|
||||
return;
|
||||
|
||||
/* After mesh attributes and patch tables have been copied to device memory,
|
||||
* we need to update offsets in the objects. */
|
||||
scene->object_manager->device_update_geom_offsets(device, dscene, scene);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
196
intern/cycles/scene/geometry_bvh.cpp
Normal file
196
intern/cycles/scene/geometry_bvh.cpp
Normal file
@@ -0,0 +1,196 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#include "bvh/bvh.h"
|
||||
#include "bvh/bvh2.h"
|
||||
|
||||
#include "device/device.h"
|
||||
|
||||
#include "scene/attribute.h"
|
||||
#include "scene/camera.h"
|
||||
#include "scene/geometry.h"
|
||||
#include "scene/hair.h"
|
||||
#include "scene/light.h"
|
||||
#include "scene/mesh.h"
|
||||
#include "scene/object.h"
|
||||
#include "scene/pointcloud.h"
|
||||
#include "scene/scene.h"
|
||||
#include "scene/shader.h"
|
||||
#include "scene/shader_nodes.h"
|
||||
#include "scene/stats.h"
|
||||
#include "scene/volume.h"
|
||||
|
||||
#include "subd/patch_table.h"
|
||||
#include "subd/split.h"
|
||||
|
||||
#include "kernel/osl/globals.h"
|
||||
|
||||
#include "util/foreach.h"
|
||||
#include "util/log.h"
|
||||
#include "util/progress.h"
|
||||
#include "util/task.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Update the BVH owned by this geometry.
 *
 * Bounds are always recomputed. When `need_build_bvh()` reports that a BVH is required for the
 * selected layout, the existing BVH is either updated in place (a BVH exists and no rebuild was
 * requested) or deleted and created from scratch. `n` and `total` are only used to format the
 * progress status text. Nothing is done when the progress object reports cancellation. */
void Geometry::compute_bvh(Device *device,
                           DeviceScene *dscene,
                           SceneParams *params,
                           Progress *progress,
                           size_t n,
                           size_t total)
{
  if (progress->get_cancel()) {
    return;
  }

  compute_bounds();

  const BVHLayout bvh_layout = BVHParams::best_bvh_layout(
      params->bvh_layout, device->get_bvh_layout_mask(dscene->data.kernel_features));

  if (need_build_bvh(bvh_layout)) {
    /* Status text: the geometry name is included only when it is not empty. */
    const uint display_index = (uint)(n + 1);
    const uint display_total = (uint)total;
    string msg = "Updating Geometry BVH ";
    msg += name.empty() ? string_printf("%u/%u", display_index, display_total) :
                          string_printf("%s %u/%u", name.c_str(), display_index, display_total);

    /* Temporary object wrapping this geometry.
     *
     * All visibility bits are set at the geometry level BVH; the object level BVH is where
     * actual visibility is tested. */
    Object object;
    object.set_is_shadow_catcher(true);
    object.set_visibility(~0);
    object.set_geometry(this);

    vector<Geometry *> geometry;
    vector<Object *> objects;
    geometry.push_back(this);
    objects.push_back(&object);

    const bool can_refit = (bvh != nullptr) && !need_update_rebuild;

    if (!can_refit) {
      progress->set_status(msg, "Building BVH");

      BVHParams bparams;
      bparams.bvh_layout = bvh_layout;
      bparams.bvh_type = params->bvh_type;
      bparams.use_spatial_split = params->use_bvh_spatial_split;
      bparams.use_compact_structure = params->use_bvh_compact_structure;
      bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
                                    params->use_bvh_unaligned_nodes;
      bparams.num_motion_triangle_steps = params->num_bvh_time_steps;
      bparams.num_motion_curve_steps = params->num_bvh_time_steps;
      bparams.num_motion_point_steps = params->num_bvh_time_steps;
      bparams.curve_subdivisions = params->curve_subdivisions();

      /* Drop the previous BVH (if any) before creating its replacement. */
      delete bvh;
      bvh = BVH::create(bparams, geometry, objects, device);
      MEM_GUARDED_CALL(progress, device->build_bvh, bvh, *progress, false);
    }
    else {
      progress->set_status(msg, "Refitting BVH");

      /* Keep the existing BVH, point it at the current geometry and let the device update it. */
      bvh->replace_geometry(geometry, objects);
      device->build_bvh(bvh, *progress, true);
    }
  }

  /* The pending update requests are cleared whether or not a BVH was built. */
  need_update_rebuild = false;
  need_update_bvh_for_offset = false;
}
|
||||
|
||||
/* Build (or refit) the top-level scene BVH and publish its data through `dscene`.
 *
 * The scene BVH is created on first use. For the BVH2 layout the packed BVH arrays are moved
 * out of the BVH object and copied to the device; for any other layout the packed data stays
 * empty and the root index is set to -1. Returns early, before any packed data is copied, when
 * the progress object reports cancellation after the build. */
void GeometryManager::device_update_bvh(Device *device,
                                        DeviceScene *dscene,
                                        Scene *scene,
                                        Progress &progress)
{
  /* bvh build */
  progress.set_status("Updating Scene BVH", "Building");

  const SceneParams &scene_params = scene->params;

  BVHParams bparams;
  bparams.top_level = true;
  bparams.bvh_layout = BVHParams::best_bvh_layout(
      scene_params.bvh_layout, device->get_bvh_layout_mask(dscene->data.kernel_features));
  bparams.bvh_type = scene_params.bvh_type;
  bparams.use_spatial_split = scene_params.use_bvh_spatial_split;
  bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
                                scene_params.use_bvh_unaligned_nodes;
  bparams.num_motion_triangle_steps = scene_params.num_bvh_time_steps;
  bparams.num_motion_curve_steps = scene_params.num_bvh_time_steps;
  bparams.num_motion_point_steps = scene_params.num_bvh_time_steps;
  bparams.curve_subdivisions = scene_params.curve_subdivisions();

  VLOG_INFO << "Using " << bvh_layout_name(bparams.bvh_layout) << " layout.";

  /* Refitting is only requested when a scene BVH already exists and the layout is OptiX or
   * Metal. This must be evaluated before the BVH is created below. */
  const bool layout_allows_refit = bparams.bvh_layout == BVHLayout::BVH_LAYOUT_OPTIX ||
                                   bparams.bvh_layout == BVHLayout::BVH_LAYOUT_METAL;
  const bool can_refit = (scene->bvh != nullptr) && layout_allows_refit;

  if (!scene->bvh) {
    scene->bvh = BVH::create(bparams, scene->geometry, scene->objects, device);
  }
  BVH *bvh = scene->bvh;

  device->build_bvh(bvh, progress, can_refit);

  if (progress.get_cancel()) {
    return;
  }

  PackedBVH pack;
  if (bparams.bvh_layout == BVH_LAYOUT_BVH2) {
    pack = std::move(static_cast<BVH2 *>(bvh)->pack);
  }
  else {
    pack.root_index = -1;
  }

  /* copy to device */
  progress.set_status("Updating Scene BVH", "Copying BVH to device");

  /* When using BVH2, we always have to copy/update the data as its layout is dependent on the
   * BVH's leaf nodes which may be different when the objects or vertices move. */

  /* Hand a non-empty packed array over to its device array and upload it. */
  const auto upload_if_filled = [](auto &device_array, auto &packed) {
    if (packed.size()) {
      device_array.steal_data(packed);
      device_array.copy_to_device();
    }
  };

  upload_if_filled(dscene->bvh_nodes, pack.nodes);
  upload_if_filled(dscene->bvh_leaf_nodes, pack.leaf_nodes);
  upload_if_filled(dscene->object_node, pack.object_node);
  upload_if_filled(dscene->prim_type, pack.prim_type);
  upload_if_filled(dscene->prim_visibility, pack.prim_visibility);
  upload_if_filled(dscene->prim_index, pack.prim_index);
  upload_if_filled(dscene->prim_object, pack.prim_object);
  upload_if_filled(dscene->prim_time, pack.prim_time);

  dscene->data.bvh.root = pack.root_index;
  dscene->data.bvh.use_bvh_steps = (scene_params.num_bvh_time_steps != 0);
  dscene->data.bvh.curve_subdivisions = scene_params.curve_subdivisions();
  /* The scene handle is set in 'CPUDevice::const_copy_to' and 'OptiXDevice::const_copy_to' */
  dscene->data.device_bvh = 0;
}
|
||||
|
||||
CCL_NAMESPACE_END
|
223
intern/cycles/scene/geometry_mesh.cpp
Normal file
223
intern/cycles/scene/geometry_mesh.cpp
Normal file
@@ -0,0 +1,223 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#include "bvh/bvh.h"
|
||||
#include "bvh/bvh2.h"
|
||||
|
||||
#include "device/device.h"
|
||||
|
||||
#include "scene/attribute.h"
|
||||
#include "scene/camera.h"
|
||||
#include "scene/geometry.h"
|
||||
#include "scene/hair.h"
|
||||
#include "scene/light.h"
|
||||
#include "scene/mesh.h"
|
||||
#include "scene/object.h"
|
||||
#include "scene/osl.h"
|
||||
#include "scene/pointcloud.h"
|
||||
#include "scene/scene.h"
|
||||
#include "scene/shader.h"
|
||||
#include "scene/shader_nodes.h"
|
||||
#include "scene/stats.h"
|
||||
#include "scene/volume.h"
|
||||
|
||||
#include "subd/patch_table.h"
|
||||
#include "subd/split.h"
|
||||
|
||||
#ifdef WITH_OSL
|
||||
# include "kernel/osl/globals.h"
|
||||
#endif
|
||||
|
||||
#include "util/foreach.h"
|
||||
#include "util/log.h"
|
||||
#include "util/progress.h"
|
||||
#include "util/task.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Pack triangle, curve, point cloud and patch data of all scene geometry into the shared
 * `dscene` arrays and copy those arrays to the device.
 *
 * The first pass sums up the required array sizes (and assigns each mesh's patch table offset).
 * Each following section allocates its arrays, lets every matching geometry pack its data at the
 * geometry's own offset and then uploads the arrays. The `Device` argument is unused. The
 * function returns as soon as the progress object reports cancellation, without uploading the
 * section that was being packed. */
void GeometryManager::device_update_mesh(Device *,
                                         DeviceScene *dscene,
                                         Scene *scene,
                                         Progress &progress)
{
  /* Count. */
  size_t vert_size = 0;
  size_t tri_size = 0;

  size_t curve_key_size = 0;
  size_t curve_size = 0;
  size_t curve_segment_size = 0;

  size_t point_size = 0;

  size_t patch_size = 0;

  for (Geometry *geom : scene->geometry) {
    const bool is_mesh_or_volume = geom->geometry_type == Geometry::MESH ||
                                   geom->geometry_type == Geometry::VOLUME;

    if (is_mesh_or_volume) {
      Mesh *mesh = static_cast<Mesh *>(geom);

      vert_size += mesh->verts.size();
      tri_size += mesh->num_triangles();

      const auto num_subd_faces = mesh->get_num_subd_faces();
      if (num_subd_faces) {
        /* The last subdivision face determines how many ptex faces the mesh has in total. */
        Mesh::SubdFace last = mesh->get_subd_face(num_subd_faces - 1);
        patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8;

        /* patch tables are stored in same array so include them in patch_size */
        if (mesh->patch_table) {
          mesh->patch_table_offset = patch_size;
          patch_size += mesh->patch_table->total_size();
        }
      }
    }
    else if (geom->is_hair()) {
      Hair *hair = static_cast<Hair *>(geom);

      curve_key_size += hair->get_curve_keys().size();
      curve_size += hair->num_curves();
      curve_segment_size += hair->num_segments();
    }
    else if (geom->is_pointcloud()) {
      PointCloud *pointcloud = static_cast<PointCloud *>(geom);
      point_size += pointcloud->num_points();
    }
  }

  /* Fill in all the arrays. */

  /* Triangles. */
  if (tri_size != 0) {
    /* normals */
    progress.set_status("Updating Mesh", "Computing normals");

    packed_float3 *tri_verts = dscene->tri_verts.alloc(vert_size);
    uint *tri_shader = dscene->tri_shader.alloc(tri_size);
    packed_float3 *vnormal = dscene->tri_vnormal.alloc(vert_size);
    packed_uint3 *tri_vindex = dscene->tri_vindex.alloc(tri_size);
    uint *tri_patch = dscene->tri_patch.alloc(tri_size);
    float2 *tri_patch_uv = dscene->tri_patch_uv.alloc(vert_size);

    /* When any of these arrays needs reallocation, every mesh is packed again regardless of
     * its own modification flags. */
    const bool copy_all_data = dscene->tri_shader.need_realloc() ||
                               dscene->tri_vindex.need_realloc() ||
                               dscene->tri_vnormal.need_realloc() ||
                               dscene->tri_patch.need_realloc() ||
                               dscene->tri_patch_uv.need_realloc();

    for (Geometry *geom : scene->geometry) {
      if (geom->geometry_type != Geometry::MESH && geom->geometry_type != Geometry::VOLUME) {
        continue;
      }

      Mesh *mesh = static_cast<Mesh *>(geom);

      const bool repack_shaders = mesh->shader_is_modified() || mesh->smooth_is_modified() ||
                                  mesh->triangles_is_modified() || copy_all_data;
      if (repack_shaders) {
        mesh->pack_shaders(scene, &tri_shader[mesh->prim_offset]);
      }

      const bool repack_normals = mesh->verts_is_modified() || copy_all_data;
      if (repack_normals) {
        mesh->pack_normals(&vnormal[mesh->vert_offset]);
      }

      const bool repack_verts = mesh->verts_is_modified() || mesh->triangles_is_modified() ||
                                mesh->vert_patch_uv_is_modified() || copy_all_data;
      if (repack_verts) {
        mesh->pack_verts(&tri_verts[mesh->vert_offset],
                         &tri_vindex[mesh->prim_offset],
                         &tri_patch[mesh->prim_offset],
                         &tri_patch_uv[mesh->vert_offset]);
      }

      if (progress.get_cancel()) {
        return;
      }
    }

    /* vertex coordinates */
    progress.set_status("Updating Mesh", "Copying Mesh to device");

    dscene->tri_verts.copy_to_device_if_modified();
    dscene->tri_shader.copy_to_device_if_modified();
    dscene->tri_vnormal.copy_to_device_if_modified();
    dscene->tri_vindex.copy_to_device_if_modified();
    dscene->tri_patch.copy_to_device_if_modified();
    dscene->tri_patch_uv.copy_to_device_if_modified();
  }

  /* Curves. */
  if (curve_segment_size != 0) {
    progress.set_status("Updating Mesh", "Copying Curves to device");

    float4 *curve_keys = dscene->curve_keys.alloc(curve_key_size);
    KernelCurve *curves = dscene->curves.alloc(curve_size);
    KernelCurveSegment *curve_segments = dscene->curve_segments.alloc(curve_segment_size);

    const bool copy_all_data = dscene->curve_keys.need_realloc() ||
                               dscene->curves.need_realloc() ||
                               dscene->curve_segments.need_realloc();

    for (Geometry *geom : scene->geometry) {
      if (!geom->is_hair()) {
        continue;
      }

      Hair *hair = static_cast<Hair *>(geom);

      /* Key coordinates/radii or per-curve data changed on this hair. */
      const bool hair_modified = hair->curve_radius_is_modified() ||
                                 hair->curve_keys_is_modified() ||
                                 hair->curve_shader_is_modified() ||
                                 hair->curve_first_key_is_modified();

      /* Unchanged hair is skipped entirely, including the cancel check below. */
      if (!(hair_modified || copy_all_data)) {
        continue;
      }

      hair->pack_curves(scene,
                        &curve_keys[hair->curve_key_offset],
                        &curves[hair->prim_offset],
                        &curve_segments[hair->curve_segment_offset]);

      if (progress.get_cancel()) {
        return;
      }
    }

    dscene->curve_keys.copy_to_device_if_modified();
    dscene->curves.copy_to_device_if_modified();
    dscene->curve_segments.copy_to_device_if_modified();
  }

  /* Point clouds: always packed and always copied. */
  if (point_size != 0) {
    progress.set_status("Updating Mesh", "Copying Point clouds to device");

    float4 *points = dscene->points.alloc(point_size);
    uint *points_shader = dscene->points_shader.alloc(point_size);

    for (Geometry *geom : scene->geometry) {
      if (!geom->is_pointcloud()) {
        continue;
      }

      PointCloud *pointcloud = static_cast<PointCloud *>(geom);
      pointcloud->pack(
          scene, &points[pointcloud->prim_offset], &points_shader[pointcloud->prim_offset]);

      if (progress.get_cancel()) {
        return;
      }
    }

    dscene->points.copy_to_device();
    dscene->points_shader.copy_to_device();
  }

  /* Patches: only rebuilt when the patch array itself needs reallocation. */
  if (patch_size != 0 && dscene->patches.need_realloc()) {
    progress.set_status("Updating Mesh", "Copying Patches to device");

    uint *patch_data = dscene->patches.alloc(patch_size);

    for (Geometry *geom : scene->geometry) {
      if (!geom->is_mesh()) {
        continue;
      }

      Mesh *mesh = static_cast<Mesh *>(geom);
      mesh->pack_patches(&patch_data[mesh->patch_offset]);

      /* The patch table lives in the same array, at the offset assigned while counting. */
      if (mesh->patch_table) {
        mesh->patch_table->copy_adjusting_offsets(&patch_data[mesh->patch_table_offset],
                                                  mesh->patch_table_offset);
      }

      if (progress.get_cancel()) {
        return;
      }
    }

    dscene->patches.copy_to_device();
  }
}
|
||||
|
||||
CCL_NAMESPACE_END
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user