Compare commits

478 commits: temp-link-... → gpu-shader
SHA1:

f240ac1673 a590474f4c b2462b6f5c 21ee89c52f 2c95da88aa 7358a5aba2 9159295c3c 62a04f7aa6
38a3819171 3844e9dbe7 ea93e5df6c 60befc8f02 62b50c612f 9e5aae4215 c09e8a3590 792badcfef
c0a2b21744 fb4851fbbc b40e930ac7 cf266ecaa6 e4986f92f3 fab39440e9 a9eb4e6f59 a6b7f32112
db450c9320 70424195a8 71c80bd939 2cbb9d7a76 b716a771b4 3bb8d173e7 fca8eb0185 2c2b79191f
1a7c32a0ab 4b13dcaf02 ceb25cbeba 8897e0aa8f 5efddc4347 1df8abff25 47276b8470 0bedd5d14f
436ce22194 dab04bc053 d7b7cbb047 0479a66313 611e4ffaab 89b927a720 fecdf9d44b b7c98c87ac
a6d1a2d3fc bba6fe83e2 6ab3349bd4 cf299bee80 3e65bb86f9 f392ce50c4 cd2849c89b 605cdc4346
0452a04f1a 6c8f73b220 b9b98448a6 fbb4a7eb43 f657356062 b02ac2d8be 84be741329 6987060f70
28870a8f89 59754ef0b2 34370d9fdf b42494cf6b d1a4e043bd f749506163 8600d4491f 456d5e14b8
481f032f5c 34615cd269 48c2b4012f 29681f186e e2b736aa40 73b1ad1920 bfff9ca5f1 ee0277271c
cc6bcb53b2 5ad4ca4e02 8a84a61f6b 0129178376 55c82d8380 1706bf7780 336ca6796a 25c83c217b
059da44fbc d7cf7d4048 fe274d91a1 9c2a4d158c 875f24352a 0624235900 be876b8db6 178947dec3
819b9bdfa1 77ddc6e350 7e8f9213e9 7b09213f2f 9b6f3d2d0a 9bbb5f5a6a 29f6ec56e6 0b246ed813
31864a40ba c850189adf db20837c3a 6c16bb2706 6eaa69c66c fb470c256a 873f6148ad 940e6525c7
15011e0b70 6ee2abde82 d455eadcd8 b3ee9f44cf 1b2ee3cf20 e949ac0bfc 59ffe1c5b1 f2bb42a095
3d447b6335 b20997cb34 3baaab15fc 411261fb32 092df87534 15ecd47b96 01df48a983 a0780ad625
cfbc9df60e 217d0a1524 51a7961e09 c3fed4d463 fb0ea94c63 50ad0e15fe 00e4d665f4 fabd088067
1222c45544 ba8dd0f24f 06a2e2b28c ef687bd7c2 97533eede4 1b686c60b5 0f1a200a67 9e3a913b35
1a1ddcb5e2 ec71054a9b 06ead314b6 330290d2a8 33c5e7bcd5 d6ea881a74 d7aaa145c6 04ec36f677
0852805ed7 a20e703d1a 06691d1b21 1b94c53aa6 7d5ef64bfb fa6a913ef1 83e245023c eb071c9ff4
de3fda29c7 48e64a5fb5 4ea6b4ba84 992634427e b8dc845e57 4d09a692e2 5ed3a5d023 3f288e9414
b2d37c35d0 167ee8f2c7 fd2a155d06 f190f2d267 0c33411bdd ea42c1a22e f61a73093b ada6742601
701f2dfd5b b8bf40ed4b 717a971035 2a9cfdac7e c7f9a782aa 25fa6c74b9 7c4e4d605c 3531021d1b
12fc395436 805181bffa 77df32548b 5e6fdaa07f 1d1855e95f cb3ba68ec4 f19bd637e2 f0be276514
ed91e759d1 67b4eecac9 dd31b8bd50 5816eb4c56 8d1357ea6b beb9e332ca 31afa1bb9a 0624acf088
b926f54f3c d1f944c186 d19e35873f c0d52db783 f71813204c 3ad2bf1327 bd2e3bb7bd 2b63a76041
e5774282b9 e1a3b697ec 8c0370ef7b 9cf3d841a8 f1f7a8b018 a182b05f07 032ab0270d daaa43232d
ceec400975 d8fd575af9 d6b5251572 b071083496 fa7a6d67a8 d9bc8f189c 063ad8635e 9937d5379c
89d5714d8f ea7efa5569 00a9617f92 c3422c48ad e5f05bc7a6 f5dde382af 83a4d51997 c2ab47e729
51b8e34fb7 473be239c3 e3c974b7e4 6e6123b40f ecad33f214 61bffa565e f72dc00569 9bdf3fa5f0
4c988eb3e1 dea26253a0 8290edefad 2f39b45e8c f829b86039 93f26d652e fbf4fe6963 1e4d1eb398
59da22c309 b496c1c721 3189171a94 cf83719761 c9fb08e075 f30e1fd2f0 25d30e6c99 cfd0e96e47
7293c1b357 9d7422b817 917218269e 1572c4d3d3 bd37553850 0335df9384 ba6427adfa b3529ecf0e
72ee62e0da bee7a56687 07af45eec5 64003fa4b0 85ac9b8584 ce0d817bb7 12a986c9b5 c7a1e115b5
faa8aa3bb9 052c22199d 7da714f387 da14a482f2 d4c868da9f 6d35972b06 7d985d6b69 57ed435def
7e42ae7c1a 165cacc6f0 62da6ffe08 46f5f60c13 a040d2a93a 7e148c45c8 d3c45e1c39 ef8240e64c
10a6a540af 8976b72843 62da41d63d c3472cb11c a5c59fb90e c2c65cc4bf 622e6f05f1 7e82c840b7
2549384baa 73047c69ea e8a8bb67fc ab9ec193c3 c4ea5cb1a3 1143bf281a acc800d24d dc378bf1a4
0a6f428be7 60b8eb30bb eed48a7322 738f4fbc5e ebb4aba325 6b4ca78108 ec432ae998 8d3a771574
55c69373e8 8b13cf5667 5941c39fbf 30f9034182 e5a7dd8ab6 5b787c24fb 888b879f5f 1b55b911f2
71131b4969 b4d9b8b7f8 ef0b8d6306 d845ba481c cbca71a7cf 809ae823b7 9d0d4b8601 8a8bf99717
2b394e1108 a89529d8db 9f5290e3bc c671b5eee4 896d3f1ce5 76105eb752 d48523cb4d ddf66cd060
b4cfe80547 c7a88cf91a 5f7d5c0809 a470e3c9d1 0ea60cf6b8 3fe735d371 a47359ff36 de8a46c6ad
26502f3d89 2b633f12ad 86ca206db8 456876208b 32c7687859 a87253942d ae74ad191c aa1c44a113
5c0d4753cf 02333544d1 1061f5a1ba 1a7757b0bc f133c6b094 d612d92630 1e1c870001 0533f2851e
d6e682a7b0 bd734cc441 e61da8e4fb f3bdabbe24 50f32025ac 393879f30c 3d9c8397fc c9c7658926
3ca41b7312 4bc08b79aa 25e7365d0d 52c617802f 7aa39b40f4 ce395c84a3 d26d3cfe19 06a74e7816
9f31b9b7d3 9be6880d02 4a98faf9f1 8c240f50b2 b7e2408ea4 bb64155c63 c63e735f6b b8d53b703a
9787b46f09 03f0be35d6 d753ebd40a ddf0bacaa9 3929db265f e1bd4bbb66 f1a8644121 1ec7075ff2
22ffd69a91 9ca8bf0b29 040630bb9a 3fa86f4b28 7689f501e2 e507a789b3 6b0008129e c8e93da0a7
abf62d06d1 35ae7ab933 cc17ed26ce 20224369d9 e9b7e5e0b9 67e5edbaa3 f565620435 a6e4cb092e
bc0c06ecbe 53468c2b13 47b8baa5c4 03e22da665 aa2f6e5977 ff0c42acfc a49d6a5350 bec72a43ae
aa440923c8 1f6010e609 3a4c8f406a 7b530c6096 4648c4990c 44239fa106 7c25399576 a356e4fb3f
4f246b8bf9 7383f95443 74fe19b193 fd0ba6449b ed0df0f3c6 aa13c4b386 e5fb5c9d7b e452c43fd6
570331ca96 0bdf9d10a4 cc949f0a40 fb0ae66ee5 368d794407 8eff3b5fe0 5f44298280 9dc3f454d9
9b2f212016 07a4338b3a c092cc35b3 accdd4c1bc 41607ced2b 625349a6bd 65bbac6692 faeb2cc900
440a3475b8 9daf6a69a6 0bcf014bcf 65c5ebf577 0b01b81754 41b0820ddd 45bd98d4cf 6c24cafecc
cb487b6507 09f1be53d8 c56cf50bd0 de8e13036b 4e2478940e 6b0a6c2ca9 04b4ec7889 ad679ee747
486d1e8510 a7540f4b36 2eb94f3036 2772a033c9 8772a6fb9b b3597f310d d5d97e4169 afc60f9957
e0dae0f98f ab7214ca2e fe2ed4a229 abab16f7c7 33beec1cec e1c4e5df22
```diff
@@ -269,5 +269,9 @@ StatementMacros:
   - PyObject_HEAD
   - PyObject_VAR_HEAD
 
+StatementMacros:
+  - GPU_STAGE_INTERFACE_CREATE
+  - GPU_SHADER_DESCRIPTOR
+
 MacroBlockBegin: "^BSDF_CLOSURE_CLASS_BEGIN$"
 MacroBlockEnd: "^BSDF_CLOSURE_CLASS_END$"
```
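As background (not part of the diff): `StatementMacros` tells clang-format that an invocation of the listed macro is a complete statement, so code following a semicolon-less call is not treated as its continuation. A toy sketch with a hypothetical macro body:

```cpp
// Toy illustration (hypothetical macro body): with "GPU_SHADER_DESCRIPTOR"
// listed under StatementMacros, clang-format treats the call below as a
// finished statement instead of folding the next declaration into it.
#define GPU_SHADER_DESCRIPTOR(name) struct name##_descriptor {} name##_desc;

GPU_SHADER_DESCRIPTOR(flat_color)  // no trailing semicolon needed

int unrelated_symbol = 0;  // formatted independently of the macro above

int main() { return unrelated_symbol; }
```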
```diff
@@ -12,6 +12,8 @@ Checks: >
   -readability-avoid-const-params-in-decls,
   -readability-simplify-boolean-expr,
   -readability-make-member-function-const,
+  -readability-suspicious-call-argument,
+  -readability-redundant-member-init,
 
   -readability-misleading-indentation,
 
```
```diff
@@ -25,6 +27,8 @@ Checks: >
   -bugprone-branch-clone,
   -bugprone-macro-parentheses,
   -bugprone-reserved-identifier,
+  -bugprone-easily-swappable-parameters,
+  -bugprone-implicit-widening-of-multiplication-result,
 
   -bugprone-sizeof-expression,
   -bugprone-integer-division,
```
```diff
@@ -40,7 +44,8 @@ Checks: >
   -modernize-pass-by-value,
   # Cannot be enabled yet, because using raw string literals in tests breaks
   # the windows compiler currently.
-  -modernize-raw-string-literal
+  -modernize-raw-string-literal,
+  -modernize-return-braced-init-list
 
 CheckOptions:
   - key: modernize-use-default-member-init.UseAssignment
```
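To make the comment concrete, a sketch (illustrative code, not Blender's) of what the two modernize checks involved would flag:

```cpp
#include <string>

struct Size2 {
  int width;
  int height;
};

Size2 make_size(int w, int h)
{
  // modernize-return-braced-init-list (newly disabled above) would
  // suggest writing this as `return {w, h};`.
  return Size2{w, h};
}

std::string pattern()
{
  // modernize-raw-string-literal would suggest R"(\d+\.\d+)" here;
  // per the comment, raw string literals in tests currently break
  // the Windows compiler, so the check stays disabled.
  return "\\d+\\.\\d+";
}
```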
```diff
@@ -411,6 +411,7 @@ option(WITH_CYCLES "Enable Cycles Render Engine" ON)
 option(WITH_CYCLES_OSL "Build Cycles with OpenShadingLanguage support" ON)
 option(WITH_CYCLES_EMBREE "Build Cycles with Embree support" ON)
 option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON)
+option(WITH_CYCLES_DEBUG "Build Cycles with options useful for debugging (e.g., MIS)" OFF)
 
 option(WITH_CYCLES_STANDALONE "Build Cycles standalone application" OFF)
 option(WITH_CYCLES_STANDALONE_GUI "Build Cycles standalone with GUI" OFF)
```
```diff
@@ -440,7 +441,11 @@ mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
 mark_as_advanced(WITH_CUDA_DYNLOAD)
 
 # AMD HIP
+if(WIN32)
+  option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
+else()
 option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" OFF)
+endif()
 option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
 set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 CACHE STRING "AMD HIP architectures to build binaries for")
 mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
```
```diff
@@ -1065,7 +1070,7 @@ if(MSVC)
   add_definitions(-D__LITTLE_ENDIAN__)
 
   # OSX-Note: as we do cross-compiling with specific set architecture,
-  # endianess-detection and auto-setting is counterproductive
+  # endianness-detection and auto-setting is counterproductive
   # so we just set endianness according CMAKE_OSX_ARCHITECTURES
 
 elseif(CMAKE_OSX_ARCHITECTURES MATCHES i386 OR CMAKE_OSX_ARCHITECTURES MATCHES x86_64 OR CMAKE_OSX_ARCHITECTURES MATCHES arm64)
```
```diff
@@ -1755,7 +1760,7 @@ endif()
 set(CMAKE_CXX_STANDARD 17)
 # If C++17 is not available, downgrading to an earlier standard is NOT OK.
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
-# Do not enable compiler specific language extentions.
+# Do not enable compiler specific language extensions.
 set(CMAKE_CXX_EXTENSIONS OFF)
 
 # Make MSVC properly report the value of the __cplusplus preprocessor macro
```
```diff
@@ -51,7 +51,7 @@ Other Convenience Targets
 * config: Run cmake configuration tool to set build options.
 * deps: Build library dependencies (intended only for platform maintainers).
 
-The existance of locally build dependancies overrides the pre-built dependencies from subversion.
+The existance of locally build dependencies overrides the pre-built dependencies from subversion.
 These must be manually removed from '../lib/' to go back to using the pre-compiled libraries.
 
 Project Files
```
```diff
@@ -17,7 +17,7 @@
 # ***** END GPL LICENSE BLOCK *****
 
 ########################################################################
-# Copy all generated files to the proper strucure as blender prefers
+# Copy all generated files to the proper structure as blender prefers
 ########################################################################
 
 if(NOT DEFINED HARVEST_TARGET)
```
```diff
@@ -42,6 +42,7 @@ ExternalProject_Add(nanovdb
   URL_HASH ${NANOVDB_HASH_TYPE}=${NANOVDB_HASH}
   PREFIX ${BUILD_DIR}/nanovdb
   SOURCE_SUBDIR nanovdb
+  PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/nanovdb/src/nanovdb < ${PATCH_DIR}/nanovdb.diff
   CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/nanovdb ${DEFAULT_CMAKE_FLAGS} ${NANOVDB_EXTRA_ARGS}
   INSTALL_DIR ${LIBDIR}/nanovdb
 )
```
```diff
@@ -39,7 +39,7 @@ endif()
 set(DOWNLOAD_DIR "${CMAKE_CURRENT_BINARY_DIR}/downloads" CACHE STRING "Path for downloaded files")
 # This path must be hard-coded like this, so that the GNUmakefile knows where it is and can pass it to make_source_archive.py:
 set(PACKAGE_DIR "${CMAKE_CURRENT_BINARY_DIR}/packages")
-option(PACKAGE_USE_UPSTREAM_SOURCES "Use soures upstream to download the package sources, when OFF the blender mirror will be used" ON)
+option(PACKAGE_USE_UPSTREAM_SOURCES "Use sources upstream to download the package sources, when OFF the blender mirror will be used" ON)
 
 file(TO_CMAKE_PATH ${DOWNLOAD_DIR} DOWNLOAD_DIR)
 file(TO_CMAKE_PATH ${PACKAGE_DIR} PACKAGE_DIR)
```
```diff
@@ -24,7 +24,7 @@ if(MSVC)
   add_custom_command(
     OUTPUT ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
     COMMAND echo packaging python
-    COMMAND echo this should ouput at ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
+    COMMAND echo this should output at ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
     COMMAND ${CMAKE_COMMAND} -E make_directory ${PYTARGET}/libs
     COMMAND ${CMAKE_COMMAND} -E copy ${PYSRC}/libs/python${PYTHON_SHORT_VERSION_NO_DOTS}.lib ${PYTARGET}/libs/python${PYTHON_SHORT_VERSION_NO_DOTS}.lib
     COMMAND ${CMAKE_COMMAND} -E copy ${PYSRC}/python.exe ${PYTARGET}/bin/python.exe
```
```diff
@@ -43,7 +43,7 @@ if(MSVC)
   add_custom_command(
     OUTPUT ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
     COMMAND echo packaging python
-    COMMAND echo this should ouput at ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
+    COMMAND echo this should output at ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
     COMMAND ${CMAKE_COMMAND} -E make_directory ${PYTARGET}/libs
     COMMAND ${CMAKE_COMMAND} -E copy ${PYSRC}/libs/python${PYTHON_SHORT_VERSION_NO_DOTS}${PYTHON_POSTFIX}.lib ${PYTARGET}/libs/python${PYTHON_SHORT_VERSION_NO_DOTS}${PYTHON_POSTFIX}.lib
     COMMAND ${CMAKE_COMMAND} -E copy ${PYSRC}/python${PYTHON_POSTFIX}.exe ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
```
```diff
@@ -1826,7 +1826,7 @@ compile_OCIO() {
   # Force linking against static libs
   #rm -f $_inst/lib/*.so*
 
-  # Additional depencencies
+  # Additional dependencies
   #cp ext/dist/lib/libtinyxml.a $_inst/lib
   #cp ext/dist/lib/libyaml-cpp.a $_inst/lib
 
```
build_files/build_environment/patches/nanovdb.diff (new file, 374 lines). Its contents, the patch applied by the `PATCH_COMMAND` above:

```diff
Index: nanovdb/nanovdb/NanoVDB.h
===================================================================
--- a/nanovdb/nanovdb/NanoVDB.h  (revision 62751)
+++ b/nanovdb/nanovdb/NanoVDB.h  (working copy)
@@ -152,8 +152,8 @@
 
 #endif // __CUDACC_RTC__
 
-#ifdef __CUDACC__
-// Only define __hostdev__ when using NVIDIA CUDA compiler
+#if defined(__CUDACC__) || defined(__HIP__)
+// Only define __hostdev__ when using NVIDIA CUDA or HIP compiler
 #define __hostdev__ __host__ __device__
 #else
 #define __hostdev__
@@ -461,7 +461,7 @@
 /// Maximum floating-point values
 template<typename T>
 struct Maximum;
-#ifdef __CUDA_ARCH__
+#if defined(__CUDA_ARCH__) || defined(__HIP__)
 template<>
 struct Maximum<int>
 {
@@ -1006,10 +1006,10 @@
 using Vec3i = Vec3<int>;
 
 /// @brief Return a single precision floating-point vector of this coordinate
-Vec3f Coord::asVec3s() const { return Vec3f(float(mVec[0]), float(mVec[1]), float(mVec[2])); }
+inline __hostdev__ Vec3f Coord::asVec3s() const { return Vec3f(float(mVec[0]), float(mVec[1]), float(mVec[2])); }
 
 /// @brief Return a double precision floating-point vector of this coordinate
-Vec3d Coord::asVec3d() const { return Vec3d(double(mVec[0]), double(mVec[1]), double(mVec[2])); }
+inline __hostdev__ Vec3d Coord::asVec3d() const { return Vec3d(double(mVec[0]), double(mVec[1]), double(mVec[2])); }
 
 // ----------------------------> Vec4 <--------------------------------------
 
@@ -1820,7 +1820,7 @@
 }; // Map
 
 template<typename Mat4T>
-void Map::set(const Mat4T& mat, const Mat4T& invMat, double taper)
+__hostdev__ void Map::set(const Mat4T& mat, const Mat4T& invMat, double taper)
 {
     float * mf = mMatF, *vf = mVecF;
     float* mif = mInvMatF;
@@ -2170,7 +2170,7 @@
 }; // Class Grid
 
 template<typename TreeT>
-int Grid<TreeT>::findBlindDataForSemantic(GridBlindDataSemantic semantic) const
+__hostdev__ int Grid<TreeT>::findBlindDataForSemantic(GridBlindDataSemantic semantic) const
 {
     for (uint32_t i = 0, n = blindDataCount(); i < n; ++i)
         if (blindMetaData(i).mSemantic == semantic)
@@ -2328,7 +2328,7 @@
 }; // Tree class
 
 template<typename RootT>
-void Tree<RootT>::extrema(ValueType& min, ValueType& max) const
+__hostdev__ void Tree<RootT>::extrema(ValueType& min, ValueType& max) const
 {
     min = this->root().valueMin();
     max = this->root().valueMax();
@@ -2336,7 +2336,7 @@
 
 template<typename RootT>
 template<typename NodeT>
-const NodeT* Tree<RootT>::getNode(uint32_t i) const
+__hostdev__ const NodeT* Tree<RootT>::getNode(uint32_t i) const
 {
     static_assert(is_same<TreeNodeT<NodeT::LEVEL>, NodeT>::value, "Tree::getNode: unvalid node type");
     NANOVDB_ASSERT(i < DataType::mCount[NodeT::LEVEL]);
@@ -2345,7 +2345,7 @@
 
 template<typename RootT>
 template<int LEVEL>
-const typename TreeNode<Tree<RootT>, LEVEL>::type* Tree<RootT>::getNode(uint32_t i) const
+__hostdev__ const typename TreeNode<Tree<RootT>, LEVEL>::type* Tree<RootT>::getNode(uint32_t i) const
 {
     NANOVDB_ASSERT(i < DataType::mCount[LEVEL]);
     return reinterpret_cast<const TreeNodeT<LEVEL>*>(reinterpret_cast<const uint8_t*>(this) + DataType::mBytes[LEVEL]) + i;
@@ -2353,7 +2353,7 @@
 
 template<typename RootT>
 template<typename NodeT>
-NodeT* Tree<RootT>::getNode(uint32_t i)
+__hostdev__ NodeT* Tree<RootT>::getNode(uint32_t i)
 {
     static_assert(is_same<TreeNodeT<NodeT::LEVEL>, NodeT>::value, "Tree::getNode: invalid node type");
     NANOVDB_ASSERT(i < DataType::mCount[NodeT::LEVEL]);
@@ -2362,7 +2362,7 @@
 
 template<typename RootT>
 template<int LEVEL>
-typename TreeNode<Tree<RootT>, LEVEL>::type* Tree<RootT>::getNode(uint32_t i)
+__hostdev__ typename TreeNode<Tree<RootT>, LEVEL>::type* Tree<RootT>::getNode(uint32_t i)
 {
     NANOVDB_ASSERT(i < DataType::mCount[LEVEL]);
     return reinterpret_cast<TreeNodeT<LEVEL>*>(reinterpret_cast<uint8_t*>(this) + DataType::mBytes[LEVEL]) + i;
@@ -2370,7 +2370,7 @@
 
 template<typename RootT>
 template<typename NodeT>
-uint32_t Tree<RootT>::getNodeID(const NodeT& node) const
+__hostdev__ uint32_t Tree<RootT>::getNodeID(const NodeT& node) const
 {
     static_assert(is_same<TreeNodeT<NodeT::LEVEL>, NodeT>::value, "Tree::getNodeID: invalid node type");
     const NodeT* first = reinterpret_cast<const NodeT*>(reinterpret_cast<const uint8_t*>(this) + DataType::mBytes[NodeT::LEVEL]);
@@ -2380,7 +2380,7 @@
 
 template<typename RootT>
 template<typename NodeT>
-uint32_t Tree<RootT>::getLinearOffset(const NodeT& node) const
+__hostdev__ uint32_t Tree<RootT>::getLinearOffset(const NodeT& node) const
 {
     return this->getNodeID(node) + DataType::mPFSum[NodeT::LEVEL];
 }
@@ -3366,7 +3366,7 @@
 }; // LeafNode class
 
 template<typename ValueT, typename CoordT, template<uint32_t> class MaskT, uint32_t LOG2DIM>
-inline void LeafNode<ValueT, CoordT, MaskT, LOG2DIM>::updateBBox()
+inline __hostdev__ void LeafNode<ValueT, CoordT, MaskT, LOG2DIM>::updateBBox()
 {
     static_assert(LOG2DIM == 3, "LeafNode::updateBBox: only supports LOGDIM = 3!");
     if (!this->isActive()) return;
Index: nanovdb/nanovdb/util/SampleFromVoxels.h
===================================================================
--- a/nanovdb/nanovdb/util/SampleFromVoxels.h  (revision 62751)
+++ b/nanovdb/nanovdb/util/SampleFromVoxels.h  (working copy)
@@ -22,7 +22,7 @@
 #define NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED
 
 // Only define __hostdev__ when compiling as NVIDIA CUDA
-#ifdef __CUDACC__
+#if defined(__CUDACC__) || defined(__HIP__)
 #define __hostdev__ __host__ __device__
 #else
 #include <cmath> // for floor
@@ -136,7 +136,7 @@
 
 template<typename TreeOrAccT>
 template<typename Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const Vec3T& xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const Vec3T& xyz) const
 {
     const CoordT ijk = Round<CoordT>(xyz);
     if (ijk != mPos) {
@@ -147,7 +147,7 @@
 }
 
 template<typename TreeOrAccT>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const CoordT& ijk) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const CoordT& ijk) const
 {
     if (ijk != mPos) {
         mPos = ijk;
@@ -158,7 +158,7 @@
 
 template<typename TreeOrAccT>
 template<typename Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, false>::operator()(const Vec3T& xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, false>::operator()(const Vec3T& xyz) const
 {
     return mAcc.getValue(Round<CoordT>(xyz));
 }
@@ -195,7 +195,7 @@
 }; // TrilinearSamplerBase
 
 template<typename TreeOrAccT>
-void TrilinearSampler<TreeOrAccT>::stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const
+__hostdev__ void TrilinearSampler<TreeOrAccT>::stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const
 {
     v[0][0][0] = mAcc.getValue(ijk); // i, j, k
 
@@ -224,7 +224,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType TrilinearSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
+__hostdev__ typename TreeOrAccT::ValueType TrilinearSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
 {
 #if 0
     auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a
@@ -239,7 +239,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-Vec3T<typename TreeOrAccT::ValueType> TrilinearSampler<TreeOrAccT>::gradient(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
+__hostdev__ Vec3T<typename TreeOrAccT::ValueType> TrilinearSampler<TreeOrAccT>::gradient(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
 {
     static_assert(std::is_floating_point<ValueT>::value, "TrilinearSampler::gradient requires a floating-point type");
 #if 0
@@ -270,7 +270,7 @@
 }
 
 template<typename TreeOrAccT>
-bool TrilinearSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[2][2][2])
+__hostdev__ bool TrilinearSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[2][2][2])
 {
     static_assert(std::is_floating_point<ValueT>::value, "TrilinearSampler::zeroCrossing requires a floating-point type");
     const bool less = v[0][0][0] < ValueT(0);
@@ -363,7 +363,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(Vec3T<RealT> xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(Vec3T<RealT> xyz) const
 {
     this->cache(xyz);
     return BaseT::sample(xyz, mVal);
@@ -370,7 +370,7 @@
 }
 
 template<typename TreeOrAccT>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(const CoordT &ijk) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(const CoordT &ijk) const
 {
     return ijk == mPos ? mVal[0][0][0] : BaseT::mAcc.getValue(ijk);
 }
@@ -377,7 +377,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, true>::gradient(Vec3T<RealT> xyz) const
+__hostdev__ Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, true>::gradient(Vec3T<RealT> xyz) const
 {
     this->cache(xyz);
     return BaseT::gradient(xyz, mVal);
@@ -393,7 +393,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-void SampleFromVoxels<TreeOrAccT, 1, true>::cache(Vec3T<RealT>& xyz) const
+__hostdev__ void SampleFromVoxels<TreeOrAccT, 1, true>::cache(Vec3T<RealT>& xyz) const
 {
     CoordT ijk = Floor<CoordT>(xyz);
     if (ijk != mPos) {
@@ -406,7 +406,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
 {
     ValueT val[2][2][2];
     CoordT ijk = Floor<CoordT>(xyz);
@@ -418,7 +418,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
 {
     auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); };
 
@@ -463,7 +463,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-inline Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, false>::gradient(Vec3T<RealT> xyz) const
+inline __hostdev__ Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, false>::gradient(Vec3T<RealT> xyz) const
 {
     ValueT val[2][2][2];
     CoordT ijk = Floor<CoordT>(xyz);
@@ -473,7 +473,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-bool SampleFromVoxels<TreeOrAccT, 1, false>::zeroCrossing(Vec3T<RealT> xyz) const
+__hostdev__ bool SampleFromVoxels<TreeOrAccT, 1, false>::zeroCrossing(Vec3T<RealT> xyz) const
 {
     ValueT val[2][2][2];
     CoordT ijk = Floor<CoordT>(xyz);
@@ -510,7 +510,7 @@
 }; // TriquadraticSamplerBase
 
 template<typename TreeOrAccT>
-void TriquadraticSampler<TreeOrAccT>::stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const
+__hostdev__ void TriquadraticSampler<TreeOrAccT>::stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const
 {
     CoordT p(ijk[0] - 1, 0, 0);
     for (int dx = 0; dx < 3; ++dx, ++p[0]) {
@@ -526,7 +526,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType TriquadraticSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[3][3][3])
+__hostdev__ typename TreeOrAccT::ValueType TriquadraticSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[3][3][3])
 {
     auto kernel = [](const ValueT* value, double weight)->ValueT {
         return weight * (weight * (0.5f * (value[0] + value[2]) - value[1]) +
@@ -545,7 +545,7 @@
 }
 
 template<typename TreeOrAccT>
-bool TriquadraticSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[3][3][3])
+__hostdev__ bool TriquadraticSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[3][3][3])
 {
     static_assert(std::is_floating_point<ValueT>::value, "TrilinearSampler::zeroCrossing requires a floating-point type");
     const bool less = v[0][0][0] < ValueT(0);
@@ -624,7 +624,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(Vec3T<RealT> xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(Vec3T<RealT> xyz) const
 {
     this->cache(xyz);
     return BaseT::sample(xyz, mVal);
@@ -631,7 +631,7 @@
 }
 
 template<typename TreeOrAccT>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(const CoordT &ijk) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(const CoordT &ijk) const
 {
     return ijk == mPos ? mVal[1][1][1] : BaseT::mAcc.getValue(ijk);
 }
@@ -646,7 +646,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-void SampleFromVoxels<TreeOrAccT, 2, true>::cache(Vec3T<RealT>& xyz) const
+__hostdev__ void SampleFromVoxels<TreeOrAccT, 2, true>::cache(Vec3T<RealT>& xyz) const
 {
     CoordT ijk = Floor<CoordT>(xyz);
     if (ijk != mPos) {
@@ -657,7 +657,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, false>::operator()(Vec3T<RealT> xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, false>::operator()(Vec3T<RealT> xyz) const
 {
     ValueT val[3][3][3];
     CoordT ijk = Floor<CoordT>(xyz);
@@ -667,7 +667,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-bool SampleFromVoxels<TreeOrAccT, 2, false>::zeroCrossing(Vec3T<RealT> xyz) const
+__hostdev__ bool SampleFromVoxels<TreeOrAccT, 2, false>::zeroCrossing(Vec3T<RealT> xyz) const
 {
     ValueT val[3][3][3];
     CoordT ijk = Floor<CoordT>(xyz);
@@ -710,7 +710,7 @@
 }; // TricubicSampler
 
 template<typename TreeOrAccT>
-void TricubicSampler<TreeOrAccT>::stencil(const CoordT& ijk, ValueT (&C)[64]) const
+__hostdev__ void TricubicSampler<TreeOrAccT>::stencil(const CoordT& ijk, ValueT (&C)[64]) const
 {
     auto fetch = [&](int i, int j, int k) -> ValueT& { return C[((i + 1) << 4) + ((j + 1) << 2) + k + 1]; };
 
@@ -929,7 +929,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 3, true>::operator()(Vec3T<RealT> xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 3, true>::operator()(Vec3T<RealT> xyz) const
 {
     this->cache(xyz);
     return BaseT::sample(xyz, mC);
@@ -937,7 +937,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-void SampleFromVoxels<TreeOrAccT, 3, true>::cache(Vec3T<RealT>& xyz) const
+__hostdev__ void SampleFromVoxels<TreeOrAccT, 3, true>::cache(Vec3T<RealT>& xyz) const
 {
     CoordT ijk = Floor<CoordT>(xyz);
     if (ijk != mPos) {
```
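Every hunk in this patch makes the same move: preprocessor guards gain a `defined(__HIP__)` branch, and out-of-line template definitions gain the `__hostdev__` annotation so that hipcc, like nvcc, compiles them for device code. A minimal self-contained sketch of the pattern (illustrative, not NanoVDB's real code):

```cpp
// Minimal sketch of the __hostdev__ pattern the patch extends to HIP.
// Under nvcc or hipcc the annotation makes a function callable from both
// host and device code; under a plain host compiler it expands to nothing,
// so this file also builds standalone.
#if defined(__CUDACC__) || defined(__HIP__)
#define __hostdev__ __host__ __device__
#else
#define __hostdev__
#endif

struct Coord {
  int v[3];
  __hostdev__ int sum() const;  // declaration carries the annotation
};

// The patch's recurring fix: an out-of-line definition must repeat
// __hostdev__, otherwise device compilation of the header fails.
__hostdev__ int Coord::sum() const
{
  return v[0] + v[1] + v[2];
}

int main()
{
  Coord c{{1, 2, 3}};
  return c.sum() == 6 ? 0 : 1;
}
```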
```diff
@@ -81,4 +81,5 @@ if(NOT APPLE)
   set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
   set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
   set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
+  set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
 endif()
```
|
|||||||
|
|
||||||
# NOTE: For all new libraries, use absolute library paths.
|
# NOTE: For all new libraries, use absolute library paths.
|
||||||
# This should eventually be phased out.
|
# This should eventually be phased out.
|
||||||
# APPLE plaform uses full paths for linking libraries, and avoids link_directories.
|
# APPLE platform uses full paths for linking libraries, and avoids link_directories.
|
||||||
if(NOT MSVC AND NOT APPLE)
|
if(NOT MSVC AND NOT APPLE)
|
||||||
link_directories(${JPEG_LIBPATH} ${PNG_LIBPATH} ${ZLIB_LIBPATH} ${FREETYPE_LIBPATH})
|
link_directories(${JPEG_LIBPATH} ${PNG_LIBPATH} ${ZLIB_LIBPATH} ${FREETYPE_LIBPATH})
|
||||||
|
|
||||||
|
```diff
@@ -27,7 +27,7 @@ if(WITH_WINDOWS_BUNDLE_CRT)
   # Install the CRT to the blender.crt Sub folder.
   install(FILES ${CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS} DESTINATION ./blender.crt COMPONENT Libraries)
 
-  # Generating the manifest is a relativly expensive operation since
+  # Generating the manifest is a relatively expensive operation since
   # it is collecting an sha1 hash for every file required. so only do
   # this work when the libs have either changed or the manifest does
   # not exist yet.
```
```diff
@@ -11,7 +11,7 @@ import queue
 
 execution_queue = queue.Queue()
 
-# This function can savely be called in another thread.
+# This function can safely be called in another thread.
 # The function will be executed when the timer runs the next time.
 def run_in_main_thread(function):
     execution_queue.put(function)
```
```diff
@@ -42,8 +42,13 @@ class SimpleMouseOperator(bpy.types.Operator):
         self.y = event.mouse_y
         return self.execute(context)
 
+# Only needed if you want to add into a dynamic menu
+def menu_func(self, context):
+    self.layout.operator(SimpleMouseOperator.bl_idname, text="Simple Mouse Operator")
+
+# Register and add to the view menu (required to also use F3 search "Simple Mouse Operator" for quick access)
 bpy.utils.register_class(SimpleMouseOperator)
+bpy.types.VIEW3D_MT_view.append(menu_func)
 
 # Test call to the newly defined operator.
 # Here we call the operator and invoke it, meaning that the settings are taken
```
```diff
@@ -43,7 +43,7 @@ def menu_func(self, context):
     self.layout.operator(ExportSomeData.bl_idname, text="Text Export Operator")
 
 
-# Register and add to the file selector
+# Register and add to the file selector (required to also use F3 search "Text Export Operator" for quick access)
 bpy.utils.register_class(ExportSomeData)
 bpy.types.TOPBAR_MT_file_export.append(menu_func)
 
```
```diff
@@ -27,8 +27,14 @@ class DialogOperator(bpy.types.Operator):
         wm = context.window_manager
         return wm.invoke_props_dialog(self)
 
+
+# Only needed if you want to add into a dynamic menu
+def menu_func(self, context):
+    self.layout.operator(DialogOperator.bl_idname, text="Dialog Operator")
+
+# Register and add to the object menu (required to also use F3 search "Dialog Operator" for quick access)
 bpy.utils.register_class(DialogOperator)
+bpy.types.VIEW3D_MT_object.append(menu_func)
 
 # Test call.
 bpy.ops.object.dialog_operator('INVOKE_DEFAULT')
```
```diff
@@ -41,8 +41,13 @@ class CustomDrawOperator(bpy.types.Operator):
 
         col.prop(self, "my_string")
 
+# Only needed if you want to add into a dynamic menu
+def menu_func(self, context):
+    self.layout.operator(CustomDrawOperator.bl_idname, text="Custom Draw Operator")
+
+# Register and add to the object menu (required to also use F3 search "Custom Draw Operator" for quick access)
 bpy.utils.register_class(CustomDrawOperator)
+bpy.types.VIEW3D_MT_object.append(menu_func)
 
 # test call
 bpy.ops.object.custom_draw('INVOKE_DEFAULT')
```
```diff
@@ -55,8 +55,13 @@ class ModalOperator(bpy.types.Operator):
         context.window_manager.modal_handler_add(self)
         return {'RUNNING_MODAL'}
 
+# Only needed if you want to add into a dynamic menu
+def menu_func(self, context):
+    self.layout.operator(ModalOperator.bl_idname, text="Modal Operator")
+
+# Register and add to the object menu (required to also use F3 search "Modal Operator" for quick access)
 bpy.utils.register_class(ModalOperator)
+bpy.types.VIEW3D_MT_object.append(menu_func)
 
 # test call
 bpy.ops.object.modal_operator('INVOKE_DEFAULT')
```
```diff
@@ -31,8 +31,13 @@ class SearchEnumOperator(bpy.types.Operator):
         context.window_manager.invoke_search_popup(self)
         return {'RUNNING_MODAL'}
 
+# Only needed if you want to add into a dynamic menu
+def menu_func(self, context):
+    self.layout.operator(SearchEnumOperator.bl_idname, text="Search Enum Operator")
+
+# Register and add to the object menu (required to also use F3 search "Search Enum Operator" for quick access)
 bpy.utils.register_class(SearchEnumOperator)
+bpy.types.VIEW3D_MT_object.append(menu_func)
 
 # test call
 bpy.ops.object.search_enum_operator('INVOKE_DEFAULT')
```
```diff
@@ -22,8 +22,13 @@ class HelloWorldOperator(bpy.types.Operator):
         print("Hello World")
         return {'FINISHED'}
 
+# Only needed if you want to add into a dynamic menu
+def menu_func(self, context):
+    self.layout.operator(HelloWorldOperator.bl_idname, text="Hello World Operator")
+
+# Register and add to the view menu (required to also use F3 search "Hello World Operator" for quick access)
 bpy.utils.register_class(HelloWorldOperator)
+bpy.types.VIEW3D_MT_view.append(menu_func)
 
 # test call to the newly defined operator
 bpy.ops.wm.hello_world()
```
```diff
@@ -106,24 +106,6 @@ including advanced features.
    floating-point values. These values are interpreted as a plane equation.
 
 
-.. function:: glColor (red, green, blue, alpha):
-
-   B{glColor3b, glColor3d, glColor3f, glColor3i, glColor3s, glColor3ub, glColor3ui, glColor3us,
-   glColor4b, glColor4d, glColor4f, glColor4i, glColor4s, glColor4ub, glColor4ui, glColor4us,
-   glColor3bv, glColor3dv, glColor3fv, glColor3iv, glColor3sv, glColor3ubv, glColor3uiv,
-   glColor3usv, glColor4bv, glColor4dv, glColor4fv, glColor4iv, glColor4sv, glColor4ubv,
-   glColor4uiv, glColor4usv}
-
-   Set a new color.
-
-   .. seealso:: `OpenGL Docs <https://khronos.org/registry/OpenGL-Refpages/gl4/html/glColor.xhtml>`__
-
-   :type red, green, blue, alpha: Depends on function prototype.
-   :arg red, green, blue: Specify new red, green, and blue values for the current color.
-   :arg alpha: Specifies a new alpha value for the current color. Included only in the
-      four-argument glColor4 commands. (With '4' colors only)
-
-
 .. function:: glColorMask(red, green, blue, alpha):
 
    Enable and disable writing of frame buffer color components
```
```diff
@@ -728,7 +728,7 @@ Abusing RNA property callbacks
 ------------------------------
 
 Python-defined RNA properties can have custom callbacks. Trying to perform complex operations
-from there, like calling an operator, may work, but is not officialy recommended nor supported.
+from there, like calling an operator, may work, but is not officially recommended nor supported.
 
 Main reason is that those callback should be very fast, but additionally, it may for example
 create issues with undo/redo system (most operators store an history step, and editing an RNA
```
extern/hipew/include/hipew.h (vendored, 43 lines changed)

```diff
@@ -804,31 +804,29 @@ typedef enum hipDeviceP2PAttr {
 } hipDeviceP2PAttr;
 
 typedef struct HIP_MEMCPY3D {
-  size_t srcXInBytes;
-  size_t srcY;
-  size_t srcZ;
-  size_t srcLOD;
+  unsigned int srcXInBytes;
+  unsigned int srcY;
+  unsigned int srcZ;
+  unsigned int srcLOD;
   hipMemoryType srcMemoryType;
   const void* srcHost;
   hipDeviceptr_t srcDevice;
-  hArray * srcArray;
-  void* reserved0;
-  size_t srcPitch;
-  size_t srcHeight;
-  size_t dstXInBytes;
-  size_t dstY;
-  size_t dstZ;
-  size_t dstLOD;
+  hArray srcArray;
+  unsigned int srcPitch;
+  unsigned int srcHeight;
+  unsigned int dstXInBytes;
+  unsigned int dstY;
+  unsigned int dstZ;
+  unsigned int dstLOD;
   hipMemoryType dstMemoryType;
   void* dstHost;
   hipDeviceptr_t dstDevice;
-  hArray * dstArray;
-  void* reserved1;
-  size_t dstPitch;
-  size_t dstHeight;
-  size_t WidthInBytes;
-  size_t Height;
-  size_t Depth;
+  hArray dstArray;
+  unsigned int dstPitch;
+  unsigned int dstHeight;
+  unsigned int WidthInBytes;
+  unsigned int Height;
+  unsigned int Depth;
 } HIP_MEMCPY3D;
 
 typedef struct HIP_MEMCPY3D_PEER_st {
```
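The struct now matches the layout HIP itself uses, with 32-bit extents and `hArray` held by value, instead of the CUDA `CUDA_MEMCPY3D`-style `size_t` fields and reserved pointers. A minimal sketch of filling the updated struct for a host-to-device copy; the `hipMemoryTypeHost` and `hipMemoryTypeDevice` enumerator names are assumptions not shown in this diff:

```cpp
#include <cstring>  // std::memset

// Minimal sketch, assuming hipew.h is included. hipMemoryTypeHost and
// hipMemoryTypeDevice are assumed enumerator names of hipMemoryType;
// they do not appear in this diff.
static HIP_MEMCPY3D make_h2d_copy(const void *src, hipDeviceptr_t dst,
                                  unsigned int width_bytes,
                                  unsigned int height,
                                  unsigned int depth)
{
  HIP_MEMCPY3D p;
  std::memset(&p, 0, sizeof(p));  // zero the unused array/LOD fields
  p.srcMemoryType = hipMemoryTypeHost;
  p.srcHost = src;
  p.srcPitch = width_bytes;  // now unsigned int rather than size_t
  p.srcHeight = height;
  p.dstMemoryType = hipMemoryTypeDevice;
  p.dstDevice = dst;
  p.dstPitch = width_bytes;
  p.dstHeight = height;
  p.WidthInBytes = width_bytes;
  p.Height = height;
  p.Depth = depth;
  return p;
}
```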
```diff
@@ -879,7 +877,7 @@ typedef struct HIP_RESOURCE_DESC_st {
   hipResourceType resType;
   union {
     struct {
-      hArray * h_Array;
+      hArray h_Array;
     } array;
     struct {
       hipMipmappedArray_t hMipmappedArray;
```
```diff
@@ -1074,9 +1072,10 @@ typedef enum hiprtcResult {
 typedef hipError_t HIPAPI thipGetErrorName(hipError_t error, const char** pStr);
 typedef hipError_t HIPAPI thipInit(unsigned int Flags);
 typedef hipError_t HIPAPI thipDriverGetVersion(int* driverVersion);
-typedef hipError_t HIPAPI thipGetDevice(hipDevice_t* device, int ordinal);
+typedef hipError_t HIPAPI thipGetDevice(int* device);
 typedef hipError_t HIPAPI thipGetDeviceCount(int* count);
 typedef hipError_t HIPAPI thipGetDeviceProperties(hipDeviceProp_t* props, int deviceId);
+typedef hipError_t HIPAPI thipDeviceGet(hipDevice_t* device, int ordinal);
 typedef hipError_t HIPAPI thipDeviceGetName(char* name, int len, hipDevice_t dev);
 typedef hipError_t HIPAPI thipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
 typedef hipError_t HIPAPI thipDeviceComputeCapability(int* major, int* minor, hipDevice_t dev);
```
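The split mirrors HIP's own API: `hipGetDevice(int*)` returns the currently active device index, while the newly loaded `hipDeviceGet(hipDevice_t*, ordinal)` fetches a device handle by ordinal the way CUDA's `cuDeviceGet` does. A sketch of enumerating devices with the declarations above, assuming hipew has been initialized and that `hipSuccess` is the usual zero-valued `hipError_t` success code (it is not shown in this capture):

```cpp
#include <cstdio>

// Minimal sketch, assuming hipew.h is included and the loader has already
// resolved the function pointers used below.
static void list_hip_devices()
{
  int count = 0;
  if (hipGetDeviceCount(&count) != hipSuccess) {
    return;
  }
  for (int ordinal = 0; ordinal < count; ++ordinal) {
    hipDevice_t device;
    char name[256];
    // New in this change: fetch a device handle by ordinal, CUDA-style.
    if (hipDeviceGet(&device, ordinal) != hipSuccess) {
      continue;
    }
    if (hipDeviceGetName(name, sizeof(name), device) == hipSuccess) {
      std::printf("HIP device %d: %s\n", ordinal, name);
    }
  }
}
```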
```diff
@@ -1209,6 +1208,7 @@ extern thipDriverGetVersion *hipDriverGetVersion;
 extern thipGetDevice *hipGetDevice;
 extern thipGetDeviceCount *hipGetDeviceCount;
 extern thipGetDeviceProperties *hipGetDeviceProperties;
+extern thipDeviceGet* hipDeviceGet;
 extern thipDeviceGetName *hipDeviceGetName;
 extern thipDeviceGetAttribute *hipDeviceGetAttribute;
 extern thipDeviceComputeCapability *hipDeviceComputeCapability;
```
```diff
@@ -1333,6 +1333,7 @@ enum {
   HIPEW_SUCCESS = 0,
   HIPEW_ERROR_OPEN_FAILED = -1,
   HIPEW_ERROR_ATEXIT_FAILED = -2,
+  HIPEW_ERROR_OLD_DRIVER = -3,
 };
 
 enum {
```
extern/hipew/src/hipew.c (vendored, 40 lines changed)

```diff
@@ -71,6 +71,7 @@ thipDriverGetVersion *hipDriverGetVersion;
 thipGetDevice *hipGetDevice;
 thipGetDeviceCount *hipGetDeviceCount;
 thipGetDeviceProperties *hipGetDeviceProperties;
+thipDeviceGet* hipDeviceGet;
 thipDeviceGetName *hipDeviceGetName;
 thipDeviceGetAttribute *hipDeviceGetAttribute;
 thipDeviceComputeCapability *hipDeviceComputeCapability;
```
@@ -213,6 +214,36 @@ static void hipewHipExit(void) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
static int hipewHasOldDriver(const char *hip_path) {
|
||||||
|
DWORD verHandle = 0;
|
||||||
|
DWORD verSize = GetFileVersionInfoSize(hip_path, &verHandle);
|
||||||
|
int old_driver = 0;
|
||||||
|
if(verSize != 0) {
|
||||||
|
LPSTR verData = (LPSTR)malloc(verSize);
|
||||||
|
if(GetFileVersionInfo(hip_path, verHandle, verSize, verData)) {
|
||||||
|
LPBYTE lpBuffer = NULL;
|
||||||
|
UINT size = 0;
|
||||||
|
if(VerQueryValue(verData, "\\", (VOID FAR * FAR *)&lpBuffer, &size)) {
|
||||||
|
if(size) {
|
||||||
|
VS_FIXEDFILEINFO *verInfo = (VS_FIXEDFILEINFO *)lpBuffer;
|
||||||
|
/* Magic value from
|
||||||
|
* https://docs.microsoft.com/en-us/windows/win32/api/verrsrc/ns-verrsrc-vs_fixedfileinfo */
|
||||||
|
if(verInfo->dwSignature == 0xfeef04bd) {
|
||||||
|
unsigned int fileVersionLS0 = (verInfo->dwFileVersionLS >> 16) & 0xffff;
|
||||||
|
unsigned int fileversionLS1 = (verInfo->dwFileVersionLS >> 0) & 0xffff;
|
||||||
|
/* Corresponds to versions older than AMD Radeon Pro 21.Q4. */
|
||||||
|
old_driver = ((fileVersionLS0 < 3354) || (fileVersionLS0 == 3354 && fileversionLS1 < 13));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(verData);
|
||||||
|
}
|
||||||
|
return old_driver;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static int hipewHipInit(void) {
|
static int hipewHipInit(void) {
|
||||||
/* Library paths. */
|
/* Library paths. */
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
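
Note on the version check above: Windows packs a module's file version into two 32-bit values, and the driver build/revision pair that the cutoff cares about lives in the high and low words of dwFileVersionLS. A minimal standalone sketch of the same decode; the packed constant below is invented for illustration, not a real driver version:

    /* Sketch: decoding dwFileVersionLS the way hipewHasOldDriver() does. */
    #include <stdio.h>

    int main(void) {
      unsigned int dwFileVersionLS = (3354u << 16) | 12u; /* build 3354, revision 12 */
      unsigned int build = (dwFileVersionLS >> 16) & 0xffff;
      unsigned int revision = dwFileVersionLS & 0xffff;
      /* 3354.13 corresponds to the AMD Radeon Pro 21.Q4 cutoff used above. */
      int old_driver = (build < 3354) || (build == 3354 && revision < 13);
      printf("%u.%u -> old driver: %d\n", build, revision, old_driver);
      return 0;
    }
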
@@ -240,6 +271,14 @@ static int hipewHipInit(void) {
     return result;
   }
 
+#ifdef _WIN32
+  /* Test for driver version. */
+  if(hipewHasOldDriver(hip_paths[0])) {
+    result = HIPEW_ERROR_OLD_DRIVER;
+    return result;
+  }
+#endif
+
   /* Load library. */
   hip_lib = dynamic_library_open_find(hip_paths);
 
@@ -255,6 +294,7 @@ static int hipewHipInit(void) {
   HIP_LIBRARY_FIND_CHECKED(hipGetDevice);
   HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount);
   HIP_LIBRARY_FIND_CHECKED(hipGetDeviceProperties);
+  HIP_LIBRARY_FIND_CHECKED(hipDeviceGet);
   HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName);
   HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute);
   HIP_LIBRARY_FIND_CHECKED(hipDeviceComputeCapability);
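
With the check performed before the library is even loaded, the new HIPEW_ERROR_OLD_DRIVER code lets callers fail early with an actionable message. A hedged sketch of a caller branching on it (mirroring the device_hip_init() change further down; hipewInit's exact entry point and flag value are assumptions here):

    #include <cstdio>
    extern "C" int hipewInit(int flags); /* provided by hipew; signature assumed */
    enum { HIPEW_INIT_HIP = 1 };         /* assumed flag value */
    enum { HIPEW_ERROR_OLD_DRIVER = -3 };

    bool init_hip_or_report() {
      int result = hipewInit(HIPEW_INIT_HIP);
      if (result == HIPEW_ERROR_OLD_DRIVER) {
        /* Actionable message instead of a cryptic library-load failure. */
        fprintf(stderr, "HIP disabled: requires AMD Radeon Pro 21.Q4 driver or newer.\n");
        return false;
      }
      return result == 0; /* HIPEW_SUCCESS */
    }
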
@@ -226,6 +226,9 @@ add_definitions(
   -DCCL_NAMESPACE_END=}
 )
 
+if(WITH_CYCLES_DEBUG)
+  add_definitions(-DWITH_CYCLES_DEBUG)
+endif()
 if(WITH_CYCLES_STANDALONE_GUI)
   add_definitions(-DWITH_CYCLES_STANDALONE_GUI)
 endif()
@@ -334,7 +337,7 @@ else()
 endif()
 
 # Warnings
-if(CMAKE_COMPILER_IS_GNUCXX)
+if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_C_COMPILER_ID MATCHES "Clang")
   ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_cxxflag_float_conversion "-Werror=float-conversion")
   ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_cxxflag_double_promotion "-Werror=double-promotion")
   ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_no_error_unused_macros "-Wno-error=unused-macros")
@@ -218,6 +218,12 @@ enum_denoising_prefilter = (
     ('ACCURATE', "Accurate", "Prefilter noisy guiding passes before denoising color. Improves quality when guiding passes are noisy using extra processing time", 3),
 )
 
+enum_direct_light_sampling_type = (
+    ('MULTIPLE_IMPORTANCE_SAMPLING', "Multiple Importance Sampling", "Multiple importance sampling is used to combine direct light contributions from next-event estimation and forward path tracing", 0),
+    ('FORWARD_PATH_TRACING', "Forward Path Tracing", "Direct light contributions are only sampled using forward path tracing", 1),
+    ('NEXT_EVENT_ESTIMATION', "Next-Event Estimation", "Direct light contributions are only sampled using next-event estimation", 2),
+)
+
 
 def update_render_passes(self, context):
     scene = context.scene
     view_layer = context.view_layer
@@ -325,6 +331,13 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
         default=1024,
     )
 
+    sample_offset: IntProperty(
+        name="Sample Offset",
+        description="Number of samples to skip when starting render",
+        min=0, max=(1 << 24),
+        default=0,
+    )
+
     time_limit: FloatProperty(
         name="Time Limit",
         description="Limit the render time (excluding synchronization time)."
@@ -346,7 +359,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
         name="Scrambling Distance",
         default=1.0,
         min=0.0, max=1.0,
-        description="Lower values give faster rendering with GPU rendering and less noise with all devices at the cost of possible artifacts if set too low. Only works when not using adaptive sampling",
+        description="Reduce randomization between pixels to improve GPU rendering performance, at the cost of possible rendering artifacts if set too low. Only works when not using adaptive sampling",
     )
     preview_scrambling_distance: BoolProperty(
         name="Scrambling Distance viewport",
@@ -354,10 +367,10 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
         description="Uses the Scrambling Distance value for the viewport. Faster but may flicker",
     )
 
-    adaptive_scrambling_distance: BoolProperty(
-        name="Adaptive Scrambling Distance",
+    auto_scrambling_distance: BoolProperty(
+        name="Automatic Scrambling Distance",
         default=False,
-        description="Uses a formula to adapt the scrambling distance strength based on the sample count",
+        description="Automatically reduce the randomization between pixels to improve GPU rendering performance, at the cost of possible rendering artifacts. Only works when not using adaptive sampling",
     )
 
     use_layer_samples: EnumProperty(
@@ -415,6 +428,13 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
         default=0,
     )
 
+    direct_light_sampling_type: EnumProperty(
+        name="Direct Light Sampling Type",
+        description="The type of strategy used for sampling direct light contributions",
+        items=enum_direct_light_sampling_type,
+        default='MULTIPLE_IMPORTANCE_SAMPLING',
+    )
+
     min_light_bounces: IntProperty(
         name="Min Light Bounces",
         description="Minimum number of light bounces. Setting this higher reduces noise in the first bounces, "
@@ -770,8 +790,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
     )
 
     use_auto_tile: BoolProperty(
-        name="Auto Tiles",
-        description="Automatically render high resolution images in tiles to reduce memory usage, using the specified tile size. Tiles are cached to disk while rendering to save memory",
+        name="Using Tiling",
+        description="Render high resolution images in tiles to reduce memory usage, using the specified tile size. Tiles are cached to disk while rendering to save memory",
         default=True,
     )
     tile_size: IntProperty(
@@ -1419,10 +1439,9 @@ class CyclesPreferences(bpy.types.AddonPreferences):
             col.label(text="and NVIDIA driver version 470 or newer", icon='BLANK1')
         elif device_type == 'HIP':
             import sys
-            col.label(text="Requires discrete AMD GPU with RDNA2 architecture", icon='BLANK1')
-            # TODO: provide driver version info.
-            #if sys.platform[:3] == "win":
-            # col.label(text="and AMD driver version ??? or newer", icon='BLANK1')
+            col.label(text="Requires discrete AMD GPU with RDNA architecture", icon='BLANK1')
+            if sys.platform[:3] == "win":
+                col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
         return
 
     for device in devices:
@@ -290,15 +290,18 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
         col.active = not (cscene.use_adaptive_sampling and cscene.use_preview_adaptive_sampling)
         col.prop(cscene, "sampling_pattern", text="Pattern")
 
+        col = layout.column(align=True)
+        col.prop(cscene, "sample_offset")
+
         layout.separator()
 
-        col = layout.column(align=True)
-        col.active = not (cscene.use_adaptive_sampling and cscene.use_preview_adaptive_sampling)
-        col.prop(cscene, "scrambling_distance", text="Scrambling Distance")
-        col.prop(cscene, "adaptive_scrambling_distance", text="Adaptive")
-        sub = col.row(align=True)
+        heading = layout.column(align=True, heading="Scrambling Distance")
+        heading.active = not (cscene.use_adaptive_sampling and cscene.use_preview_adaptive_sampling)
+        heading.prop(cscene, "auto_scrambling_distance", text="Automatic")
+        sub = heading.row()
         sub.active = not cscene.use_preview_adaptive_sampling
         sub.prop(cscene, "preview_scrambling_distance", text="Viewport")
+        heading.prop(cscene, "scrambling_distance", text="Multiplier")
 
         layout.separator()
 
@@ -1051,7 +1054,7 @@ class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel):
 
 
 def has_geometry_visibility(ob):
-    return ob and ((ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LIGHT'}) or
+    return ob and ((ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LIGHT', 'VOLUME', 'POINTCLOUD', 'HAIR'}) or
                    (ob.instance_type == 'COLLECTION' and ob.instance_collection))
 
 
@@ -199,7 +199,7 @@ static bool ObtainCacheParticleUV(Hair *hair,
       b_mesh->uv_layers.begin(l);
 
       float2 uv = zero_float2();
-      if (b_mesh->uv_layers.length())
+      if (!b_mesh->uv_layers.empty())
         b_psys.uv_on_emitter(psmd, *b_pa, pa_no, uv_num, &uv.x);
       CData->curve_uv.push_back_slow(uv);
 
@@ -261,7 +261,7 @@ static bool ObtainCacheParticleVcol(Hair *hair,
       b_mesh->vertex_colors.begin(l);
 
       float4 vcol = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
-      if (b_mesh->vertex_colors.length())
+      if (!b_mesh->vertex_colors.empty())
         b_psys.mcol_on_emitter(psmd, *b_pa, pa_no, vcol_num, &vcol.x);
       CData->curve_vcol.push_back_slow(vcol);
 
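
These length()-to-empty() conversions recur through the rest of the patch. The point is the usual one: empty() states the question directly and never has to count members, which matters when length() may traverse a list. A hedged sketch of the equivalence being relied on; Collection here is a stand-in, not Blender's generated BL:: wrapper:

    /* Sketch: intent-revealing empty() built on length(). */
    #include <cassert>

    struct Collection {
      int count = 0;
      int length() const { return count; }          /* may be O(n) in real wrappers */
      bool empty() const { return length() == 0; }  /* reads as the question asked */
    };

    int main() {
      Collection uv_layers;
      assert(uv_layers.empty() == (uv_layers.length() == 0));
      return 0;
    }
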
@@ -334,7 +334,7 @@ bool BlenderDisplayDriver::update_begin(const Params &params,
 
   /* Update PBO dimensions if needed.
    *
-   * NOTE: Allocate the PBO for the the size which will fit the final render resolution (as in,
+   * NOTE: Allocate the PBO for the size which will fit the final render resolution (as in,
    * at a resolution divider 1. This was we don't need to recreate graphics interoperability
    * objects which are costly and which are tied to the specific underlying buffer size.
    * The downside of this approach is that when graphics interoperability is not used we are
 
@@ -555,7 +555,7 @@ static void attr_create_vertex_color(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh,
 /* Create uv map attributes. */
 static void attr_create_uv_map(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh)
 {
-  if (b_mesh.uv_layers.length() != 0) {
+  if (!b_mesh.uv_layers.empty()) {
     for (BL::MeshUVLoopLayer &l : b_mesh.uv_layers) {
       const bool active_render = l.active_render();
       AttributeStandard uv_std = (active_render) ? ATTR_STD_UV : ATTR_STD_NONE;
@@ -619,7 +619,7 @@ static void attr_create_uv_map(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh)
 
 static void attr_create_subd_uv_map(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, bool subdivide_uvs)
 {
-  if (b_mesh.uv_layers.length() != 0) {
+  if (!b_mesh.uv_layers.empty()) {
     BL::Mesh::uv_layers_iterator l;
     int i = 0;
 
@@ -951,7 +951,7 @@ static void create_mesh(Scene *scene,
     N = attr_N->data_float3();
 
   /* create generated coordinates from undeformed coordinates */
-  const bool need_default_tangent = (subdivision == false) && (b_mesh.uv_layers.length() == 0) &&
+  const bool need_default_tangent = (subdivision == false) && (b_mesh.uv_layers.empty()) &&
                                     (mesh->need_attribute(scene, ATTR_STD_UV_TANGENT));
   if (mesh->need_attribute(scene, ATTR_STD_GENERATED) || need_default_tangent) {
     Attribute *attr = attributes.add(ATTR_STD_GENERATED);
@@ -62,15 +62,15 @@ bool BlenderSync::BKE_object_is_modified(BL::Object &b_ob)
   return false;
 }
 
-bool BlenderSync::object_is_geometry(BL::Object &b_ob)
+bool BlenderSync::object_is_geometry(BObjectInfo &b_ob_info)
 {
-  BL::ID b_ob_data = b_ob.data();
+  BL::ID b_ob_data = b_ob_info.object_data;
 
   if (!b_ob_data) {
     return false;
   }
 
-  BL::Object::type_enum type = b_ob.type();
+  BL::Object::type_enum type = b_ob_info.iter_object.type();
 
   if (type == BL::Object::type_VOLUME || type == BL::Object::type_HAIR) {
     /* Will be exported attached to mesh. */
@@ -87,6 +87,24 @@ bool BlenderSync::object_is_geometry(BL::Object &b_ob)
   return b_ob_data.is_a(&RNA_Mesh);
 }
 
+bool BlenderSync::object_can_have_geometry(BL::Object &b_ob)
+{
+  BL::Object::type_enum type = b_ob.type();
+  switch (type) {
+    case BL::Object::type_MESH:
+    case BL::Object::type_CURVE:
+    case BL::Object::type_SURFACE:
+    case BL::Object::type_META:
+    case BL::Object::type_FONT:
+    case BL::Object::type_HAIR:
+    case BL::Object::type_POINTCLOUD:
+    case BL::Object::type_VOLUME:
+      return true;
+    default:
+      return false;
+  }
+}
+
 bool BlenderSync::object_is_light(BL::Object &b_ob)
 {
   BL::ID b_ob_data = b_ob.data();
@@ -189,7 +207,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
   }
 
   /* only interested in object that we can create meshes from */
-  if (!object_is_geometry(b_ob)) {
+  if (!object_is_geometry(b_ob_info)) {
     return NULL;
   }
 
@@ -276,7 +294,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
 
   object->set_visibility(visibility);
 
-  object->set_is_shadow_catcher(b_ob.is_shadow_catcher());
+  object->set_is_shadow_catcher(b_ob.is_shadow_catcher() || b_parent.is_shadow_catcher());
 
   float shadow_terminator_shading_offset = get_float(cobject, "shadow_terminator_offset");
   object->set_shadow_terminator_shading_offset(shadow_terminator_shading_offset);
@@ -129,7 +129,7 @@ void BlenderSession::create_session()
   /* reset status/progress */
   last_status = "";
   last_error = "";
-  last_progress = -1.0f;
+  last_progress = -1.0;
   start_resize_time = 0.0;
 
   /* create session */
@@ -615,6 +615,24 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
   sync->sync_camera(b_render, b_camera_override, width, height, "");
   sync->sync_data(
       b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state);
 
+  /* Filtering settings for combined pass. */
+  if (pass->get_type() == PASS_COMBINED) {
+    Integrator *integrator = scene->integrator;
+    integrator->set_use_direct_light((bake_filter & BL::BakeSettings::pass_filter_DIRECT) != 0);
+    integrator->set_use_indirect_light((bake_filter & BL::BakeSettings::pass_filter_INDIRECT) !=
+                                       0);
+    integrator->set_use_diffuse((bake_filter & BL::BakeSettings::pass_filter_DIFFUSE) != 0);
+    integrator->set_use_glossy((bake_filter & BL::BakeSettings::pass_filter_GLOSSY) != 0);
+    integrator->set_use_transmission(
+        (bake_filter & BL::BakeSettings::pass_filter_TRANSMISSION) != 0);
+    integrator->set_use_emission((bake_filter & BL::BakeSettings::pass_filter_EMIT) != 0);
+  }
+
+  /* Always use transparent background for baking. */
+  scene->background->set_transparent(true);
+
+  /* Load built-in images from Blender. */
   builtin_images_load();
 }
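
The bake filter block above is plain bitmask testing: each pass_filter_* flag is one bit in bake_filter, and each integrator toggle is that bit masked out. A reduced, self-contained sketch of the pattern; the flag values and names below are invented for the example:

    /* Sketch: mapping a bitmask of bake filters onto boolean integrator toggles. */
    enum BakePassFilter : unsigned int {
      PASS_FILTER_DIRECT = 1u << 0,
      PASS_FILTER_INDIRECT = 1u << 1,
      PASS_FILTER_EMIT = 1u << 2,
    };

    struct IntegratorToggles {
      bool use_direct_light, use_indirect_light, use_emission;
    };

    IntegratorToggles from_filter(unsigned int bake_filter) {
      return {(bake_filter & PASS_FILTER_DIRECT) != 0,
              (bake_filter & PASS_FILTER_INDIRECT) != 0,
              (bake_filter & PASS_FILTER_EMIT) != 0};
    }
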
@@ -841,7 +859,7 @@ void BlenderSession::get_status(string &status, string &substatus)
   session->progress.get_status(status, substatus);
 }
 
-void BlenderSession::get_progress(float &progress, double &total_time, double &render_time)
+void BlenderSession::get_progress(double &progress, double &total_time, double &render_time)
 {
   session->progress.get_time(total_time, render_time);
   progress = session->progress.get_progress();
@@ -849,10 +867,10 @@ void BlenderSession::get_progress(float &progress, double &total_time, double &render_time)
 
 void BlenderSession::update_bake_progress()
 {
-  float progress = session->progress.get_progress();
+  double progress = session->progress.get_progress();
 
   if (progress != last_progress) {
-    b_engine.update_progress(progress);
+    b_engine.update_progress((float)progress);
     last_progress = progress;
   }
 }
@@ -861,7 +879,7 @@ void BlenderSession::update_status_progress()
 {
   string timestatus, status, substatus;
   string scene_status = "";
-  float progress;
+  double progress;
   double total_time, remaining_time = 0, render_time;
   float mem_used = (float)session->stats.mem_used / 1024.0f / 1024.0f;
   float mem_peak = (float)session->stats.mem_peak / 1024.0f / 1024.0f;
@@ -905,7 +923,7 @@ void BlenderSession::update_status_progress()
     last_status_time = current_time;
   }
   if (progress != last_progress) {
-    b_engine.update_progress(progress);
+    b_engine.update_progress((float)progress);
     last_progress = progress;
   }
 
@@ -82,7 +82,7 @@ class BlenderSession {
   void tag_redraw();
   void tag_update();
   void get_status(string &status, string &substatus);
-  void get_progress(float &progress, double &total_time, double &render_time);
+  void get_progress(double &progress, double &total_time, double &render_time);
   void test_cancel();
   void update_status_progress();
   void update_bake_progress();
@@ -108,7 +108,7 @@ class BlenderSession {
 
   string last_status;
   string last_error;
-  float last_progress;
+  double last_progress;
   double last_status_time;
 
   int width, height;
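
The float-to-double switch for progress matters because the fraction advances in very small steps on long renders, and float granularity near 1.0 is only about 1e-7: an increment smaller than half an ULP vanishes entirely, so the `progress != last_progress` guard never fires again. A small demonstration of the failure mode, with illustrative numbers:

    #include <cstdio>

    int main() {
      /* One tile out of 50 million: the per-update increment. */
      const double step = 1.0 / 50000000.0;
      float f = 0.9f;
      double d = 0.9;
      f += (float)step; /* lost: 0.9f + 2e-8 rounds back to 0.9f */
      d += step;        /* preserved in double */
      printf("float moved: %d, double moved: %d\n", f != 0.9f, d != 0.9);
      return 0;
    }
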
@@ -162,19 +162,19 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d)
     /* Object */
     else if (b_id.is_a(&RNA_Object)) {
       BL::Object b_ob(b_id);
-      const bool is_geometry = object_is_geometry(b_ob);
-      const bool is_light = !is_geometry && object_is_light(b_ob);
+      const bool can_have_geometry = object_can_have_geometry(b_ob);
+      const bool is_light = !can_have_geometry && object_is_light(b_ob);
 
       if (b_ob.is_instancer() && b_update.is_updated_shading()) {
         /* Needed for e.g. object color updates on instancer. */
         object_map.set_recalc(b_ob);
       }
 
-      if (is_geometry || is_light) {
+      if (can_have_geometry || is_light) {
         const bool updated_geometry = b_update.is_updated_geometry();
 
         /* Geometry (mesh, hair, volume). */
-        if (is_geometry) {
+        if (can_have_geometry) {
           if (b_update.is_updated_transform() || b_update.is_updated_shading()) {
             object_map.set_recalc(b_ob);
           }
@@ -365,8 +365,8 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
 
   int samples = get_int(cscene, "samples");
   float scrambling_distance = get_float(cscene, "scrambling_distance");
-  bool adaptive_scrambling_distance = get_boolean(cscene, "adaptive_scrambling_distance");
-  if (adaptive_scrambling_distance) {
+  bool auto_scrambling_distance = get_boolean(cscene, "auto_scrambling_distance");
+  if (auto_scrambling_distance) {
     scrambling_distance *= 4.0f / sqrtf(samples);
   }
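
For reference, automatic mode scales the user's multiplier by 4/sqrt(samples), so the effective scrambling distance falls as the sample count grows: 64 samples gives a factor of 0.5, 256 gives 0.25, and 1024 gives 0.125; at 16 samples the factor is exactly 1.0. A worked sketch of just that arithmetic (the hunk above shows no extra clamping, so none is added here):

    #include <cmath>
    #include <cstdio>

    /* Sketch of the automatic scrambling distance factor from sync_integrator(). */
    float effective_scrambling(float user_value, int samples) {
      return user_value * 4.0f / sqrtf((float)samples);
    }

    int main() {
      const int samples_list[] = {16, 64, 256, 1024};
      for (int s : samples_list)
        printf("%4d samples -> %.3f\n", s, effective_scrambling(1.0f, s));
      return 0;
    }
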
@@ -392,6 +392,12 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
     integrator->set_ao_bounces(0);
   }
 
+#ifdef WITH_CYCLES_DEBUG
+  DirectLightSamplingType direct_light_sampling_type = (DirectLightSamplingType)get_enum(
+      cscene, "direct_light_sampling_type", DIRECT_LIGHT_SAMPLING_NUM, DIRECT_LIGHT_SAMPLING_MIS);
+  integrator->set_direct_light_sampling_type(direct_light_sampling_type);
+#endif
+
   const DenoiseParams denoise_params = get_denoise_params(b_scene, b_view_layer, background);
   integrator->set_use_denoise(denoise_params.use);
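
The get_enum() call above relies on the Python enum items and the C++ enum agreeing on their numeric values (0, 1, 2 in the properties.py hunk earlier). A hedged sketch of how the C++ side would have to line up; only DIRECT_LIGHT_SAMPLING_MIS and DIRECT_LIGHT_SAMPLING_NUM appear in the diff, the other enumerator names are assumptions:

    /* Sketch: C++ enum matching the RNA items' numeric values. */
    typedef enum DirectLightSamplingType {
      DIRECT_LIGHT_SAMPLING_MIS = 0,     /* 'MULTIPLE_IMPORTANCE_SAMPLING' */
      DIRECT_LIGHT_SAMPLING_FORWARD = 1, /* 'FORWARD_PATH_TRACING' (name assumed) */
      DIRECT_LIGHT_SAMPLING_NEE = 2,     /* 'NEXT_EVENT_ESTIMATION' (name assumed) */

      DIRECT_LIGHT_SAMPLING_NUM, /* bounds value passed to get_enum() above */
    } DirectLightSamplingType;
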
@@ -835,18 +841,25 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
   /* samples */
   int samples = get_int(cscene, "samples");
   int preview_samples = get_int(cscene, "preview_samples");
+  int sample_offset = get_int(cscene, "sample_offset");
 
   if (background) {
     params.samples = samples;
+    params.sample_offset = sample_offset;
   }
   else {
     params.samples = preview_samples;
-    if (params.samples == 0)
+    if (params.samples == 0) {
       params.samples = INT_MAX;
+    }
+    params.sample_offset = 0;
   }
 
+  /* Clamp sample offset. */
+  params.sample_offset = clamp(params.sample_offset, 0, Integrator::MAX_SAMPLES);
+
   /* Clamp samples. */
-  params.samples = min(params.samples, Integrator::MAX_SAMPLES);
+  params.samples = clamp(params.samples, 0, Integrator::MAX_SAMPLES - params.sample_offset);
 
   /* Viewport Performance */
   params.pixel_size = b_engine.get_preview_pixel_size(b_scene);
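
Offset plus clamping gives a simple contract: the offset is forced into [0, MAX_SAMPLES] first, then the sample count may use at most what remains, so offset + samples never exceeds MAX_SAMPLES. That is what lets several machines each render a disjoint slice of one image's samples. A worked sketch (the MAX_SAMPLES value is invented for the example; C++17 for std::clamp):

    #include <algorithm>
    #include <cstdio>

    int main() {
      const int MAX_SAMPLES = 1 << 24; /* illustrative cap */
      /* Two-machine split of a 1024-sample render: */
      struct { int samples, offset; } jobs[] = {{512, 0}, {512, 512}};
      for (auto &j : jobs) {
        int offset = std::clamp(j.offset, 0, MAX_SAMPLES);
        int samples = std::clamp(j.samples, 0, MAX_SAMPLES - offset);
        printf("render samples [%d, %d)\n", offset, offset + samples);
      }
      return 0;
    }
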
@@ -865,7 +878,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
 
   /* Time limit. */
   if (background) {
-    params.time_limit = get_float(cscene, "time_limit");
+    params.time_limit = (double)get_float(cscene, "time_limit");
   }
   else {
     /* For the viewport it kind of makes more sense to think in terms of the noise floor, which is
@@ -208,7 +208,8 @@ class BlenderSync {
   /* util */
   void find_shader(BL::ID &id, array<Node *> &used_shaders, Shader *default_shader);
   bool BKE_object_is_modified(BL::Object &b_ob);
-  bool object_is_geometry(BL::Object &b_ob);
+  bool object_is_geometry(BObjectInfo &b_ob_info);
+  bool object_can_have_geometry(BL::Object &b_ob);
   bool object_is_light(BL::Object &b_ob);
 
   /* variables */
 
@@ -303,7 +303,7 @@ static inline string image_user_file_path(BL::ImageUser &iuser,
   string filepath_str = string(filepath);
   if (load_tiled && ima.source() == BL::Image::source_TILED) {
     string udim;
-    if (ima.tiles.length() > 0) {
+    if (!ima.tiles.empty()) {
       udim = to_string(ima.tiles[0].number());
     }
     string_replace(filepath_str, udim, "<UDIM>");
@@ -647,7 +647,7 @@ static inline Mesh::SubdivisionType object_subdivision_type(BL::Object &b_ob,
 {
   PointerRNA cobj = RNA_pointer_get(&b_ob.ptr, "cycles");
 
-  if (cobj.data && b_ob.modifiers.length() > 0 && experimental) {
+  if (cobj.data && !b_ob.modifiers.empty() && experimental) {
     BL::Modifier mod = b_ob.modifiers[b_ob.modifiers.length() - 1];
     bool enabled = preview ? mod.show_viewport() : mod.show_render();
@@ -303,7 +303,7 @@ static void rtc_error_func(void *, enum RTCError, const char *str)
   VLOG(1) << str;
 }
 
-static double progress_start_time = 0.0f;
+static double progress_start_time = 0.0;
 
 static bool rtc_progress_func(void *user_ptr, const double n)
 {
@@ -153,7 +153,7 @@ void BVHNode::update_time()
 namespace {
 
 struct DumpTraversalContext {
-  /* Descriptor of wile where writing is happening. */
+  /* Descriptor of file where writing is happening. */
   FILE *stream;
   /* Unique identifier of the node current. */
   int id;
@@ -178,7 +178,7 @@ class InnerNode : public BVHNode {
     reset_unused_children();
   }
 
-  /* NOTE: This function is only used during binary BVH builder, and it
+  /* NOTE: This function is only used during binary BVH builder, and it's
    * supposed to be configured to have 2 children which will be filled-in in a
    * bit. But this is important to have children reset to NULL. */
   explicit InnerNode(const BoundBox &bounds) : BVHNode(bounds), num_children_(0)
@@ -30,15 +30,17 @@ BVHOptiX::BVHOptiX(const BVHParams &params_,
     : BVH(params_, geometry_, objects_),
       device(device),
       traversable_handle(0),
-      as_data(device, params_.top_level ? "optix tlas" : "optix blas", false),
-      motion_transform_data(device, "optix motion transform", false)
+      as_data(make_unique<device_only_memory<char>>(
+          device, params.top_level ? "optix tlas" : "optix blas", false)),
+      motion_transform_data(
+          make_unique<device_only_memory<char>>(device, "optix motion transform", false))
 {
 }
 
 BVHOptiX::~BVHOptiX()
 {
-  // Acceleration structure memory is delayed freed on device, since deleting the
-  // BVH may happen while still being used for rendering.
+  /* Acceleration structure memory is delayed freed on device, since deleting the
+   * BVH may happen while still being used for rendering. */
   device->release_optix_bvh(this);
 }
 
@@ -25,14 +25,16 @@
 
 # include "device/memory.h"
 
+# include "util/unique_ptr.h"
+
 CCL_NAMESPACE_BEGIN
 
 class BVHOptiX : public BVH {
  public:
   Device *device;
   uint64_t traversable_handle;
-  device_only_memory<char> as_data;
-  device_only_memory<char> motion_transform_data;
+  unique_ptr<device_only_memory<char>> as_data;
+  unique_ptr<device_only_memory<char>> motion_transform_data;
 
  protected:
   friend class BVH;
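
Holding the two buffers behind unique_ptr instead of by value is what makes the "delayed free" in the destructor workable: ownership of the device allocation can be handed off and released only after in-flight rendering completes, without device_memory itself needing to be movable. A hedged sketch of the general pattern; the queue and method names here are invented for illustration, not Cycles' actual API:

    #include <memory>
    #include <utility>
    #include <vector>

    struct DeviceBuffer { /* stand-in for device_only_memory<char> */ };

    struct DeviceStub {
      std::vector<std::unique_ptr<DeviceBuffer>> delayed_free;
      /* Take ownership now, actually release once the device is idle. */
      void release_later(std::unique_ptr<DeviceBuffer> buf) {
        delayed_free.push_back(std::move(buf));
      }
      void flush_when_idle() { delayed_free.clear(); }
    };

    struct BVHStub {
      DeviceStub *device;
      std::unique_ptr<DeviceBuffer> as_data = std::make_unique<DeviceBuffer>();
      ~BVHStub() { device->release_later(std::move(as_data)); }
    };

    int main() {
      DeviceStub device;
      {
        BVHStub bvh{&device};
      } /* BVH destroyed here; buffer ownership moves to the device... */
      device.flush_when_idle(); /* ...and is actually freed once it is safe */
      return 0;
    }
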
@@ -88,7 +88,7 @@ endmacro()
 
 function(cycles_link_directories)
   if(APPLE)
-    # APPLE plaform uses full paths for linking libraries, and avoids link_directories.
+    # APPLE platform uses full paths for linking libraries, and avoids link_directories.
     return()
   endif()
 
@@ -93,11 +93,6 @@ CPUDevice::~CPUDevice()
   texture_info.free();
 }
 
-bool CPUDevice::show_samples() const
-{
-  return (info.cpu_threads == 1);
-}
-
 BVHLayoutMask CPUDevice::get_bvh_layout_mask() const
 {
   BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
 
@@ -60,8 +60,6 @@ class CPUDevice : public Device {
   CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_);
   ~CPUDevice();
 
-  virtual bool show_samples() const override;
-
   virtual BVHLayoutMask get_bvh_layout_mask() const override;
 
   /* Returns true if the texture info was copied to the device (meaning, some more
 
@@ -42,7 +42,7 @@ class CPUKernels {
 
   IntegratorInitFunction integrator_init_from_camera;
   IntegratorInitFunction integrator_init_from_bake;
-  IntegratorFunction integrator_intersect_closest;
+  IntegratorShadeFunction integrator_intersect_closest;
   IntegratorFunction integrator_intersect_shadow;
   IntegratorFunction integrator_intersect_subsurface;
   IntegratorFunction integrator_intersect_volume_stack;
@@ -46,12 +46,6 @@ bool CUDADevice::have_precompiled_kernels()
   return path_exists(cubins_path);
 }
 
-bool CUDADevice::show_samples() const
-{
-  /* The CUDADevice only processes one tile at a time, so showing samples is fine. */
-  return true;
-}
-
 BVHLayoutMask CUDADevice::get_bvh_layout_mask() const
 {
   return BVH_LAYOUT_BVH2;
@@ -242,6 +236,10 @@ string CUDADevice::compile_kernel_get_common_cflags(const uint kernel_features)
   cflags += " -DWITH_NANOVDB";
 #  endif
 
+#  ifdef WITH_CYCLES_DEBUG
+  cflags += " -DWITH_CYCLES_DEBUG";
+#  endif
+
   return cflags;
 }
 
@@ -777,6 +775,7 @@ void CUDADevice::generic_free(device_memory &mem)
   if (mem.device_pointer) {
     CUDAContextScope scope(this);
     thread_scoped_lock lock(cuda_mem_map_mutex);
+    DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
     const CUDAMem &cmem = cuda_mem_map[&mem];
 
     /* If cmem.use_mapped_host is true, reference counting is used
@@ -931,7 +930,6 @@ void CUDADevice::tex_alloc(device_texture &mem)
 {
   CUDAContextScope scope(this);
 
-  /* General variables for both architectures */
   string bind_name = mem.name;
   size_t dsize = datatype_size(mem.data_type);
   size_t size = mem.memory_size();
@@ -1094,7 +1092,6 @@ void CUDADevice::tex_alloc(device_texture &mem)
 
   if (mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
       mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
-    /* Kepler+, bindless textures. */
     CUDA_RESOURCE_DESC resDesc;
     memset(&resDesc, 0, sizeof(resDesc));
 
@@ -1145,6 +1142,7 @@ void CUDADevice::tex_free(device_texture &mem)
   if (mem.device_pointer) {
     CUDAContextScope scope(this);
     thread_scoped_lock lock(cuda_mem_map_mutex);
+    DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
     const CUDAMem &cmem = cuda_mem_map[&mem];
 
     if (cmem.texobject) {
 
@@ -76,8 +76,6 @@ class CUDADevice : public Device {
 
   static bool have_precompiled_kernels();
 
-  virtual bool show_samples() const override;
-
   virtual BVHLayoutMask get_bvh_layout_mask() const override;
 
   void set_error(const string &error) override;
 
@@ -149,10 +149,6 @@ class Device {
     fprintf(stderr, "%s\n", error.c_str());
     fflush(stderr);
   }
-  virtual bool show_samples() const
-  {
-    return false;
-  }
   virtual BVHLayoutMask get_bvh_layout_mask() const = 0;
 
   /* statistics */
@@ -57,9 +57,16 @@ bool device_hip_init()
     }
   }
   else {
-    VLOG(1) << "HIPEW initialization failed: "
-            << ((hipew_result == HIPEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" :
-                                                              "Error opening the library");
+    if (hipew_result == HIPEW_ERROR_ATEXIT_FAILED) {
+      VLOG(1) << "HIPEW initialization failed: Error setting up atexit() handler";
+    }
+    else if (hipew_result == HIPEW_ERROR_OLD_DRIVER) {
+      VLOG(1) << "HIPEW initialization failed: Driver version too old, requires AMD Radeon Pro "
+                 "21.Q4 driver or newer";
+    }
+    else {
+      VLOG(1) << "HIPEW initialization failed: Error opening HIP dynamic library";
+    }
   }
 
   return result;
@@ -47,12 +47,6 @@ bool HIPDevice::have_precompiled_kernels()
   return path_exists(fatbins_path);
 }
 
-bool HIPDevice::show_samples() const
-{
-  /* The HIPDevice only processes one tile at a time, so showing samples is fine. */
-  return true;
-}
-
 BVHLayoutMask HIPDevice::get_bvh_layout_mask() const
 {
   return BVH_LAYOUT_BVH2;
@@ -99,7 +93,7 @@ HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
   }
 
   /* Setup device and context. */
-  result = hipGetDevice(&hipDevice, hipDevId);
+  result = hipDeviceGet(&hipDevice, hipDevId);
   if (result != hipSuccess) {
     set_error(string_printf("Failed to get HIP device handle from ordinal (%s)",
                             hipewErrorString(result)));
@@ -154,7 +148,7 @@ bool HIPDevice::support_device(const uint /*kernel_features*/)
     hipDeviceProp_t props;
     hipGetDeviceProperties(&props, hipDevId);
 
-    set_error(string_printf("HIP backend requires AMD RDNA2 graphics card or up, but found %s.",
+    set_error(string_printf("HIP backend requires AMD RDNA graphics card or up, but found %s.",
                             props.name));
     return false;
   }
@@ -222,7 +216,6 @@ string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
   const string include_path = source_path;
   string cflags = string_printf(
       "-m%d "
-      "--ptxas-options=\"-v\" "
       "--use_fast_math "
       "-DHIPCC "
      "-I\"%s\"",
@@ -234,10 +227,7 @@ string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
   return cflags;
 }
 
-string HIPDevice::compile_kernel(const uint kernel_features,
-                                 const char *name,
-                                 const char *base,
-                                 bool force_ptx)
+string HIPDevice::compile_kernel(const uint kernel_features, const char *name, const char *base)
 {
   /* Compute kernel name. */
   int major, minor;
@@ -247,7 +237,7 @@ string HIPDevice::compile_kernel(const uint kernel_features,
   hipGetDeviceProperties(&props, hipDevId);
 
   /* gcnArchName can contain tokens after the arch name with features, ie.
-     "gfx1010:sramecc-:xnack-" so we tokenize it to get the first part. */
+   * `gfx1010:sramecc-:xnack-` so we tokenize it to get the first part. */
   char *arch = strtok(props.gcnArchName, ":");
   if (arch == NULL) {
     arch = props.gcnArchName;
@@ -255,7 +245,6 @@ string HIPDevice::compile_kernel(const uint kernel_features,
 
   /* Attempt to use kernel provided with Blender. */
   if (!use_adaptive_compilation()) {
-    if (!force_ptx) {
     const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, arch));
     VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
     if (path_exists(fatbin)) {
@@ -263,7 +252,6 @@ string HIPDevice::compile_kernel(const uint kernel_features,
       return fatbin;
     }
   }
-  }
 
   /* Try to use locally compiled kernel. */
   string source_path = path_get("source");
@@ -298,9 +286,9 @@ string HIPDevice::compile_kernel(const uint kernel_features,
 
 #  ifdef _WIN32
   if (!use_adaptive_compilation() && have_precompiled_kernels()) {
-    if (major < 3) {
+    if (!hipSupportsDevice(hipDevId)) {
       set_error(
-          string_printf("HIP backend requires compute capability 3.0 or up, but found %d.%d. "
+          string_printf("HIP backend requires compute capability 10.1 or up, but found %d.%d. "
                         "Your GPU is not supported.",
                         major,
                         minor));
@@ -380,10 +368,9 @@ string HIPDevice::compile_kernel(const uint kernel_features,
 
 bool HIPDevice::load_kernels(const uint kernel_features)
 {
-  /* TODO(sergey): Support kernels re-load for CUDA devices adaptive compile.
+  /* TODO(sergey): Support kernels re-load for HIP devices adaptive compile.
    *
-   * Currently re-loading kernel will invalidate memory pointers,
-   * causing problems in cuCtxSynchronize.
+   * Currently re-loading kernels will invalidate memory pointers.
    */
   if (hipModule) {
     if (use_adaptive_compilation()) {
@@ -751,6 +738,7 @@ void HIPDevice::generic_free(device_memory &mem)
   if (mem.device_pointer) {
     HIPContextScope scope(this);
     thread_scoped_lock lock(hip_mem_map_mutex);
+    DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
     const HIPMem &cmem = hip_mem_map[&mem];
 
     /* If cmem.use_mapped_host is true, reference counting is used
@@ -904,7 +892,6 @@ void HIPDevice::tex_alloc(device_texture &mem)
 {
   HIPContextScope scope(this);
 
-  /* General variables for both architectures */
   string bind_name = mem.name;
   size_t dsize = datatype_size(mem.data_type);
   size_t size = mem.memory_size();
@@ -994,16 +981,16 @@ void HIPDevice::tex_alloc(device_texture &mem)
             << string_human_readable_number(mem.memory_size()) << " bytes. ("
             << string_human_readable_size(mem.memory_size()) << ")";
 
-    hip_assert(hipArray3DCreate(&array_3d, &desc));
+    hip_assert(hipArray3DCreate((hArray *)&array_3d, &desc));
 
     if (!array_3d) {
       return;
     }
 
     HIP_MEMCPY3D param;
-    memset(&param, 0, sizeof(param));
+    memset(&param, 0, sizeof(HIP_MEMCPY3D));
     param.dstMemoryType = hipMemoryTypeArray;
-    param.dstArray = &array_3d;
+    param.dstArray = array_3d;
     param.srcMemoryType = hipMemoryTypeHost;
     param.srcHost = mem.host_pointer;
     param.srcPitch = src_pitch;
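
The dstArray change is the telling one: HIP's array handle is itself a pointer type, so the old code was passing a pointer-to-handle where a handle was expected (and the creation call now goes through an explicit hArray* cast for the out-parameter). A reduced sketch of the distinction, with stand-in types rather than the real HIP API:

    /* Sketch: handle vs. pointer-to-handle. */
    struct ArrayImpl;
    typedef ArrayImpl *hArrayLike; /* the handle is already a pointer */

    struct Copy3D {
      hArrayLike dstArray;
    };

    void describe_copy(hArrayLike array_3d) {
      Copy3D param = {};
      param.dstArray = array_3d; /* correct: store the handle itself */
      /* param.dstArray = &array_3d; would store an ArrayImpl**, the old bug. */
      (void)param;
    }

    int main() {
      describe_copy(nullptr);
      return 0;
    }
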
@@ -1069,13 +1056,13 @@ void HIPDevice::tex_alloc(device_texture &mem)
 
   if (mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
       mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
-    /* Kepler+, bindless textures. */
+    /* Bindless textures. */
     hipResourceDesc resDesc;
     memset(&resDesc, 0, sizeof(resDesc));
 
     if (array_3d) {
       resDesc.resType = hipResourceTypeArray;
-      resDesc.res.array.h_Array = &array_3d;
+      resDesc.res.array.h_Array = array_3d;
       resDesc.flags = 0;
     }
     else if (mem.data_height > 0) {
@@ -1120,6 +1107,7 @@ void HIPDevice::tex_free(device_texture &mem)
   if (mem.device_pointer) {
     HIPContextScope scope(this);
     thread_scoped_lock lock(hip_mem_map_mutex);
+    DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
     const HIPMem &cmem = hip_mem_map[&mem];
 
     if (cmem.texobject) {
@@ -1160,6 +1148,8 @@ bool HIPDevice::should_use_graphics_interop()
    * possible, but from the empiric measurements it can be considerably slower than using naive
    * pixels copy. */
 
+  /* Disable graphics interop for now, because of driver bug in 21.40. See T92972 */
+#  if 0
   HIPContextScope scope(this);
 
   int num_all_devices = 0;
@@ -1178,6 +1168,7 @@ bool HIPDevice::should_use_graphics_interop()
       return true;
     }
   }
+#  endif
 
   return false;
 }
@@ -75,8 +75,6 @@ class HIPDevice : public Device {
 
   static bool have_precompiled_kernels();
 
-  virtual bool show_samples() const override;
-
   virtual BVHLayoutMask get_bvh_layout_mask() const override;
 
   void set_error(const string &error) override;
@@ -93,10 +91,7 @@ class HIPDevice : public Device {
 
   virtual string compile_kernel_get_common_cflags(const uint kernel_features);
 
-  string compile_kernel(const uint kernel_features,
-                        const char *name,
-                        const char *base = "hip",
-                        bool force_ptx = false);
+  string compile_kernel(const uint kernel_features, const char *name, const char *base = "hip");
 
   virtual bool load_kernels(const uint kernel_features) override;
   void reserve_local_memory(const uint kernel_features);
@@ -48,7 +48,7 @@ class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop {
   HIPDeviceQueue *queue_ = nullptr;
   HIPDevice *device_ = nullptr;
 
-  /* OpenGL PBO which is currently registered as the destination for the CUDA buffer. */
+  /* OpenGL PBO which is currently registered as the destination for the HIP buffer. */
   uint opengl_pbo_id_ = 0;
   /* Buffer area in pixels of the corresponding PBO. */
   int64_t buffer_area_ = 0;
@@ -64,7 +64,7 @@ static inline bool hipSupportsDevice(const int hipDevId)
   hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
   hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
 
-  return (major > 10) || (major == 10 && minor >= 3);
+  return (major > 10) || (major == 10 && minor >= 1);
 }
 
 CCL_NAMESPACE_END
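
This relaxation from 10.3 to 10.1 is what widens support from RDNA2-only to RDNA generally: on AMD hardware the major/minor pair reported here tracks the gfx architecture number, so gfx1010 (RDNA 1) reports 10.1 while gfx1030 (RDNA 2) reports 10.3. A tiny sketch of the predicate against those values:

    #include <cstdio>

    /* Sketch of the relaxed check; the pairs follow the gfx10xx naming. */
    bool supports(int major, int minor) {
      return (major > 10) || (major == 10 && minor >= 1);
    }

    int main() {
      printf("gfx1010 (RDNA):  %d\n", supports(10, 1)); /* now accepted */
      printf("gfx1030 (RDNA2): %d\n", supports(10, 3));
      printf("gfx906 (Vega):   %d\n", supports(9, 0));  /* still rejected */
      return 0;
    }
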
@@ -23,7 +23,7 @@ CCL_NAMESPACE_BEGIN
 
 device_memory::device_memory(Device *device, const char *name, MemoryType type)
     : data_type(device_type_traits<uchar>::data_type),
-      data_elements(device_type_traits<uchar>::num_elements_cpu),
+      data_elements(device_type_traits<uchar>::num_elements),
       data_size(0),
       device_size(0),
       data_width(0),
@@ -44,45 +44,6 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type)
 {
 }
 
-device_memory::device_memory(device_memory &&other) noexcept
-    : data_type(other.data_type),
-      data_elements(other.data_elements),
-      data_size(other.data_size),
-      device_size(other.device_size),
-      data_width(other.data_width),
-      data_height(other.data_height),
-      data_depth(other.data_depth),
-      type(other.type),
-      name(other.name),
-      device(other.device),
-      device_pointer(other.device_pointer),
-      host_pointer(other.host_pointer),
-      shared_pointer(other.shared_pointer),
-      shared_counter(other.shared_counter),
-      original_device_ptr(other.original_device_ptr),
-      original_device_size(other.original_device_size),
-      original_device(other.original_device),
-      need_realloc_(other.need_realloc_),
-      modified(other.modified)
-{
-  other.data_elements = 0;
-  other.data_size = 0;
-  other.device_size = 0;
-  other.data_width = 0;
-  other.data_height = 0;
-  other.data_depth = 0;
-  other.device = 0;
-  other.device_pointer = 0;
-  other.host_pointer = 0;
-  other.shared_pointer = 0;
-  other.shared_counter = 0;
-  other.original_device_ptr = 0;
-  other.original_device_size = 0;
-  other.original_device = 0;
-  other.need_realloc_ = false;
-  other.modified = false;
-}
-
 device_memory::~device_memory()
 {
   assert(shared_pointer == 0);
@@ -81,155 +81,140 @@ static constexpr size_t datatype_size(DataType datatype)
 template<typename T> struct device_type_traits {
   static const DataType data_type = TYPE_UNKNOWN;
-  static const size_t num_elements_cpu = sizeof(T);
-  static const size_t num_elements_gpu = sizeof(T);
+  static const size_t num_elements = sizeof(T);
 };

 template<> struct device_type_traits<uchar> {
   static const DataType data_type = TYPE_UCHAR;
-  static const size_t num_elements_cpu = 1;
-  static const size_t num_elements_gpu = 1;
-  static_assert(sizeof(uchar) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 1;
+  static_assert(sizeof(uchar) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<uchar2> {
   static const DataType data_type = TYPE_UCHAR;
-  static const size_t num_elements_cpu = 2;
-  static const size_t num_elements_gpu = 2;
-  static_assert(sizeof(uchar2) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 2;
+  static_assert(sizeof(uchar2) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<uchar3> {
   static const DataType data_type = TYPE_UCHAR;
-  static const size_t num_elements_cpu = 3;
-  static const size_t num_elements_gpu = 3;
-  static_assert(sizeof(uchar3) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 3;
+  static_assert(sizeof(uchar3) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<uchar4> {
   static const DataType data_type = TYPE_UCHAR;
-  static const size_t num_elements_cpu = 4;
-  static const size_t num_elements_gpu = 4;
-  static_assert(sizeof(uchar4) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 4;
+  static_assert(sizeof(uchar4) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<uint> {
   static const DataType data_type = TYPE_UINT;
-  static const size_t num_elements_cpu = 1;
-  static const size_t num_elements_gpu = 1;
-  static_assert(sizeof(uint) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 1;
+  static_assert(sizeof(uint) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<uint2> {
   static const DataType data_type = TYPE_UINT;
-  static const size_t num_elements_cpu = 2;
-  static const size_t num_elements_gpu = 2;
-  static_assert(sizeof(uint2) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 2;
+  static_assert(sizeof(uint2) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<uint3> {
   static const DataType data_type = TYPE_UINT;
-  static const size_t num_elements_cpu = 3;
-  static const size_t num_elements_gpu = 3;
-  static_assert(sizeof(uint3) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 3;
+  static_assert(sizeof(uint3) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<uint4> {
   static const DataType data_type = TYPE_UINT;
-  static const size_t num_elements_cpu = 4;
-  static const size_t num_elements_gpu = 4;
-  static_assert(sizeof(uint4) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 4;
+  static_assert(sizeof(uint4) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<int> {
   static const DataType data_type = TYPE_INT;
-  static const size_t num_elements_cpu = 1;
-  static const size_t num_elements_gpu = 1;
-  static_assert(sizeof(int) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 1;
+  static_assert(sizeof(int) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<int2> {
   static const DataType data_type = TYPE_INT;
-  static const size_t num_elements_cpu = 2;
-  static const size_t num_elements_gpu = 2;
-  static_assert(sizeof(int2) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 2;
+  static_assert(sizeof(int2) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<int3> {
   static const DataType data_type = TYPE_INT;
-  static const size_t num_elements_cpu = 4;
-  static const size_t num_elements_gpu = 3;
-  static_assert(sizeof(int3) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 4;
+  static_assert(sizeof(int3) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<int4> {
   static const DataType data_type = TYPE_INT;
-  static const size_t num_elements_cpu = 4;
-  static const size_t num_elements_gpu = 4;
-  static_assert(sizeof(int4) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 4;
+  static_assert(sizeof(int4) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<float> {
   static const DataType data_type = TYPE_FLOAT;
-  static const size_t num_elements_cpu = 1;
-  static const size_t num_elements_gpu = 1;
-  static_assert(sizeof(float) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 1;
+  static_assert(sizeof(float) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<float2> {
   static const DataType data_type = TYPE_FLOAT;
-  static const size_t num_elements_cpu = 2;
-  static const size_t num_elements_gpu = 2;
-  static_assert(sizeof(float2) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 2;
+  static_assert(sizeof(float2) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<float3> {
-  static const DataType data_type = TYPE_FLOAT;
-  static const size_t num_elements_cpu = 4;
-  static const size_t num_elements_gpu = 3;
-  static_assert(sizeof(float3) == num_elements_cpu * datatype_size(data_type));
+  /* float3 has different size depending on the device, can't use it for interchanging
+   * memory between CPU and GPU.
+   *
+   * Leave body empty to trigger a compile error if used. */
 };

+template<> struct device_type_traits<packed_float3> {
+  static const DataType data_type = TYPE_FLOAT;
+  static const size_t num_elements = 3;
+  static_assert(sizeof(packed_float3) == num_elements * datatype_size(data_type));
+};
+
 template<> struct device_type_traits<float4> {
   static const DataType data_type = TYPE_FLOAT;
-  static const size_t num_elements_cpu = 4;
-  static const size_t num_elements_gpu = 4;
-  static_assert(sizeof(float4) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 4;
+  static_assert(sizeof(float4) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<half> {
   static const DataType data_type = TYPE_HALF;
-  static const size_t num_elements_cpu = 1;
-  static const size_t num_elements_gpu = 1;
-  static_assert(sizeof(half) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 1;
+  static_assert(sizeof(half) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<ushort4> {
   static const DataType data_type = TYPE_UINT16;
-  static const size_t num_elements_cpu = 4;
-  static const size_t num_elements_gpu = 4;
-  static_assert(sizeof(ushort4) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 4;
+  static_assert(sizeof(ushort4) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<uint16_t> {
   static const DataType data_type = TYPE_UINT16;
-  static const size_t num_elements_cpu = 1;
-  static const size_t num_elements_gpu = 1;
-  static_assert(sizeof(uint16_t) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 1;
+  static_assert(sizeof(uint16_t) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<half4> {
   static const DataType data_type = TYPE_HALF;
-  static const size_t num_elements_cpu = 4;
-  static const size_t num_elements_gpu = 4;
-  static_assert(sizeof(half4) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 4;
+  static_assert(sizeof(half4) == num_elements * datatype_size(data_type));
 };

 template<> struct device_type_traits<uint64_t> {
   static const DataType data_type = TYPE_UINT64;
-  static const size_t num_elements_cpu = 1;
-  static const size_t num_elements_gpu = 1;
-  static_assert(sizeof(uint64_t) == num_elements_cpu * datatype_size(data_type));
+  static const size_t num_elements = 1;
+  static_assert(sizeof(uint64_t) == num_elements * datatype_size(data_type));
 };

 /* Device Memory
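Note on the traits hunks above: the separate num_elements_cpu/num_elements_gpu pair collapses into a single num_elements, which only works because every remaining specialization now has the same layout on CPU and GPU. float3 (16 bytes on a typical SSE-aligned CPU build, 12 on GPU) is excluded by leaving its specialization bodyless, and packed_float3 becomes the interchange type. A self-contained sketch of the pattern, with simplified stand-in definitions for DataType, datatype_size() and packed_float3 (not the Cycles ones):

#include <cstddef>

/* Stand-ins for the Cycles definitions. */
enum DataType { TYPE_UNKNOWN, TYPE_FLOAT };

constexpr size_t datatype_size(DataType t)
{
  return t == TYPE_FLOAT ? sizeof(float) : 0;
}

/* 12 bytes on common ABIs: tightly packed, same layout on CPU and GPU. */
struct packed_float3 {
  float x, y, z;
};

template<typename T> struct device_type_traits {
  static const DataType data_type = TYPE_UNKNOWN;
  static const size_t num_elements = sizeof(T);
};

template<> struct device_type_traits<packed_float3> {
  static const DataType data_type = TYPE_FLOAT;
  static const size_t num_elements = 3;
  /* The assert that guards interchangeability: element count times element
   * size must equal the struct size, with no hidden padding. */
  static_assert(sizeof(packed_float3) == num_elements * datatype_size(data_type));
};

int main()
{
  /* A 16-byte, SIMD-aligned float3 would fail the assert above, which is
   * exactly why the float3 specialization is left bodyless. */
  static_assert(sizeof(packed_float3) == 12);
  return 0;
}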
@@ -281,11 +266,16 @@ class device_memory {
   /* Only create through subclasses. */
   device_memory(Device *device, const char *name, MemoryType type);
-  device_memory(device_memory &&other) noexcept;

-  /* No copying allowed. */
+  /* No copying or moving allowed.
+   *
+   * This is because the device implementation might need to register device memory in an
+   * allocation map of some sort and use the pointer as a key to identify blocks. Moving data
+   * from one place to another bypassing device allocation routines would make those maps hard
+   * to maintain. */
   device_memory(const device_memory &) = delete;
+  device_memory(device_memory &&other) noexcept = delete;
   device_memory &operator=(const device_memory &) = delete;
+  device_memory &operator=(device_memory &&) = delete;

   /* Host allocation on the device. All host_pointer memory should be
    * allocated with these functions, for devices that support using

@@ -320,9 +310,7 @@ template<typename T> class device_only_memory : public device_memory {

       : device_memory(device, name, allow_host_memory_fallback ? MEM_READ_WRITE : MEM_DEVICE_ONLY)
   {
     data_type = device_type_traits<T>::data_type;
-    data_elements = max(device_is_cpu() ? device_type_traits<T>::num_elements_cpu :
-                                          device_type_traits<T>::num_elements_gpu,
-                        1);
+    data_elements = max(device_type_traits<T>::num_elements, 1);
   }

   device_only_memory(device_only_memory &&other) noexcept : device_memory(std::move(other))

@@ -378,15 +366,11 @@ template<typename T> class device_only_memory : public device_memory {

 template<typename T> class device_vector : public device_memory {
  public:
-  /* Can only use this for types that have the same size on CPU and GPU. */
-  static_assert(device_type_traits<T>::num_elements_cpu ==
-                device_type_traits<T>::num_elements_gpu);
-
   device_vector(Device *device, const char *name, MemoryType type)
       : device_memory(device, name, type)
   {
     data_type = device_type_traits<T>::data_type;
-    data_elements = device_type_traits<T>::num_elements_cpu;
+    data_elements = device_type_traits<T>::num_elements;
     modified = true;
     need_realloc_ = true;
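Note on the device_memory hunks above: move construction and move assignment are now deleted alongside copying, for the reason given in the new comment. A toy illustration of the failure mode being prevented, with a hypothetical Allocator standing in for a device backend that keys allocations by object address:

#include <cassert>
#include <cstddef>
#include <unordered_map>

/* Toy stand-in: a backend that tracks live allocations by owner address. */
struct Allocator {
  std::unordered_map<const void *, size_t> live;

  void alloc(const void *owner, size_t size) { live[owner] = size; }
  void free(const void *owner) { live.erase(owner); }
};

struct Block {
  /* If moving were allowed, the backend map would keep pointing at the
   * old object's address: exactly the staleness the deleted move
   * constructor in the hunk above prevents. */
  Block(const Block &) = delete;
  Block &operator=(const Block &) = delete;
  Block(Block &&) = delete;
  Block &operator=(Block &&) = delete;

  Block(Allocator *b, size_t s) : backend(b), size(s) { backend->alloc(this, size); }
  ~Block() { backend->free(this); }

  Allocator *backend = nullptr;
  size_t size = 0;
};

int main()
{
  Allocator a;
  {
    Block b(&a, 64);
    assert(a.live.count(&b) == 1);
  }
  assert(a.live.empty()); /* freed exactly once, with a stable key */
  return 0;
}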
@@ -109,14 +109,6 @@ class MultiDevice : public Device {
     return error_msg;
   }

-  virtual bool show_samples() const override
-  {
-    if (devices.size() > 1) {
-      return false;
-    }
-    return devices.front().device->show_samples();
-  }
-
   virtual BVHLayoutMask get_bvh_layout_mask() const override
   {
     BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
@@ -48,14 +48,6 @@ OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
 {
 }

-OptiXDevice::Denoiser::~Denoiser()
-{
-  const CUDAContextScope scope(device);
-  if (optix_denoiser != nullptr) {
-    optixDenoiserDestroy(optix_denoiser);
-  }
-}
-
 OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
     : CUDADevice(info, stats, profiler),
       sbt_data(this, "__sbt", MEM_READ_ONLY),

@@ -133,6 +125,11 @@ OptiXDevice::~OptiXDevice()

     }
   }

+  /* Make sure denoiser is destroyed before device context! */
+  if (denoiser_.optix_denoiser != nullptr) {
+    optixDenoiserDestroy(denoiser_.optix_denoiser);
+  }
+
   optixDeviceContextDestroy(context);
 }

@@ -884,27 +881,31 @@ bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)

   optix_assert(optixDenoiserComputeMemoryResources(
       denoiser_.optix_denoiser, buffer_params.width, buffer_params.height, &sizes));

-  denoiser_.scratch_size = sizes.withOverlapScratchSizeInBytes;
+  /* Denoiser is invoked on whole images only, so no overlap needed (would be used for tiling). */
+  denoiser_.scratch_size = sizes.withoutOverlapScratchSizeInBytes;
   denoiser_.scratch_offset = sizes.stateSizeInBytes;

   /* Allocate denoiser state if tile size has changed since last setup. */
   denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size);

   /* Initialize denoiser state for the current tile size. */
-  const OptixResult result = optixDenoiserSetup(denoiser_.optix_denoiser,
-                                                denoiser_.queue.stream(),
-                                                buffer_params.width,
-                                                buffer_params.height,
-                                                denoiser_.state.device_pointer,
-                                                denoiser_.scratch_offset,
-                                                denoiser_.state.device_pointer +
-                                                    denoiser_.scratch_offset,
-                                                denoiser_.scratch_size);
+  const OptixResult result = optixDenoiserSetup(
+      denoiser_.optix_denoiser,
+      0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
+            on a stream that is not the default stream. */
+      buffer_params.width,
+      buffer_params.height,
+      denoiser_.state.device_pointer,
+      denoiser_.scratch_offset,
+      denoiser_.state.device_pointer + denoiser_.scratch_offset,
+      denoiser_.scratch_size);
   if (result != OPTIX_SUCCESS) {
     set_error("Failed to set up OptiX denoiser");
     return false;
   }

+  cuda_assert(cuCtxSynchronize());
+
   denoiser_.is_configured = true;
   denoiser_.configured_size.x = buffer_params.width;
   denoiser_.configured_size.y = buffer_params.height;
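Note on the denoiser setup hunk above: state and scratch share one device allocation, laid out as state bytes followed by scratch bytes. A small sketch of the layout arithmetic, with a simplified stand-in for the sizes struct reported by optixDenoiserComputeMemoryResources() (the byte counts are made up):

#include <cassert>
#include <cstddef>

/* Stand-in for OptixDenoiserSizes; values are illustrative only. */
struct DenoiserSizes {
  size_t stateSizeInBytes = 4u << 20;                  /* 4 MiB of state */
  size_t withoutOverlapScratchSizeInBytes = 16u << 20; /* 16 MiB of scratch */
};

int main()
{
  DenoiserSizes sizes;

  /* One buffer holds both regions: [ state | scratch ]. */
  const size_t scratch_offset = sizes.stateSizeInBytes;
  const size_t scratch_size = sizes.withoutOverlapScratchSizeInBytes;
  const size_t total_alloc = scratch_offset + scratch_size;

  /* The setup call in the hunk then receives: base pointer, scratch_offset
   * (the state size), base + scratch_offset, and scratch_size. */
  assert(total_alloc == (4u << 20) + (16u << 20));
  return 0;
}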
@@ -939,8 +940,6 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
     color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
   }

-  device_vector<float> fake_albedo(this, "fake_albedo", MEM_READ_WRITE);
-
   /* Optional albedo and color passes. */
   if (context.num_input_passes > 1) {
     const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;

@@ -971,6 +970,7 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)

   /* Finally run denoising. */
   OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
+
   OptixDenoiserLayer image_layers = {};
   image_layers.input = color_layer;
   image_layers.output = output_layer;

@@ -1032,7 +1032,7 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,

     return false;
   }

-  device_only_memory<char> &out_data = bvh->as_data;
+  device_only_memory<char> &out_data = *bvh->as_data;
   if (operation == OPTIX_BUILD_OPERATION_BUILD) {
     assert(out_data.device == this);
     out_data.alloc_to_device(sizes.outputSizeInBytes);

@@ -1123,7 +1123,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)

     operation = OPTIX_BUILD_OPERATION_UPDATE;
   }
   else {
-    bvh_optix->as_data.free();
+    bvh_optix->as_data->free();
     bvh_optix->traversable_handle = 0;
   }

@@ -1344,9 +1344,9 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)

     unsigned int num_instances = 0;
     unsigned int max_num_instances = 0xFFFFFFFF;

-    bvh_optix->as_data.free();
+    bvh_optix->as_data->free();
     bvh_optix->traversable_handle = 0;
-    bvh_optix->motion_transform_data.free();
+    bvh_optix->motion_transform_data->free();

     optixDeviceContextGetProperty(context,
                                   OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID,

@@ -1379,8 +1379,8 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)

         }
       }

-      assert(bvh_optix->motion_transform_data.device == this);
-      bvh_optix->motion_transform_data.alloc_to_device(total_motion_transform_size);
+      assert(bvh_optix->motion_transform_data->device == this);
+      bvh_optix->motion_transform_data->alloc_to_device(total_motion_transform_size);
     }

     for (Object *ob : bvh->objects) {

@@ -1441,7 +1441,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)

         motion_transform_offset = align_up(motion_transform_offset,
                                            OPTIX_TRANSFORM_BYTE_ALIGNMENT);
-        CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data.device_pointer +
+        CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data->device_pointer +
                                            motion_transform_offset;
         motion_transform_offset += motion_transform_size;
@@ -23,6 +23,7 @@
 #  include "device/optix/queue.h"
 #  include "device/optix/util.h"
 #  include "kernel/types.h"
+#  include "util/unique_ptr.h"

 CCL_NAMESPACE_BEGIN

@@ -76,13 +77,12 @@ class OptiXDevice : public CUDADevice {

   device_only_memory<KernelParamsOptiX> launch_params;
   OptixTraversableHandle tlas_handle = 0;

-  vector<device_only_memory<char>> delayed_free_bvh_memory;
+  vector<unique_ptr<device_only_memory<char>>> delayed_free_bvh_memory;
   thread_mutex delayed_free_bvh_mutex;

   class Denoiser {
    public:
    explicit Denoiser(OptiXDevice *device);
-   ~Denoiser();

    OptiXDevice *device;
    OptiXDeviceQueue queue;
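Note on the hunks above and the `.` to `->` changes in build_bvh(): this is the counterpart of deleting device_memory's move semantics. BVH buffers are now held behind unique_ptr, so ownership can still migrate into delayed_free_bvh_memory while the underlying object keeps a stable address. A toy sketch of the pattern, with Buffer standing in for device_only_memory<char>:

#include <cassert>
#include <memory>
#include <utility>
#include <vector>

struct Buffer {
  Buffer() = default;
  Buffer(const Buffer &) = delete; /* non-copyable, non-movable */
  Buffer &operator=(const Buffer &) = delete;
  int data = 0;
};

int main()
{
  auto buf = std::make_unique<Buffer>();
  Buffer *address_before = buf.get();

  /* Transfer ownership for delayed freeing: only the pointer moves, the
   * Buffer object itself never changes address, so any backend map keyed
   * by its address stays valid. */
  std::vector<std::unique_ptr<Buffer>> delayed_free;
  delayed_free.push_back(std::move(buf));

  assert(delayed_free.back().get() == address_before);
  return 0;
}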
@@ -73,7 +73,8 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *a
                           sizeof(device_ptr),
                           cuda_stream_));

-  if (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
+  if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
+      kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
     cuda_device_assert(
         cuda_device_,
         cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
@@ -3,7 +3,7 @@ This program uses code from various sources, the default license is Apache 2.0
 for all code, with the following exceptions.

 Modified BSD License
-* Code adapated from Open Shading Language
+* Code adapted from Open Shading Language
 * Sobol direction vectors
 * Matrix inversion code from OpenEXR
 * MD5 Hash code
@@ -33,7 +33,10 @@ unique_ptr<Denoiser> Denoiser::create(Device *path_trace_device, const DenoisePa
     return make_unique<OptiXDenoiser>(path_trace_device, params);
   }

-  return make_unique<OIDNDenoiser>(path_trace_device, params);
+  /* Always fall back to OIDN. */
+  DenoiseParams oidn_params = params;
+  oidn_params.type = DENOISER_OPENIMAGEDENOISE;
+  return make_unique<OIDNDenoiser>(path_trace_device, oidn_params);
 }

 Denoiser::Denoiser(Device *path_trace_device, const DenoiseParams &params)

@@ -47,9 +47,6 @@ static bool oidn_progress_monitor_function(void *user_ptr, double /*n*/)

   OIDNDenoiser *oidn_denoiser = reinterpret_cast<OIDNDenoiser *>(user_ptr);
   return !oidn_denoiser->is_cancelled();
 }
-#endif
-
-#ifdef WITH_OPENIMAGEDENOISE

 class OIDNPass {
  public:
@@ -547,7 +544,6 @@ class OIDNDenoiseContext {
    * the fake values and denoising of passes which do need albedo can no longer happen. */
   bool albedo_replaced_with_fake_ = false;
 };
-#endif

 static unique_ptr<DeviceQueue> create_device_queue(const RenderBuffers *render_buffers)
 {

@@ -582,18 +578,20 @@ static void copy_render_buffers_to_device(unique_ptr<DeviceQueue> &queue,

   }
 }

+#endif
+
 bool OIDNDenoiser::denoise_buffer(const BufferParams &buffer_params,
                                   RenderBuffers *render_buffers,
                                   const int num_samples,
                                   bool allow_inplace_modification)
 {
+#ifdef WITH_OPENIMAGEDENOISE
   thread_scoped_lock lock(mutex_);

   /* Make sure the host-side data is available for denoising. */
   unique_ptr<DeviceQueue> queue = create_device_queue(render_buffers);
   copy_render_buffers_from_device(queue, render_buffers);

-#ifdef WITH_OPENIMAGEDENOISE
   OIDNDenoiseContext context(
       this, params_, buffer_params, render_buffers, num_samples, allow_inplace_modification);

@@ -620,6 +618,11 @@ bool OIDNDenoiser::denoise_buffer(const BufferParams &buffer_params,

    * copies data from the device it doesn't overwrite the denoiser buffers. */
     copy_render_buffers_to_device(queue, render_buffers);
   }
+#else
+  (void)buffer_params;
+  (void)render_buffers;
+  (void)num_samples;
+  (void)allow_inplace_modification;
 #endif

 /* This code is not supposed to run when compiled without OIDN support, so can assume if we made
@@ -296,13 +296,13 @@ static BufferParams scale_buffer_params(const BufferParams &params, int resoluti

   scaled_params.window_x = params.window_x / resolution_divider;
   scaled_params.window_y = params.window_y / resolution_divider;
-  scaled_params.window_width = params.window_width / resolution_divider;
-  scaled_params.window_height = params.window_height / resolution_divider;
+  scaled_params.window_width = max(1, params.window_width / resolution_divider);
+  scaled_params.window_height = max(1, params.window_height / resolution_divider);

   scaled_params.full_x = params.full_x / resolution_divider;
   scaled_params.full_y = params.full_y / resolution_divider;
-  scaled_params.full_width = params.full_width / resolution_divider;
-  scaled_params.full_height = params.full_height / resolution_divider;
+  scaled_params.full_width = max(1, params.full_width / resolution_divider);
+  scaled_params.full_height = max(1, params.full_height / resolution_divider);

   scaled_params.update_offset_stride();
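Note on the hunk above: integer division truncates toward zero, so once the resolution divider exceeds a dimension the scaled buffer would collapse to zero pixels; the max(1, ...) clamp keeps it at least one pixel wide. A minimal illustration:

#include <algorithm>
#include <cassert>

int main()
{
  const int window_width = 3;
  const int resolution_divider = 4; /* e.g. a coarse navigation preview */

  /* Unclamped: 3 / 4 == 0, a degenerate zero-width buffer. */
  assert(window_width / resolution_divider == 0);

  /* Clamped as in the hunk above: never below one pixel. */
  const int scaled_width = std::max(1, window_width / resolution_divider);
  assert(scaled_width == 1);
  return 0;
}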
@@ -380,7 +380,10 @@ void PathTrace::path_trace(RenderWork &render_work)
     PathTraceWork *path_trace_work = path_trace_works_[i].get();

     PathTraceWork::RenderStatistics statistics;
-    path_trace_work->render_samples(statistics, render_work.path_trace.start_sample, num_samples);
+    path_trace_work->render_samples(statistics,
+                                    render_work.path_trace.start_sample,
+                                    num_samples,
+                                    render_work.path_trace.sample_offset);

     const double work_time = time_dt() - work_start_time;
     work_balance_infos_[i].time_spent += work_time;

@@ -847,9 +850,11 @@ void PathTrace::progress_update_if_needed(const RenderWork &render_work)

 {
   if (progress_ != nullptr) {
     const int2 tile_size = get_render_tile_size();
-    const int num_samples_added = tile_size.x * tile_size.y * render_work.path_trace.num_samples;
-    const int current_sample = render_work.path_trace.start_sample +
-                               render_work.path_trace.num_samples;
+    const uint64_t num_samples_added = uint64_t(tile_size.x) * tile_size.y *
+                                       render_work.path_trace.num_samples;
+    const int current_sample = render_work.path_trace.start_sample +
+                               render_work.path_trace.num_samples -
+                               render_work.path_trace.sample_offset;
     progress_->add_samples(num_samples_added, current_sample);
   }
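Note on the hunk above: the widening to uint64_t matters because tile area times sample count can exceed INT32_MAX; for example a 4096x4096 tile at 256 samples adds 4,294,967,296 samples. A small check of the arithmetic:

#include <cstdint>
#include <cstdio>

int main()
{
  const int tile_x = 4096, tile_y = 4096, num_samples = 256;

  /* A plain int product would need 4'294'967'296, which exceeds
   * INT32_MAX (2'147'483'647) and overflows. Widening the first operand
   * promotes the whole chain to 64-bit, as in the hunk above. */
  const uint64_t samples_added = uint64_t(tile_x) * tile_y * num_samples;
  printf("%llu\n", (unsigned long long)samples_added); /* prints 4294967296 */
  return 0;
}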
@@ -76,7 +76,7 @@ class PathTraceDisplay {
   /* Copy buffer of rendered pixels of a given size into a given position of the texture.
    *
-   * This function does not acquire a lock. The reason for this is is to allow use of this function
+   * This function does not acquire a lock. The reason for this is to allow use of this function
    * for partial updates from different devices. In this case the caller will acquire the lock
    * once, update all the slices and release
    * the lock once. This will ensure that draw() will never use partially updated texture. */

@@ -75,7 +75,10 @@ class PathTraceWork {

   /* Render given number of samples as a synchronous blocking call.
    * The samples are added to the render buffer associated with this work. */
-  virtual void render_samples(RenderStatistics &statistics, int start_sample, int samples_num) = 0;
+  virtual void render_samples(RenderStatistics &statistics,
+                              int start_sample,
+                              int samples_num,
+                              int sample_offset) = 0;

   /* Copy render result from this work to the corresponding place of the GPU display.
    *
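Note on the render_samples() signature change above and the sample_offset plumbing in the hunks that follow: one plausible reading is that the offset lets a render cover a subrange of samples while keeping sampling-pattern indices aligned with the global sample number, so several partial renders can be combined. A toy sketch of that idea (the seeding function is hypothetical, not the kernel's):

#include <cassert>
#include <cstdint>

/* Hypothetical stand-in for per-sample pattern seeding: the only property
 * that matters here is that it depends on the global sample index. */
static uint32_t sample_pattern_index(int local_sample, int sample_offset)
{
  return uint32_t(local_sample + sample_offset);
}

int main()
{
  /* Worker A renders local samples [0, 128) with offset 0; worker B renders
   * local samples [0, 128) with offset 128. Together they walk the same
   * pattern indices as one 256-sample render. */
  assert(sample_pattern_index(0, 128) == sample_pattern_index(128, 0));
  return 0;
}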
|
@@ -71,15 +71,18 @@ void PathTraceWorkCPU::init_execution()
|
|||||||
|
|
||||||
void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
|
void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
|
||||||
int start_sample,
|
int start_sample,
|
||||||
int samples_num)
|
int samples_num,
|
||||||
|
int sample_offset)
|
||||||
{
|
{
|
||||||
const int64_t image_width = effective_buffer_params_.width;
|
const int64_t image_width = effective_buffer_params_.width;
|
||||||
const int64_t image_height = effective_buffer_params_.height;
|
const int64_t image_height = effective_buffer_params_.height;
|
||||||
const int64_t total_pixels_num = image_width * image_height;
|
const int64_t total_pixels_num = image_width * image_height;
|
||||||
|
|
||||||
|
if (device_->profiler.active()) {
|
||||||
for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
|
for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
|
||||||
kernel_globals.start_profiling();
|
kernel_globals.start_profiling();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
tbb::task_arena local_arena = local_tbb_arena_create(device_);
|
tbb::task_arena local_arena = local_tbb_arena_create(device_);
|
||||||
local_arena.execute([&]() {
|
local_arena.execute([&]() {
|
||||||
@@ -97,6 +100,7 @@ void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
|
|||||||
work_tile.w = 1;
|
work_tile.w = 1;
|
||||||
work_tile.h = 1;
|
work_tile.h = 1;
|
||||||
work_tile.start_sample = start_sample;
|
work_tile.start_sample = start_sample;
|
||||||
|
work_tile.sample_offset = sample_offset;
|
||||||
work_tile.num_samples = 1;
|
work_tile.num_samples = 1;
|
||||||
work_tile.offset = effective_buffer_params_.offset;
|
work_tile.offset = effective_buffer_params_.offset;
|
||||||
work_tile.stride = effective_buffer_params_.stride;
|
work_tile.stride = effective_buffer_params_.stride;
|
||||||
@@ -106,10 +110,11 @@ void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
|
|||||||
render_samples_full_pipeline(kernel_globals, work_tile, samples_num);
|
render_samples_full_pipeline(kernel_globals, work_tile, samples_num);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
if (device_->profiler.active()) {
|
||||||
for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
|
for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
|
||||||
kernel_globals.stop_profiling();
|
kernel_globals.stop_profiling();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
statistics.occupancy = 1.0f;
|
statistics.occupancy = 1.0f;
|
||||||
}
|
}
|
||||||
|
@@ -48,7 +48,8 @@ class PathTraceWorkCPU : public PathTraceWork {
|
|||||||
|
|
||||||
virtual void render_samples(RenderStatistics &statistics,
|
virtual void render_samples(RenderStatistics &statistics,
|
||||||
int start_sample,
|
int start_sample,
|
||||||
int samples_num) override;
|
int samples_num,
|
||||||
|
int sample_offset) override;
|
||||||
|
|
||||||
virtual void copy_to_display(PathTraceDisplay *display,
|
virtual void copy_to_display(PathTraceDisplay *display,
|
||||||
PassMode pass_mode,
|
PassMode pass_mode,
|
||||||
|
@@ -250,7 +250,8 @@ void PathTraceWorkGPU::init_execution()
|
|||||||
|
|
||||||
void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
|
void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
|
||||||
int start_sample,
|
int start_sample,
|
||||||
int samples_num)
|
int samples_num,
|
||||||
|
int sample_offset)
|
||||||
{
|
{
|
||||||
/* Limit number of states for the tile and rely on a greedy scheduling of tiles. This allows to
|
/* Limit number of states for the tile and rely on a greedy scheduling of tiles. This allows to
|
||||||
* add more work (because tiles are smaller, so there is higher chance that more paths will
|
* add more work (because tiles are smaller, so there is higher chance that more paths will
|
||||||
@@ -261,6 +262,7 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
|
|||||||
work_tile_scheduler_.reset(effective_buffer_params_,
|
work_tile_scheduler_.reset(effective_buffer_params_,
|
||||||
start_sample,
|
start_sample,
|
||||||
samples_num,
|
samples_num,
|
||||||
|
sample_offset,
|
||||||
device_scene_->data.integrator.scrambling_distance);
|
device_scene_->data.integrator.scrambling_distance);
|
||||||
|
|
||||||
enqueue_reset();
|
enqueue_reset();
|
||||||
@@ -437,7 +439,15 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
   DCHECK_LE(work_size, max_num_paths_);

   switch (kernel) {
-    case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
+    case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: {
+      /* Closest ray intersection kernels with integrator state and render buffer. */
+      void *d_render_buffer = (void *)buffers_->buffer.device_pointer;
+      void *args[] = {&d_path_index, &d_render_buffer, const_cast<int *>(&work_size)};
+
+      queue_->enqueue(kernel, work_size, args);
+      break;
+    }
+
     case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
     case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
     case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: {
@@ -46,7 +46,8 @@ class PathTraceWorkGPU : public PathTraceWork {
   virtual void render_samples(RenderStatistics &statistics,
                               int start_sample,
-                              int samples_num) override;
+                              int samples_num,
+                              int sample_offset) override;

   virtual void copy_to_display(PathTraceDisplay *display,
                                PassMode pass_mode,
@@ -88,6 +88,16 @@ int RenderScheduler::get_num_samples() const
   return num_samples_;
 }

+void RenderScheduler::set_sample_offset(int sample_offset)
+{
+  sample_offset_ = sample_offset;
+}
+
+int RenderScheduler::get_sample_offset() const
+{
+  return sample_offset_;
+}
+
 void RenderScheduler::set_time_limit(double time_limit)
 {
   time_limit_ = time_limit;

@@ -110,13 +120,15 @@ int RenderScheduler::get_num_rendered_samples() const

   return state_.num_rendered_samples;
 }

-void RenderScheduler::reset(const BufferParams &buffer_params, int num_samples)
+void RenderScheduler::reset(const BufferParams &buffer_params, int num_samples, int sample_offset)
 {
   buffer_params_ = buffer_params;

   update_start_resolution_divider();

   set_num_samples(num_samples);
+  set_start_sample(sample_offset);
+  set_sample_offset(sample_offset);

   /* In background mode never do lower resolution render preview, as it is not really supported
    * by the software. */

@@ -171,7 +183,7 @@ void RenderScheduler::reset(const BufferParams &buffer_params, int num_samples)

 void RenderScheduler::reset_for_next_tile()
 {
-  reset(buffer_params_, num_samples_);
+  reset(buffer_params_, num_samples_, sample_offset_);
 }

 bool RenderScheduler::render_work_reschedule_on_converge(RenderWork &render_work)

@@ -317,6 +329,7 @@ RenderWork RenderScheduler::get_render_work()

   render_work.path_trace.start_sample = get_start_sample_to_path_trace();
   render_work.path_trace.num_samples = get_num_samples_to_path_trace();
+  render_work.path_trace.sample_offset = get_sample_offset();

   render_work.init_render_buffers = (render_work.path_trace.start_sample == get_start_sample());
@@ -827,6 +840,26 @@ int RenderScheduler::get_num_samples_to_path_trace() const
     num_samples_to_occupy = lround(state_.occupancy_num_samples * 0.7f / state_.occupancy);
   }

+  /* When the time limit is used, clamp the calculated number of samples to keep occupancy.
+   * This is because the time limit causes the last render iteration to happen with a lower
+   * number of samples, which conflicts with the occupancy (a lower number of samples causes
+   * lower occupancy, and the calculation is based on the number of previously rendered
+   * samples).
+   *
+   * When the time limit is not used, the number of samples per render iteration either
+   * increases or stays the same, so there is no need to clamp the number of samples
+   * calculated for occupancy. */
+  if (time_limit_ != 0.0 && state_.start_render_time != 0.0) {
+    const double remaining_render_time = max(
+        0.0, time_limit_ - (time_dt() - state_.start_render_time));
+    const double time_per_sample_average = path_trace_time_.get_average();
+    const double predicted_render_time = num_samples_to_occupy * time_per_sample_average;
+
+    if (predicted_render_time > remaining_render_time) {
+      num_samples_to_occupy = lround(num_samples_to_occupy *
+                                     (remaining_render_time / predicted_render_time));
+    }
+  }
+
   num_samples_to_render = max(num_samples_to_render,
                               min(num_samples_to_occupy, max_num_samples_to_render));
 }
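Note on the clamp added above, worked through with concrete numbers: 2.0 s remaining, 0.05 s average per sample and 100 proposed samples predict 5.0 s, so the count is scaled by 2.0 / 5.0 down to 40. A direct transcription:

#include <cassert>
#include <cmath>

int main()
{
  const double remaining_render_time = 2.0;    /* seconds left in the limit */
  const double time_per_sample_average = 0.05; /* seconds per sample */
  long num_samples_to_occupy = 100;

  const double predicted_render_time = num_samples_to_occupy * time_per_sample_average;
  if (predicted_render_time > remaining_render_time) {
    num_samples_to_occupy = std::lround(num_samples_to_occupy *
                                        (remaining_render_time / predicted_render_time));
  }
  /* 100 * (2.0 / 5.0) == 40 */
  assert(num_samples_to_occupy == 40);
  return 0;
}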
@@ -39,6 +39,7 @@ class RenderWork {
   struct {
     int start_sample = 0;
     int num_samples = 0;
+    int sample_offset = 0;
   } path_trace;

   struct {

@@ -125,6 +126,9 @@ class RenderScheduler {

   void set_num_samples(int num_samples);
   int get_num_samples() const;

+  void set_sample_offset(int sample_offset);
+  int get_sample_offset() const;
+
   /* Time limit for the path tracing tasks, in minutes.
    * Zero disables the limit. */
   void set_time_limit(double time_limit);

@@ -150,7 +154,7 @@ class RenderScheduler {

   /* Reset scheduler, indicating that rendering will happen from scratch.
    * Resets current rendered state, as well as scheduling information. */
-  void reset(const BufferParams &buffer_params, int num_samples);
+  void reset(const BufferParams &buffer_params, int num_samples, int sample_offset);

   /* Reset scheduler upon switching to a next tile.
    * Will keep the same number of samples and full-frame render parameters, but will reset progress

@@ -419,6 +423,8 @@ class RenderScheduler {

   int start_sample_ = 0;
   int num_samples_ = 0;

+  int sample_offset_ = 0;
+
   /* Limit in seconds for how long path tracing is allowed to happen.
    * Zero means no limit is applied. */
   double time_limit_ = 0.0;
@@ -36,6 +36,7 @@ void WorkTileScheduler::set_max_num_path_states(int max_num_path_states)
 void WorkTileScheduler::reset(const BufferParams &buffer_params,
                               int sample_start,
                               int samples_num,
+                              int sample_offset,
                               float scrambling_distance)
 {
   /* Image buffer parameters. */

@@ -51,6 +52,7 @@ void WorkTileScheduler::reset(const BufferParams &buffer_params,

   /* Samples parameters. */
   sample_start_ = sample_start;
   samples_num_ = samples_num;
+  sample_offset_ = sample_offset;

   /* Initialize new scheduling. */
   reset_scheduler_state();

@@ -111,6 +113,7 @@ bool WorkTileScheduler::get_work(KernelWorkTile *work_tile_, const int max_work_

   work_tile.h = tile_size_.height;
   work_tile.start_sample = sample_start_ + start_sample;
   work_tile.num_samples = min(tile_size_.num_samples, samples_num_ - start_sample);
+  work_tile.sample_offset = sample_offset_;
   work_tile.offset = offset_;
   work_tile.stride = stride_;

@@ -41,6 +41,7 @@ class WorkTileScheduler {

   void reset(const BufferParams &buffer_params,
              int sample_start,
              int samples_num,
+             int sample_offset,
              float scrambling_distance);

   /* Get work for a device.

@@ -79,6 +80,7 @@ class WorkTileScheduler {

    * (splitting into smaller work tiles). */
   int sample_start_ = 0;
   int samples_num_ = 0;
+  int sample_offset_ = 0;

   /* Tile size which will be scheduled for rendering. */
   TileSize tile_size_;
@@ -39,6 +39,10 @@ set(SRC_KERNEL_DEVICE_HIP
   device/hip/kernel.cpp
 )

+set(SRC_KERNEL_DEVICE_METAL
+  device/metal/kernel.metal
+)
+
 set(SRC_KERNEL_DEVICE_OPTIX
   device/optix/kernel.cu
   device/optix/kernel_shader_raytrace.cu

@@ -79,6 +83,13 @@ set(SRC_KERNEL_DEVICE_OPTIX_HEADERS

   device/optix/globals.h
 )

+set(SRC_KERNEL_DEVICE_METAL_HEADERS
+  device/metal/compat.h
+  device/metal/context_begin.h
+  device/metal/context_end.h
+  device/metal/globals.h
+)
+
 set(SRC_KERNEL_CLOSURE_HEADERS
   closure/alloc.h
   closure/bsdf.h

@@ -262,6 +273,7 @@ set(SRC_KERNEL_UTIL_HEADERS

 )

 set(SRC_KERNEL_TYPES_HEADERS
+  tables.h
   textures.h
   types.h
 )
@@ -399,12 +411,8 @@ if(WITH_CYCLES_CUDA_BINARIES)
     -I ${CMAKE_CURRENT_SOURCE_DIR}/..
     -I ${CMAKE_CURRENT_SOURCE_DIR}/device/cuda
     --use_fast_math
-    -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_file})
-  if(${experimental})
-    set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__)
-    set(name ${name}_experimental)
-  endif()
+    -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_file}
+    -Wno-deprecated-gpu-targets)

   if(WITH_NANOVDB)
     set(cuda_flags ${cuda_flags}

@@ -412,6 +420,10 @@ if(WITH_CYCLES_CUDA_BINARIES)

       -I "${NANOVDB_INCLUDE_DIR}")
   endif()

+  if(WITH_CYCLES_DEBUG)
+    set(cuda_flags ${cuda_flags} -D WITH_CYCLES_DEBUG)
+  endif()
+
   if(WITH_CYCLES_CUBIN_COMPILER)
     string(SUBSTRING ${arch} 3 -1 CUDA_ARCH)

@@ -560,13 +572,14 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)

     -ffast-math
     -o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})

-  if(${experimental})
-    set(hip_flags ${hip_flags} -D __KERNEL_EXPERIMENTAL__)
-    set(name ${name}_experimental)
+  if(WITH_NANOVDB)
+    set(hip_flags ${hip_flags}
+      -D WITH_NANOVDB
+      -I "${NANOVDB_INCLUDE_DIR}")
   endif()

   if(WITH_CYCLES_DEBUG)
-    set(hip_flags ${hip_flags} -D __KERNEL_DEBUG__)
+    set(hip_flags ${hip_flags} -D WITH_CYCLES_DEBUG)
   endif()

   add_custom_command(

@@ -607,6 +620,10 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)

     -I "${NANOVDB_INCLUDE_DIR}")
   endif()

+  if(WITH_CYCLES_DEBUG)
+    set(cuda_flags ${cuda_flags} -D WITH_CYCLES_DEBUG)
+  endif()
+
   if(WITH_CYCLES_CUBIN_COMPILER)
     # Needed to find libnvrtc-builtins.so. Can't do it from inside
     # cycles_cubin_cc since the env variable is read before main()
@@ -695,7 +712,7 @@ if(WITH_COMPILER_ASAN)
   string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -fno-sanitize=all")
   string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-sanitize=vptr")
 elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
-  # With OSL, Cycles disables rtti in some modules, wich then breaks at linking
+  # With OSL, Cycles disables rtti in some modules, which then breaks at linking
   # when trying to use vptr sanitizer (included into 'undefined' general option).
   string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -fno-sanitize=vptr")
   string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-sanitize=vptr")
@@ -723,12 +740,14 @@ cycles_add_library(cycles_kernel "${LIB}"
|
|||||||
${SRC_KERNEL_DEVICE_CUDA}
|
${SRC_KERNEL_DEVICE_CUDA}
|
||||||
${SRC_KERNEL_DEVICE_HIP}
|
${SRC_KERNEL_DEVICE_HIP}
|
||||||
${SRC_KERNEL_DEVICE_OPTIX}
|
${SRC_KERNEL_DEVICE_OPTIX}
|
||||||
|
${SRC_KERNEL_DEVICE_METAL}
|
||||||
${SRC_KERNEL_HEADERS}
|
${SRC_KERNEL_HEADERS}
|
||||||
${SRC_KERNEL_DEVICE_CPU_HEADERS}
|
${SRC_KERNEL_DEVICE_CPU_HEADERS}
|
||||||
${SRC_KERNEL_DEVICE_GPU_HEADERS}
|
${SRC_KERNEL_DEVICE_GPU_HEADERS}
|
||||||
${SRC_KERNEL_DEVICE_CUDA_HEADERS}
|
${SRC_KERNEL_DEVICE_CUDA_HEADERS}
|
||||||
${SRC_KERNEL_DEVICE_HIP_HEADERS}
|
${SRC_KERNEL_DEVICE_HIP_HEADERS}
|
||||||
${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
|
${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
|
||||||
|
${SRC_KERNEL_DEVICE_METAL_HEADERS}
|
||||||
)
|
)
|
||||||
|
|
||||||
source_group("bake" FILES ${SRC_KERNEL_BAKE_HEADERS})
|
source_group("bake" FILES ${SRC_KERNEL_BAKE_HEADERS})
|
||||||
@@ -740,6 +759,7 @@ source_group("device\\cuda" FILES ${SRC_KERNEL_DEVICE_CUDA} ${SRC_KERNEL_DEVICE_
|
|||||||
source_group("device\\gpu" FILES ${SRC_KERNEL_DEVICE_GPU_HEADERS})
|
source_group("device\\gpu" FILES ${SRC_KERNEL_DEVICE_GPU_HEADERS})
|
||||||
source_group("device\\hip" FILES ${SRC_KERNEL_DEVICE_HIP} ${SRC_KERNEL_DEVICE_HIP_HEADERS})
|
source_group("device\\hip" FILES ${SRC_KERNEL_DEVICE_HIP} ${SRC_KERNEL_DEVICE_HIP_HEADERS})
|
||||||
source_group("device\\optix" FILES ${SRC_KERNEL_DEVICE_OPTIX} ${SRC_KERNEL_DEVICE_OPTIX_HEADERS})
|
source_group("device\\optix" FILES ${SRC_KERNEL_DEVICE_OPTIX} ${SRC_KERNEL_DEVICE_OPTIX_HEADERS})
|
||||||
|
source_group("device\\metal" FILES ${SRC_KERNEL_DEVICE_METAL} ${SRC_KERNEL_DEVICE_METAL_HEADERS})
|
||||||
source_group("film" FILES ${SRC_KERNEL_FILM_HEADERS})
|
source_group("film" FILES ${SRC_KERNEL_FILM_HEADERS})
|
||||||
source_group("geom" FILES ${SRC_KERNEL_GEOM_HEADERS})
|
source_group("geom" FILES ${SRC_KERNEL_GEOM_HEADERS})
|
||||||
source_group("integrator" FILES ${SRC_KERNEL_INTEGRATOR_HEADERS})
|
source_group("integrator" FILES ${SRC_KERNEL_INTEGRATOR_HEADERS})
|
||||||
@@ -772,6 +792,8 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP}" ${CYCLES_
|
|||||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip)
|
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip)
|
||||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
|
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
|
||||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
|
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
|
||||||
|
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_METAL}" ${CYCLES_INSTALL_PATH}/source/kernel/device/metal)
|
||||||
|
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_METAL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/metal)
|
||||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_FILM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/film)
|
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_FILM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/film)
|
||||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/geom)
|
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/geom)
|
||||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_INTEGRATOR_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/integrator)
|
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_INTEGRATOR_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/integrator)
|
||||||
|
@@ -97,7 +97,7 @@ ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection *
     swapped = false;
     for (int j = 0; j < num_hits - 1; ++j) {
       if (hits[j].t > hits[j + 1].t) {
-        struct Intersection tmp_hit = hits[j];
+        Intersection tmp_hit = hits[j];
         float3 tmp_Ng = Ng[j];
         hits[j] = hits[j + 1];
         Ng[j] = Ng[j + 1];
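The change above only drops a redundant struct keyword; the routine itself is a paired bubble sort that orders recorded hits by ray distance t while keeping the normals array in lockstep. A minimal standalone C++ sketch of the same idea, with simplified stand-in types rather than the kernel's own:

#include <utility>

struct Intersection { float t; };
struct float3 { float x, y, z; };

/* Order hits by ascending t; Ng[i] must keep describing hits[i] afterwards. */
inline void sort_hits_and_normals(Intersection *hits, float3 *Ng, int num_hits)
{
  bool swapped = true;
  while (swapped) {
    swapped = false;
    for (int j = 0; j < num_hits - 1; ++j) {
      if (hits[j].t > hits[j + 1].t) {
        std::swap(hits[j], hits[j + 1]);
        std::swap(Ng[j], Ng[j + 1]);
        swapped = true;
      }
    }
  }
}

Bubble sort is a reasonable fit here since the hit counts involved are small and often nearly sorted already.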
@@ -438,7 +438,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
   if (label & LABEL_TRANSMIT) {
     float threshold_squared = kernel_data.background.transparent_roughness_squared_threshold;

-    if (threshold_squared >= 0.0f) {
+    if (threshold_squared >= 0.0f && !(label & LABEL_DIFFUSE)) {
       if (bsdf_get_specular_roughness_squared(sc) <= threshold_squared) {
         label |= LABEL_TRANSMIT_TRANSPARENT;
       }
@@ -18,6 +18,7 @@

 #pragma once

+#include "kernel/tables.h"
 #include "kernel/types.h"
 #include "kernel/util/profiling.h"

@@ -37,7 +37,7 @@

 KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_camera);
 KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_bake);
-KERNEL_INTEGRATOR_FUNCTION(intersect_closest);
+KERNEL_INTEGRATOR_SHADE_FUNCTION(intersect_closest);
 KERNEL_INTEGRATOR_FUNCTION(intersect_shadow);
 KERNEL_INTEGRATOR_FUNCTION(intersect_subsurface);
 KERNEL_INTEGRATOR_FUNCTION(intersect_volume_stack);

@@ -112,7 +112,7 @@ CCL_NAMESPACE_BEGIN

 DEFINE_INTEGRATOR_INIT_KERNEL(init_from_camera)
 DEFINE_INTEGRATOR_INIT_KERNEL(init_from_bake)
-DEFINE_INTEGRATOR_KERNEL(intersect_closest)
+DEFINE_INTEGRATOR_SHADE_KERNEL(intersect_closest)
 DEFINE_INTEGRATOR_KERNEL(intersect_subsurface)
 DEFINE_INTEGRATOR_KERNEL(intersect_volume_stack)
 DEFINE_INTEGRATOR_SHADE_KERNEL(shade_background)
@@ -52,8 +52,9 @@ typedef unsigned long long uint64_t;
 #endif
 #define ccl_device_noinline __device__ __noinline__
 #define ccl_device_noinline_cpu ccl_device
+#define ccl_device_inline_method ccl_device
 #define ccl_global
-#define ccl_static_constant __constant__
+#define ccl_inline_constant __constant__
 #define ccl_device_constant __constant__ __device__
 #define ccl_constant const
 #define ccl_gpu_shared __shared__
@@ -75,6 +76,7 @@ typedef unsigned long long uint64_t;
 #define ccl_gpu_block_idx_x (blockIdx.x)
 #define ccl_gpu_grid_dim_x (gridDim.x)
 #define ccl_gpu_warp_size (warpSize)
+#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))

 #define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
 #define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
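The new ccl_gpu_thread_mask(thread_warp) builds a mask whose set bits are exactly the lanes before the calling lane, so popcount(ballot & mask) counts earlier active lanes in the warp. A host-side C++ check of that property (32-wide warps assumed; lane 0 is skipped because shifting a 32-bit value by 32 is undefined in standard C++, whereas the kernel only ever runs this on the GPU):

#include <cassert>
#include <cstdint>

constexpr uint32_t thread_mask(uint32_t warp_size, uint32_t thread_warp)
{
  /* Same expression as the macro: ones in bit positions [0, thread_warp). */
  return uint32_t(0xFFFFFFFFu >> (warp_size - thread_warp));
}

int main()
{
  assert(thread_mask(32, 1) == 0x00000001u);  /* lane 1 sees only lane 0 */
  assert(thread_mask(32, 16) == 0x0000FFFFu); /* lane 16 sees lanes 0-15 */
  assert(thread_mask(32, 31) == 0x7FFFFFFFu); /* last lane sees all others */
  return 0;
}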
@@ -84,7 +86,6 @@ typedef unsigned long long uint64_t;
 #define ccl_gpu_syncthreads() __syncthreads()
 #define ccl_gpu_ballot(predicate) __ballot_sync(0xFFFFFFFF, predicate)
 #define ccl_gpu_shfl_down_sync(mask, var, detla) __shfl_down_sync(mask, var, detla)
-#define ccl_gpu_popc(x) __popc(x)

 /* GPU texture objects */

@@ -92,12 +92,29 @@

 /* Compute number of threads per block and minimum blocks per multiprocessor
  * given the maximum number of registers per thread. */

 #define ccl_gpu_kernel(block_num_threads, thread_num_registers) \
   extern "C" __global__ void __launch_bounds__(block_num_threads, \
                                                GPU_MULTIPRESSOR_MAX_REGISTERS / \
                                                    (block_num_threads * thread_num_registers))

+#define ccl_gpu_kernel_threads(block_num_threads) \
+  extern "C" __global__ void __launch_bounds__(block_num_threads)
+
+#define ccl_gpu_kernel_signature(name, ...) kernel_gpu_##name(__VA_ARGS__)
+
+#define ccl_gpu_kernel_call(x) x
+
+/* Define a function object where "func" is the lambda body, and additional parameters are used to
+ * specify captured state */
+#define ccl_gpu_kernel_lambda(func, ...) \
+  struct KernelLambda { \
+    __VA_ARGS__; \
+    __device__ int operator()(const int state) \
+    { \
+      return (func); \
+    } \
+  } ccl_gpu_kernel_lambda_pass
+
 /* sanity checks */

 #if GPU_KERNEL_BLOCK_NUM_THREADS > GPU_BLOCK_MAX_THREADS
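ccl_gpu_kernel_lambda builds a named function object in place of a C++ lambda: func becomes the body of operator() and the extra macro arguments become captured members. A hand expansion in host C++ (__device__ dropped, and kernel_index is a hypothetical capture, not a real call site from this patch):

#include <cstdio>

/* Roughly what ccl_gpu_kernel_lambda(state == kernel_index, int kernel_index)
 * expands to: the __VA_ARGS__ declaration becomes a member that the caller
 * fills in before handing the object to code expecting a predicate. */
struct KernelLambda {
  int kernel_index;
  int operator()(const int state)
  {
    return state == kernel_index;
  }
};

int main()
{
  KernelLambda is_queued{/*kernel_index=*/3};
  std::printf("%d %d\n", is_queued(3), is_queued(4)); /* prints: 1 0 */
  return 0;
}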
@@ -65,7 +65,9 @@ ccl_device float cubic_h1(float a)

 /* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */
 template<typename T>
-ccl_device_noinline T kernel_tex_image_interp_bicubic(const TextureInfo &info, float x, float y)
+ccl_device_noinline T kernel_tex_image_interp_bicubic(ccl_global const TextureInfo &info,
+                                                      float x,
+                                                      float y)
 {
   ccl_gpu_tex_object tex = (ccl_gpu_tex_object)info.data;

@@ -94,7 +96,7 @@ ccl_device_noinline T kernel_tex_image_interp_bicubic(const TextureInfo &info, f
 /* Fast tricubic texture lookup using 8 trilinear lookups. */
 template<typename T>
 ccl_device_noinline T
-kernel_tex_image_interp_tricubic(const TextureInfo &info, float x, float y, float z)
+kernel_tex_image_interp_tricubic(ccl_global const TextureInfo &info, float x, float y, float z)
 {
   ccl_gpu_tex_object tex = (ccl_gpu_tex_object)info.data;

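Only the TextureInfo qualifier changes in these two hunks; the underlying trick (4 bilinear fetches for bicubic, 8 trilinear for tricubic) rests on folding each pair of cubic B-spline weights into a single linear interpolation, which the texture hardware then performs for free. A small C++ check of that per-axis folding identity, assuming the standard B-spline weights and modeling the hardware fetch with lerp:

#include <cassert>
#include <cmath>

/* Cubic B-spline weights for fractional position a in [0, 1). */
static float w0(float a) { return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f); }
static float w1(float a) { return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f); }
static float w2(float a) { return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f); }
static float w3(float a) { return (1.0f / 6.0f) * (a * a * a); }

/* Direct 4-tap cubic along one axis. */
static float cubic_direct(const float t[4], float a)
{
  return w0(a) * t[0] + w1(a) * t[1] + w2(a) * t[2] + w3(a) * t[3];
}

static float lerp(float lo, float hi, float f) { return lo + (hi - lo) * f; }

/* Same result from two weighted lerps: g0*lerp(t0,t1,w1/g0) = w0*t0 + w1*t1. */
static float cubic_two_lerps(const float t[4], float a)
{
  const float g0 = w0(a) + w1(a), g1 = w2(a) + w3(a);
  return g0 * lerp(t[0], t[1], w1(a) / g0) + g1 * lerp(t[2], t[3], w3(a) / g1);
}

int main()
{
  const float taps[4] = {0.2f, 0.9f, 0.4f, 0.7f};
  for (float a = 0.05f; a < 1.0f; a += 0.1f) {
    assert(std::fabs(cubic_direct(taps, a) - cubic_two_lerps(taps, a)) < 1e-5f);
  }
  return 0;
}

In 2D the two positions per axis combine into 4 bilinear fetches, in 3D into 8 trilinear fetches, matching the comments above.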
@@ -169,7 +171,7 @@ ccl_device T kernel_tex_image_interp_tricubic_nanovdb(S &s, float x, float y, fl

 template<typename T>
 ccl_device_noinline T kernel_tex_image_interp_nanovdb(
-    const TextureInfo &info, float x, float y, float z, uint interpolation)
+    ccl_global const TextureInfo &info, float x, float y, float z, uint interpolation)
 {
   using namespace nanovdb;

@@ -191,7 +193,7 @@ ccl_device_noinline T kernel_tex_image_interp_nanovdb(

 ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int id, float x, float y)
 {
-  const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+  ccl_global const TextureInfo &info = kernel_tex_fetch(__texture_info, id);

   /* float4, byte4, ushort4 and half4 */
   const int texture_type = info.data_type;
@@ -226,7 +228,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
                                              float3 P,
                                              InterpolationType interp)
 {
-  const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+  ccl_global const TextureInfo &info = kernel_tex_fetch(__texture_info, id);

   if (info.use_transform_3d) {
     P = transform_point(&info.transform_3d, P);
(File diff suppressed because it is too large.)
@@ -31,10 +31,43 @@ CCL_NAMESPACE_BEGIN
 #  define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 512
 #endif

+#ifdef __KERNEL_METAL__
+struct ActiveIndexContext {
+  ActiveIndexContext(int _thread_index,
+                     int _global_index,
+                     int _threadgroup_size,
+                     int _simdgroup_size,
+                     int _simd_lane_index,
+                     int _simd_group_index,
+                     int _num_simd_groups,
+                     threadgroup int *_simdgroup_offset)
+      : thread_index(_thread_index),
+        global_index(_global_index),
+        blocksize(_threadgroup_size),
+        ccl_gpu_warp_size(_simdgroup_size),
+        thread_warp(_simd_lane_index),
+        warp_index(_simd_group_index),
+        num_warps(_num_simd_groups),
+        warp_offset(_simdgroup_offset)
+  {
+  }
+
+  const int thread_index, global_index, blocksize, ccl_gpu_warp_size, thread_warp, warp_index,
+      num_warps;
+  threadgroup int *warp_offset;
+
+  template<uint blocksizeDummy, typename IsActiveOp>
+  void active_index_array(const uint num_states,
+                          ccl_global int *indices,
+                          ccl_global int *num_indices,
+                          IsActiveOp is_active_op)
+  {
+    const uint state_index = global_index;
+#else
 template<uint blocksize, typename IsActiveOp>
 __device__ void gpu_parallel_active_index_array(const uint num_states,
-                                                int *indices,
-                                                int *num_indices,
+                                                ccl_global int *indices,
+                                                ccl_global int *num_indices,
                                                 IsActiveOp is_active_op)
 {
   extern ccl_gpu_shared int warp_offset[];
@@ -45,13 +78,15 @@ __device__ void gpu_parallel_active_index_array(const uint num_states,
   const uint warp_index = thread_index / ccl_gpu_warp_size;
   const uint num_warps = blocksize / ccl_gpu_warp_size;

-  /* Test if state corresponding to this thread is active. */
   const uint state_index = ccl_gpu_block_idx_x * blocksize + thread_index;
+#endif
+
+  /* Test if state corresponding to this thread is active. */
   const uint is_active = (state_index < num_states) ? is_active_op(state_index) : 0;

   /* For each thread within a warp compute how many other active states precede it. */
-  const uint thread_mask = 0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp);
-  const uint thread_offset = ccl_gpu_popc(ccl_gpu_ballot(is_active) & thread_mask);
+  const uint thread_offset = popcount(ccl_gpu_ballot(is_active) &
+                                      ccl_gpu_thread_mask(thread_warp));

   /* Last thread in warp stores number of active states for each warp. */
   if (thread_warp == ccl_gpu_warp_size - 1) {
@@ -84,4 +119,21 @@ __device__ void gpu_parallel_active_index_array(const uint num_states,
   }
 }

+#ifdef __KERNEL_METAL__
+}; /* end class ActiveIndexContext */
+
+/* inject the required thread params into a struct, and redirect to its templated member function
+ */
+#  define gpu_parallel_active_index_array \
+    ActiveIndexContext(metal_local_id, \
+                       metal_global_id, \
+                       metal_local_size, \
+                       simdgroup_size, \
+                       simd_lane_index, \
+                       simd_group_index, \
+                       num_simd_groups, \
+                       simdgroup_offset) \
+        .active_index_array
+#endif
+
 CCL_NAMESPACE_END
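The per-warp core of this compaction is visible in the hunk: ballot packs each lane's "active" vote into one mask, and each active lane writes itself at an offset equal to the number of active lanes before it. A single-threaded host C++ model of one 32-lane warp (the real kernel additionally reduces per-warp totals through shared memory and a global atomic, which this sketch omits):

#include <bitset>
#include <cstdint>
#include <cstdio>

int main()
{
  const int warp_size = 32;
  bool is_active[32];
  for (int lane = 0; lane < warp_size; ++lane)
    is_active[lane] = (lane % 3) == 0; /* arbitrary test pattern */

  /* ballot: one bit per lane that voted true. */
  uint32_t ballot = 0;
  for (int lane = 0; lane < warp_size; ++lane)
    if (is_active[lane])
      ballot |= 1u << lane;

  int indices[32] = {0};
  int num_indices = 0;
  for (int lane = 0; lane < warp_size; ++lane) {
    if (is_active[lane]) {
      const uint32_t mask = (1u << lane) - 1; /* lanes before this one */
      indices[std::bitset<32>(ballot & mask).count()] = lane; /* compacted write */
      ++num_indices;
    }
  }
  std::printf("%d active, first three: %d %d %d\n",
              num_indices, indices[0], indices[1], indices[2]); /* 11 ... 0 3 6 */
  return 0;
}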
@@ -33,10 +33,12 @@ CCL_NAMESPACE_BEGIN
 #  define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 512
 #endif

-template<uint blocksize>
-__device__ void gpu_parallel_prefix_sum(int *counter, int *prefix_sum, const int num_values)
+__device__ void gpu_parallel_prefix_sum(const int global_id,
+                                        ccl_global int *counter,
+                                        ccl_global int *prefix_sum,
+                                        const int num_values)
 {
-  if (!(ccl_gpu_block_idx_x == 0 && ccl_gpu_thread_idx_x == 0)) {
+  if (global_id != 0) {
     return;
   }

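The guard now takes a precomputed global_id instead of reading the CUDA block/thread builtins directly. The serial work the surviving thread performs is a small exclusive prefix sum; a host C++ sketch of that step (the loop body is not part of this hunk, so the counter reset is an assumption):

#include <cstdio>

int main()
{
  const int num_values = 4;
  int counter[4] = {3, 1, 0, 2}; /* per-key counts from the previous pass */
  int prefix_sum[4];

  int offset = 0;
  for (int i = 0; i < num_values; i++) {
    prefix_sum[i] = offset; /* exclusive: offset before adding this count */
    offset += counter[i];
    counter[i] = 0; /* assumed: cleared so the next round can re-count */
  }

  std::printf("%d %d %d %d\n", prefix_sum[0], prefix_sum[1], prefix_sum[2], prefix_sum[3]);
  /* prints: 0 3 4 4 */
  return 0;
}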
@@ -33,16 +33,16 @@ CCL_NAMESPACE_BEGIN
 #endif
 #define GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY (~0)

-template<uint blocksize, typename GetKeyOp>
-__device__ void gpu_parallel_sorted_index_array(const uint num_states,
+template<typename GetKeyOp>
+__device__ void gpu_parallel_sorted_index_array(const uint state_index,
+                                                const uint num_states,
                                                 const int num_states_limit,
-                                                int *indices,
-                                                int *num_indices,
-                                                int *key_counter,
-                                                int *key_prefix_sum,
+                                                ccl_global int *indices,
+                                                ccl_global int *num_indices,
+                                                ccl_global int *key_counter,
+                                                ccl_global int *key_prefix_sum,
                                                 GetKeyOp get_key_op)
 {
-  const uint state_index = ccl_gpu_block_idx_x * blocksize + ccl_gpu_thread_idx_x;
   const int key = (state_index < num_states) ? get_key_op(state_index) :
                                                GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY;

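state_index is now passed in by the caller rather than derived from block/thread indices inside the function; what the function computes is unchanged. The bucketed ordering it produces can be modeled on the host: given an exclusive prefix sum over per-key counts, every state claims the next slot of its key's bucket, so the index array ends up grouped ("sorted") by key. The real kernel claims slots with an atomic add; a serial loop stands in for that here:

#include <cstdio>

int main()
{
  const int num_states = 6;
  const int key[6] = {1, 0, 1, 2, 0, 1};
  int next_slot[3] = {0, 2, 5}; /* exclusive prefix sum of counts {2, 3, 1} */

  int indices[6];
  for (int state = 0; state < num_states; ++state)
    indices[next_slot[key[state]]++] = state; /* atomic fetch-add on the GPU */

  for (int i = 0; i < num_states; ++i)
    std::printf("%d ", indices[i]); /* prints: 1 4 0 2 5 3 */
  std::printf("\n");
  return 0;
}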
@@ -45,8 +45,9 @@ typedef unsigned long long uint64_t;
 #define ccl_device_forceinline __device__ __forceinline__
 #define ccl_device_noinline __device__ __noinline__
 #define ccl_device_noinline_cpu ccl_device
+#define ccl_device_inline_method ccl_device
 #define ccl_global
-#define ccl_static_constant __constant__
+#define ccl_inline_constant __constant__
 #define ccl_device_constant __constant__ __device__
 #define ccl_constant const
 #define ccl_gpu_shared __shared__
@@ -74,6 +75,7 @@ typedef unsigned long long uint64_t;
 #define ccl_gpu_block_idx_x (blockIdx.x)
 #define ccl_gpu_grid_dim_x (gridDim.x)
 #define ccl_gpu_warp_size (warpSize)
+#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))

 #define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
 #define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
@@ -83,7 +85,6 @@ typedef unsigned long long uint64_t;
 #define ccl_gpu_syncthreads() __syncthreads()
 #define ccl_gpu_ballot(predicate) __ballot(predicate)
 #define ccl_gpu_shfl_down_sync(mask, var, detla) __shfl_down(var, detla)
-#define ccl_gpu_popc(x) __popc(x)

 /* GPU texture objects */
 typedef hipTextureObject_t ccl_gpu_tex_object;

@@ -35,12 +35,29 @@

 /* Compute number of threads per block and minimum blocks per multiprocessor
  * given the maximum number of registers per thread. */

 #define ccl_gpu_kernel(block_num_threads, thread_num_registers) \
   extern "C" __global__ void __launch_bounds__(block_num_threads, \
                                                GPU_MULTIPRESSOR_MAX_REGISTERS / \
                                                    (block_num_threads * thread_num_registers))

+#define ccl_gpu_kernel_threads(block_num_threads) \
+  extern "C" __global__ void __launch_bounds__(block_num_threads)
+
+#define ccl_gpu_kernel_signature(name, ...) kernel_gpu_##name(__VA_ARGS__)
+
+#define ccl_gpu_kernel_call(x) x
+
+/* Define a function object where "func" is the lambda body, and additional parameters are used to
+ * specify captured state */
+#define ccl_gpu_kernel_lambda(func, ...) \
+  struct KernelLambda { \
+    __VA_ARGS__; \
+    __device__ int operator()(const int state) \
+    { \
+      return (func); \
+    } \
+  } ccl_gpu_kernel_lambda_pass
+
 /* sanity checks */

 #if GPU_KERNEL_BLOCK_NUM_THREADS > GPU_BLOCK_MAX_THREADS
@@ -34,6 +34,7 @@ using namespace metal;

 #pragma clang diagnostic ignored "-Wunused-variable"
 #pragma clang diagnostic ignored "-Wsign-compare"
+#pragma clang diagnostic ignored "-Wuninitialized"

 /* Qualifiers */

@@ -42,8 +43,9 @@ using namespace metal;
 #define ccl_device_forceinline ccl_device
 #define ccl_device_noinline ccl_device __attribute__((noinline))
 #define ccl_device_noinline_cpu ccl_device
+#define ccl_device_inline_method ccl_device
 #define ccl_global device
-#define ccl_static_constant static constant constexpr
+#define ccl_inline_constant static constant constexpr
 #define ccl_device_constant constant
 #define ccl_constant const device
 #define ccl_gpu_shared threadgroup
@@ -58,6 +60,122 @@ using namespace metal;

 #define kernel_assert(cond)

+#define ccl_gpu_global_id_x() metal_global_id
+#define ccl_gpu_warp_size simdgroup_size
+#define ccl_gpu_thread_idx_x simd_group_index
+#define ccl_gpu_thread_mask(thread_warp) uint64_t((1ull << thread_warp) - 1)
+
+#define ccl_gpu_ballot(predicate) ((uint64_t)((simd_vote::vote_t)simd_ballot(predicate)))
+#define ccl_gpu_syncthreads() threadgroup_barrier(mem_flags::mem_threadgroup);
+
+// clang-format off
+
+/* kernel.h adapters */
+
+#define ccl_gpu_kernel(block_num_threads, thread_num_registers)
+#define ccl_gpu_kernel_threads(block_num_threads)
+
+/* Convert a comma-separated list into a semicolon-separated list
+ * (so that we can generate a struct based on kernel entry-point parameters). */
+#define FN0()
+#define FN1(p1) p1;
+#define FN2(p1, p2) p1; p2;
+#define FN3(p1, p2, p3) p1; p2; p3;
+#define FN4(p1, p2, p3, p4) p1; p2; p3; p4;
+#define FN5(p1, p2, p3, p4, p5) p1; p2; p3; p4; p5;
+#define FN6(p1, p2, p3, p4, p5, p6) p1; p2; p3; p4; p5; p6;
+#define FN7(p1, p2, p3, p4, p5, p6, p7) p1; p2; p3; p4; p5; p6; p7;
+#define FN8(p1, p2, p3, p4, p5, p6, p7, p8) p1; p2; p3; p4; p5; p6; p7; p8;
+#define FN9(p1, p2, p3, p4, p5, p6, p7, p8, p9) p1; p2; p3; p4; p5; p6; p7; p8; p9;
+#define FN10(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10;
+#define FN11(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11;
+#define FN12(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12;
+#define FN13(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13;
+#define FN14(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14;
+#define FN15(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14; p15;
+#define FN16(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14; p15; p16;
+#define GET_LAST_ARG(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, ...) p16
+#define PARAMS_MAKER(...) GET_LAST_ARG(__VA_ARGS__, FN16, FN15, FN14, FN13, FN12, FN11, FN10, FN9, FN8, FN7, FN6, FN5, FN4, FN3, FN2, FN1, FN0)
+
+/* Generate a struct containing the entry-point parameters and a "run"
+ * method which can access them implicitly via this-> */
+#define ccl_gpu_kernel_signature(name, ...) \
+struct kernel_gpu_##name \
+{ \
+  PARAMS_MAKER(__VA_ARGS__)(__VA_ARGS__) \
+  void run(thread MetalKernelContext& context, \
+           threadgroup int *simdgroup_offset, \
+           const uint metal_global_id, \
+           const ushort metal_local_id, \
+           const ushort metal_local_size, \
+           uint simdgroup_size, \
+           uint simd_lane_index, \
+           uint simd_group_index, \
+           uint num_simd_groups) ccl_global const; \
+}; \
+kernel void kernel_metal_##name(device const kernel_gpu_##name *params_struct, \
+                                constant KernelParamsMetal &ccl_restrict _launch_params_metal, \
+                                constant MetalAncillaries *_metal_ancillaries, \
+                                threadgroup int *simdgroup_offset[[ threadgroup(0) ]], \
+                                const uint metal_global_id [[thread_position_in_grid]], \
+                                const ushort metal_local_id [[thread_position_in_threadgroup]], \
+                                const ushort metal_local_size [[threads_per_threadgroup]], \
+                                uint simdgroup_size [[threads_per_simdgroup]], \
+                                uint simd_lane_index [[thread_index_in_simdgroup]], \
+                                uint simd_group_index [[simdgroup_index_in_threadgroup]], \
+                                uint num_simd_groups [[simdgroups_per_threadgroup]]) { \
+  MetalKernelContext context(_launch_params_metal, _metal_ancillaries); \
+  params_struct->run(context, simdgroup_offset, metal_global_id, metal_local_id, metal_local_size, simdgroup_size, simd_lane_index, simd_group_index, num_simd_groups); \
+} \
+void kernel_gpu_##name::run(thread MetalKernelContext& context, \
+                            threadgroup int *simdgroup_offset, \
+                            const uint metal_global_id, \
+                            const ushort metal_local_id, \
+                            const ushort metal_local_size, \
+                            uint simdgroup_size, \
+                            uint simd_lane_index, \
+                            uint simd_group_index, \
+                            uint num_simd_groups) ccl_global const
+
+#define ccl_gpu_kernel_call(x) context.x
+
+/* define a function object where "func" is the lambda body, and additional parameters are used to specify captured state */
+#define ccl_gpu_kernel_lambda(func, ...) \
+  struct KernelLambda \
+  { \
+    KernelLambda(ccl_private MetalKernelContext &_context) : context(_context) {} \
+    ccl_private MetalKernelContext &context; \
+    __VA_ARGS__; \
+    int operator()(const int state) const { return (func); } \
+  } ccl_gpu_kernel_lambda_pass(context)
+
+// clang-format on
+
+/* volumetric lambda functions - use function objects for lambda-like functionality */
+#define VOLUME_READ_LAMBDA(function_call) \
+  struct FnObjectRead { \
+    KernelGlobals kg; \
+    ccl_private MetalKernelContext *context; \
+    int state; \
+\
+    VolumeStack operator()(const int i) const \
+    { \
+      return context->function_call; \
+    } \
+  } volume_read_lambda_pass{kg, this, state};
+
+#define VOLUME_WRITE_LAMBDA(function_call) \
+  struct FnObjectWrite { \
+    KernelGlobals kg; \
+    ccl_private MetalKernelContext *context; \
+    int state; \
+\
+    void operator()(const int i, VolumeStack entry) const \
+    { \
+      context->function_call; \
+    } \
+  } volume_write_lambda_pass{kg, this, state};
+
 /* make_type definitions with Metal style element initializers */
 #ifdef make_float2
 #  undef make_float2
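The FNn/GET_LAST_ARG/PARAMS_MAKER trio is a classic argument-counting preprocessor trick: padding __VA_ARGS__ with the handlers in reverse order makes a fixed positional pick select the handler that matches the argument count. A reduced, compilable C++ demonstration with three handlers instead of sixteen:

#include <cstdio>

#define FN1(p1) 1
#define FN2(p1, p2) 2
#define FN3(p1, p2, p3) 3
/* Pick the 4th argument; the trailing ... swallows whatever is left over. */
#define GET_4TH(a, b, c, d, ...) d
#define COUNT_ARGS(...) GET_4TH(__VA_ARGS__, FN3, FN2, FN1, unused)(__VA_ARGS__)

int main()
{
  std::printf("%d %d %d\n", COUNT_ARGS(x), COUNT_ARGS(x, y), COUNT_ARGS(x, y, z));
  /* prints: 1 2 3 */
  return 0;
}

In ccl_gpu_kernel_signature the selected FNn turns the kernel's parameter list into struct member declarations, which is how the Metal entry point can receive them as the single params_struct buffer seen above.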
@@ -112,6 +230,7 @@ using namespace metal;
 #define sinhf(x) sinh(float(x))
 #define coshf(x) cosh(float(x))
 #define tanhf(x) tanh(float(x))
+#define saturatef(x) saturate(float(x))

 /* Use native functions with possibly lower precision for performance,
  * no issues found so far. */
@@ -124,3 +243,43 @@ using namespace metal;
 #define logf(x) trigmode::log(float(x))

 #define NULL 0
+
+#define __device__
+
+/* texture bindings and sampler setup */
+
+struct Texture2DParamsMetal {
+  texture2d<float, access::sample> tex;
+};
+struct Texture3DParamsMetal {
+  texture3d<float, access::sample> tex;
+};
+
+struct MetalAncillaries {
+  device Texture2DParamsMetal *textures_2d;
+  device Texture3DParamsMetal *textures_3d;
+};
+
+#include "util/half.h"
+#include "util/types.h"
+
+enum SamplerType {
+  SamplerFilterNearest_AddressRepeat,
+  SamplerFilterNearest_AddressClampEdge,
+  SamplerFilterNearest_AddressClampZero,
+
+  SamplerFilterLinear_AddressRepeat,
+  SamplerFilterLinear_AddressClampEdge,
+  SamplerFilterLinear_AddressClampZero,
+
+  SamplerCount
+};
+
+constant constexpr array<sampler, SamplerCount> metal_samplers = {
+  sampler(address::repeat, filter::nearest),
+  sampler(address::clamp_to_edge, filter::nearest),
+  sampler(address::clamp_to_zero, filter::nearest),
+  sampler(address::repeat, filter::linear),
+  sampler(address::clamp_to_edge, filter::linear),
+  sampler(address::clamp_to_zero, filter::linear),
+};
intern/cycles/kernel/device/metal/context_begin.h (new file, 79 lines)
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// clang-format off
+
+/* Open the Metal kernel context class
+ * Necessary to access resource bindings */
+class MetalKernelContext {
+  public:
+    constant KernelParamsMetal &launch_params_metal;
+    constant MetalAncillaries *metal_ancillaries;
+
+    MetalKernelContext(constant KernelParamsMetal &_launch_params_metal, constant MetalAncillaries * _metal_ancillaries)
+      : launch_params_metal(_launch_params_metal), metal_ancillaries(_metal_ancillaries)
+    {}
+
+    /* texture fetch adapter functions */
+    typedef uint64_t ccl_gpu_tex_object;
+
+    template<typename T>
+    inline __attribute__((__always_inline__))
+    T ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object tex, float x, float y) const {
+      kernel_assert(0);
+      return 0;
+    }
+    template<typename T>
+    inline __attribute__((__always_inline__))
+    T ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object tex, float x, float y, float z) const {
+      kernel_assert(0);
+      return 0;
+    }
+
+    // texture2d
+    template<>
+    inline __attribute__((__always_inline__))
+    float4 ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object tex, float x, float y) const {
+      const uint tid(tex);
+      const uint sid(tex >> 32);
+      return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y));
+    }
+    template<>
+    inline __attribute__((__always_inline__))
+    float ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object tex, float x, float y) const {
+      const uint tid(tex);
+      const uint sid(tex >> 32);
+      return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y)).x;
+    }
+
+    // texture3d
+    template<>
+    inline __attribute__((__always_inline__))
+    float4 ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object tex, float x, float y, float z) const {
+      const uint tid(tex);
+      const uint sid(tex >> 32);
+      return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z));
+    }
+    template<>
+    inline __attribute__((__always_inline__))
+    float ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object tex, float x, float y, float z) const {
+      const uint tid(tex);
+      const uint sid(tex >> 32);
+      return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z)).x;
+    }
+#  include "kernel/device/gpu/image.h"
+
+// clang-format on

intern/cycles/kernel/device/metal/context_end.h (new file, 23 lines)
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+}
+; /* end of MetalKernelContext class definition */
+
+/* Silently redirect into the MetalKernelContext instance */
+/* NOTE: These macros will need maintaining as entry-points change. */
+
+#undef kernel_integrator_state
+#define kernel_integrator_state context.launch_params_metal.__integrator_state
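The read_2D/read_3D specializations in context_begin.h decode their ccl_gpu_tex_object as two packed 32-bit fields (const uint tid(tex) and const uint sid(tex >> 32)). A small host-side C++ model of that layout; the packing helper is an assumption, since only the decoding side appears in this patch:

#include <cassert>
#include <cstdint>

/* Hypothetical packer: texture slot in the low 32 bits, sampler-table index
 * in the high 32 bits, matching the unpacking in the adapters above. */
static uint64_t make_tex_handle(uint32_t tid, uint32_t sid)
{
  return (uint64_t(sid) << 32) | uint64_t(tid);
}

int main()
{
  const uint64_t tex = make_tex_handle(/*tid=*/7, /*sid=*/3);
  const uint32_t tid = uint32_t(tex);       /* -> textures_2d/textures_3d slot */
  const uint32_t sid = uint32_t(tex >> 32); /* -> metal_samplers entry */
  assert(tid == 7 && sid == 3);
  return 0;
}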
intern/cycles/kernel/device/metal/globals.h (new file, 51 lines)
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Constant Globals */
+
+#include "kernel/types.h"
+#include "kernel/util/profiling.h"
+
+#include "kernel/integrator/state.h"
+
+CCL_NAMESPACE_BEGIN
+
+typedef struct KernelParamsMetal {
+
+#define KERNEL_TEX(type, name) ccl_global const type *name;
+#include "kernel/textures.h"
+#undef KERNEL_TEX
+
+  const IntegratorStateGPU __integrator_state;
+  const KernelData data;
+
+} KernelParamsMetal;
+
+typedef struct KernelGlobalsGPU {
+  int unused[1];
+} KernelGlobalsGPU;
+
+typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals;
+
+#define kernel_data launch_params_metal.data
+#define kernel_integrator_state launch_params_metal.__integrator_state
+
+/* data lookup defines */
+
+#define kernel_tex_fetch(tex, index) launch_params_metal.tex[index]
+#define kernel_tex_array(tex) launch_params_metal.tex
+
+CCL_NAMESPACE_END
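KernelParamsMetal is built with the KERNEL_TEX x-macro pattern: one list of textures expands into struct members wherever it is included. A standalone C++ illustration (the two entries below are hypothetical; the real list lives in kernel/textures.h):

#include <cstdio>

#define KERNEL_TEX_LIST \
  KERNEL_TEX(float, tex_floats) \
  KERNEL_TEX(int, tex_ints)

struct KernelParams {
/* Each list entry becomes a pointer member of the params struct. */
#define KERNEL_TEX(type, name) const type *name;
  KERNEL_TEX_LIST
#undef KERNEL_TEX
};

int main()
{
  static const float f[2] = {1.5f, 2.5f};
  static const int i[2] = {3, 4};
  const KernelParams params{f, i};
  std::printf("%g %d\n", params.tex_floats[1], params.tex_ints[0]); /* 2.5 3 */
  return 0;
}

The same list can be re-expanded elsewhere (e.g. into lookup tables), which is why the entries are macros rather than plain declarations.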
intern/cycles/kernel/device/metal/kernel.metal (new file, 25 lines)
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Metal kernel entry points */
+
+// clang-format off
+
+#include "kernel/device/metal/compat.h"
+#include "kernel/device/metal/globals.h"
+#include "kernel/device/gpu/kernel.h"
+
+// clang-format on
@@ -49,10 +49,11 @@ typedef unsigned long long uint64_t;
     __device__ __forceinline__  // Function calls are bad for OptiX performance, so inline everything
 #define ccl_device_inline ccl_device
 #define ccl_device_forceinline ccl_device
+#define ccl_device_inline_method ccl_device
 #define ccl_device_noinline __device__ __noinline__
 #define ccl_device_noinline_cpu ccl_device
 #define ccl_global
-#define ccl_static_constant __constant__
+#define ccl_inline_constant __constant__
 #define ccl_device_constant __constant__ __device__
 #define ccl_constant const
 #define ccl_gpu_shared __shared__
@@ -76,6 +77,7 @@ typedef unsigned long long uint64_t;
 #define ccl_gpu_block_idx_x (blockIdx.x)
 #define ccl_gpu_grid_dim_x (gridDim.x)
 #define ccl_gpu_warp_size (warpSize)
+#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))

 #define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
 #define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
@@ -85,7 +87,6 @@ typedef unsigned long long uint64_t;
 #define ccl_gpu_syncthreads() __syncthreads()
 #define ccl_gpu_ballot(predicate) __ballot_sync(0xFFFFFFFF, predicate)
 #define ccl_gpu_shfl_down_sync(mask, var, detla) __shfl_down_sync(mask, var, detla)
-#define ccl_gpu_popc(x) __popc(x)

 /* GPU texture objects */

@@ -21,6 +21,8 @@

 #include "kernel/device/gpu/image.h" /* Texture lookup uses normal CUDA intrinsics. */

+#include "kernel/tables.h"
+
 #include "kernel/integrator/state.h"
 #include "kernel/integrator/state_flow.h"
 #include "kernel/integrator/state_util.h"
@@ -44,7 +46,7 @@ template<typename T> ccl_device_forceinline T *get_payload_ptr_2()
 ccl_device_forceinline int get_object_id()
 {
 #ifdef __OBJECT_MOTION__
-  /* Always get the the instance ID from the TLAS
+  /* Always get the instance ID from the TLAS
    * There might be a motion transform node between TLAS and BLAS which does not have one. */
   return optixGetInstanceIdFromHandle(optixGetTransformListHandle(0));
 #else
@@ -57,7 +59,7 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest()
   const int global_index = optixGetLaunchIndex().x;
   const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
                                                        global_index;
-  integrator_intersect_closest(nullptr, path_index);
+  integrator_intersect_closest(nullptr, path_index, __params.render_buffer);
 }

 extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_shadow()
@@ -159,9 +161,9 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()

     /* Record geometric normal. */
     const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w;
-    const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0));
-    const float3 tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1));
-    const float3 tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2));
+    const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0);
+    const float3 tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1);
+    const float3 tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2);
     local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));

     /* Continue tracing (without this the trace call would return after the first hit). */
@@ -33,62 +33,72 @@ CCL_NAMESPACE_BEGIN
  * them separately. */

 ccl_device_inline void bsdf_eval_init(ccl_private BsdfEval *eval,
-                                      const bool is_diffuse,
+                                      const ClosureType closure_type,
                                       float3 value)
 {
   eval->diffuse = zero_float3();
   eval->glossy = zero_float3();

-  if (is_diffuse) {
+  if (CLOSURE_IS_BSDF_DIFFUSE(closure_type)) {
     eval->diffuse = value;
   }
-  else {
+  else if (CLOSURE_IS_BSDF_GLOSSY(closure_type)) {
     eval->glossy = value;
   }
+
+  eval->sum = value;
 }

 ccl_device_inline void bsdf_eval_accum(ccl_private BsdfEval *eval,
-                                       const bool is_diffuse,
-                                       float3 value,
-                                       float mis_weight)
+                                       const ClosureType closure_type,
+                                       float3 value)
 {
-  value *= mis_weight;
-
-  if (is_diffuse) {
+  if (CLOSURE_IS_BSDF_DIFFUSE(closure_type)) {
     eval->diffuse += value;
   }
-  else {
+  else if (CLOSURE_IS_BSDF_GLOSSY(closure_type)) {
     eval->glossy += value;
   }
+
+  eval->sum += value;
 }

 ccl_device_inline bool bsdf_eval_is_zero(ccl_private BsdfEval *eval)
 {
-  return is_zero(eval->diffuse) && is_zero(eval->glossy);
+  return is_zero(eval->sum);
 }

 ccl_device_inline void bsdf_eval_mul(ccl_private BsdfEval *eval, float value)
 {
   eval->diffuse *= value;
   eval->glossy *= value;
+  eval->sum *= value;
 }

 ccl_device_inline void bsdf_eval_mul3(ccl_private BsdfEval *eval, float3 value)
 {
   eval->diffuse *= value;
   eval->glossy *= value;
+  eval->sum *= value;
 }

 ccl_device_inline float3 bsdf_eval_sum(ccl_private const BsdfEval *eval)
 {
-  return eval->diffuse + eval->glossy;
+  return eval->sum;
 }

-ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(ccl_private const BsdfEval *eval)
+ccl_device_inline float3 bsdf_eval_pass_diffuse_weight(ccl_private const BsdfEval *eval)
 {
-  /* Ratio of diffuse and glossy to recover proportions for writing to render pass.
+  /* Ratio of diffuse weight to recover proportions for writing to render pass.
    * We assume reflection, transmission and volume scatter to be exclusive. */
-  return safe_divide_float3_float3(eval->diffuse, eval->diffuse + eval->glossy);
+  return safe_divide_float3_float3(eval->diffuse, eval->sum);
+}
+
+ccl_device_inline float3 bsdf_eval_pass_glossy_weight(ccl_private const BsdfEval *eval)
+{
+  /* Ratio of glossy weight to recover proportions for writing to render pass.
+   * We assume reflection, transmission and volume scatter to be exclusive. */
+  return safe_divide_float3_float3(eval->glossy, eval->sum);
 }

 /* --------------------------------------------------------------------
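The net effect of this hunk: BsdfEval gains an explicit sum that tracks every accumulated closure (including ones that are neither diffuse nor glossy), bsdf_eval_is_zero and bsdf_eval_sum read it directly, and the old diffuse-glossy ratio is replaced by separate per-pass weights. A reduced host C++ model of the new bookkeeping, with float3 collapsed to float for brevity:

#include <cstdio>

struct BsdfEval {
  float diffuse = 0.0f;
  float glossy = 0.0f;
  float sum = 0.0f;
};

enum ClosureKind { DIFFUSE, GLOSSY, OTHER };

static void bsdf_eval_accum(BsdfEval &eval, ClosureKind kind, float value)
{
  if (kind == DIFFUSE) {
    eval.diffuse += value;
  }
  else if (kind == GLOSSY) {
    eval.glossy += value;
  }
  eval.sum += value; /* also counts closures that are neither, e.g. transmission */
}

static float pass_diffuse_weight(const BsdfEval &eval)
{
  return (eval.sum != 0.0f) ? eval.diffuse / eval.sum : 0.0f; /* safe_divide stand-in */
}

int main()
{
  BsdfEval eval;
  bsdf_eval_accum(eval, DIFFUSE, 0.25f);
  bsdf_eval_accum(eval, GLOSSY, 0.75f);
  std::printf("diffuse pass weight: %g\n", pass_diffuse_weight(eval)); /* 0.25 */
  return 0;
}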
@@ -141,7 +151,8 @@ ccl_device_forceinline ccl_global float *kernel_accum_pixel_render_buffer(
 ccl_device_inline int kernel_accum_sample(KernelGlobals kg,
                                           ConstIntegratorState state,
                                           ccl_global float *ccl_restrict render_buffer,
-                                          int sample)
+                                          int sample,
+                                          int sample_offset)
 {
   if (kernel_data.film.pass_sample_count == PASS_UNUSED) {
     return sample;
@@ -149,7 +160,9 @@ ccl_device_inline int kernel_accum_sample(KernelGlobals kg,

   ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer);

-  return atomic_fetch_and_add_uint32((uint *)(buffer) + kernel_data.film.pass_sample_count, 1);
+  return atomic_fetch_and_add_uint32(
+             (ccl_global uint *)(buffer) + kernel_data.film.pass_sample_count, 1) +
+         sample_offset;
 }

 ccl_device void kernel_accum_adaptive_buffer(KernelGlobals kg,
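The returned sample index is now the atomic post-increment of the per-pixel sample-count pass plus a caller-supplied sample_offset, presumably so accumulation can start from a nonzero base sample. A host C++ model of that arithmetic:

#include <atomic>
#include <cstdio>

int main()
{
  std::atomic<unsigned> pass_sample_count{0}; /* stands in for the pass slot */
  const int sample_offset = 16;               /* hypothetical base sample */

  for (int i = 0; i < 3; i++) {
    /* fetch_add returns the previous value, like atomic_fetch_and_add_uint32. */
    const int sample = int(pass_sample_count.fetch_add(1)) + sample_offset;
    std::printf("sample %d\n", sample); /* prints: sample 16, 17, 18 */
  }
  return 0;
}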
@@ -351,22 +364,31 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(KernelGlobals kg
|
|||||||
/* Directly visible, write to emission or background pass. */
|
/* Directly visible, write to emission or background pass. */
|
||||||
pass_offset = pass;
|
pass_offset = pass;
|
||||||
}
|
}
|
||||||
else if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) {
|
else if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
|
||||||
|
if (path_flag & PATH_RAY_SURFACE_PASS) {
|
||||||
/* Indirectly visible through reflection. */
|
/* Indirectly visible through reflection. */
|
||||||
const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ?
|
const float3 diffuse_weight = INTEGRATOR_STATE(state, path, pass_diffuse_weight);
|
||||||
((INTEGRATOR_STATE(state, path, bounce) == 1) ?
|
const float3 glossy_weight = INTEGRATOR_STATE(state, path, pass_glossy_weight);
|
||||||
|
|
||||||
|
/* Glossy */
|
||||||
|
const int glossy_pass_offset = ((INTEGRATOR_STATE(state, path, bounce) == 1) ?
|
||||||
kernel_data.film.pass_glossy_direct :
|
kernel_data.film.pass_glossy_direct :
|
||||||
kernel_data.film.pass_glossy_indirect) :
|
kernel_data.film.pass_glossy_indirect);
|
||||||
((INTEGRATOR_STATE(state, path, bounce) == 1) ?
|
if (glossy_pass_offset != PASS_UNUSED) {
|
||||||
|
kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_weight * contribution);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Transmission */
|
||||||
|
const int transmission_pass_offset = ((INTEGRATOR_STATE(state, path, bounce) == 1) ?
|
||||||
kernel_data.film.pass_transmission_direct :
|
kernel_data.film.pass_transmission_direct :
|
||||||
kernel_data.film.pass_transmission_indirect);
|
kernel_data.film.pass_transmission_indirect);
|
||||||
|
|
||||||
if (glossy_pass_offset != PASS_UNUSED) {
|
if (transmission_pass_offset != PASS_UNUSED) {
|
||||||
/* Glossy is a subset of the throughput, reconstruct it here using the
|
/* Transmission is what remains if not diffuse and glossy, not stored explicitly to save
|
||||||
* diffuse-glossy ratio. */
|
* GPU memory. */
|
||||||
const float3 ratio = INTEGRATOR_STATE(state, path, diffuse_glossy_ratio);
|
const float3 transmission_weight = one_float3() - diffuse_weight - glossy_weight;
|
||||||
const float3 glossy_contribution = (one_float3() - ratio) * contribution;
|
kernel_write_pass_float3(buffer + transmission_pass_offset,
|
||||||
kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution);
|
transmission_weight * contribution);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Reconstruct diffuse subset of throughput. */
|
/* Reconstruct diffuse subset of throughput. */
|
||||||
@@ -374,7 +396,7 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(KernelGlobals kg
                           kernel_data.film.pass_diffuse_direct :
                           kernel_data.film.pass_diffuse_indirect;
         if (pass_offset != PASS_UNUSED) {
-          contribution *= INTEGRATOR_STATE(state, path, diffuse_glossy_ratio);
+          contribution *= diffuse_weight;
         }
       }
     else if (path_flag & PATH_RAY_VOLUME_PASS) {
@@ -383,6 +405,7 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(KernelGlobals kg
                         kernel_data.film.pass_volume_direct :
                         kernel_data.film.pass_volume_indirect;
     }
+    }
 
     /* Single write call for GPU coherence. */
     if (pass_offset != PASS_UNUSED) {
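The "single write call for GPU coherence" comment describes a pattern worth spelling out: each branch only selects a pass offset, and the store happens at exactly one site, so threads in a warp converge on the same write instruction instead of diverging into branch-local stores. A standalone sketch of the idea in plain C++ (illustrative names, not kernel API):

enum { PASS_UNUSED_EXAMPLE = -1 };

/* Each branch merely selects a destination offset; the single store at the
 * end keeps the memory access pattern uniform across threads. */
static void accum_single_write(float *buffer, bool direct, bool volume,
                               int emission_offset, int volume_offset,
                               float contribution)
{
  int pass_offset = PASS_UNUSED_EXAMPLE;
  if (direct) {
    pass_offset = emission_offset;
  }
  else if (volume) {
    pass_offset = volume_offset;
  }

  /* Single write call: one branch, one store. */
  if (pass_offset != PASS_UNUSED_EXAMPLE) {
    buffer[pass_offset] += contribution;
  }
}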
@@ -426,24 +449,34 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg,
 #ifdef __PASSES__
   if (kernel_data.film.light_pass_flag & PASS_ANY) {
     const uint32_t path_flag = INTEGRATOR_STATE(state, shadow_path, flag);
-    int pass_offset = PASS_UNUSED;
 
-    if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) {
-      /* Indirectly visible through reflection. */
-      const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ?
-                                         ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
-                                              kernel_data.film.pass_glossy_direct :
-                                              kernel_data.film.pass_glossy_indirect) :
-                                         ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
-                                              kernel_data.film.pass_transmission_direct :
-                                              kernel_data.film.pass_transmission_indirect);
-
-      if (glossy_pass_offset != PASS_UNUSED) {
-        /* Glossy is a subset of the throughput, reconstruct it here using the
-         * diffuse-glossy ratio. */
-        const float3 ratio = INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio);
-        const float3 glossy_contribution = (one_float3() - ratio) * contribution;
-        kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution);
-      }
+    if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
+      int pass_offset = PASS_UNUSED;
+
+      if (path_flag & PATH_RAY_SURFACE_PASS) {
+        /* Indirectly visible through reflection. */
+        const float3 diffuse_weight = INTEGRATOR_STATE(state, shadow_path, pass_diffuse_weight);
+        const float3 glossy_weight = INTEGRATOR_STATE(state, shadow_path, pass_glossy_weight);
+
+        /* Glossy */
+        const int glossy_pass_offset = ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
+                                            kernel_data.film.pass_glossy_direct :
+                                            kernel_data.film.pass_glossy_indirect);
+        if (glossy_pass_offset != PASS_UNUSED) {
+          kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_weight * contribution);
+        }
+
+        /* Transmission */
+        const int transmission_pass_offset = ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
+                                                  kernel_data.film.pass_transmission_direct :
+                                                  kernel_data.film.pass_transmission_indirect);
+
+        if (transmission_pass_offset != PASS_UNUSED) {
+          /* Transmission is what remains if not diffuse and glossy, not stored explicitly to save
+           * GPU memory. */
+          const float3 transmission_weight = one_float3() - diffuse_weight - glossy_weight;
+          kernel_write_pass_float3(buffer + transmission_pass_offset,
+                                   transmission_weight * contribution);
+        }
 
         /* Reconstruct diffuse subset of throughput. */
@@ -451,7 +484,7 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg,
                           kernel_data.film.pass_diffuse_direct :
                           kernel_data.film.pass_diffuse_indirect;
         if (pass_offset != PASS_UNUSED) {
-          contribution *= INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio);
+          contribution *= diffuse_weight;
         }
       }
       else if (path_flag & PATH_RAY_VOLUME_PASS) {
@@ -465,10 +498,11 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg,
       if (pass_offset != PASS_UNUSED) {
         kernel_write_pass_float3(buffer + pass_offset, contribution);
       }
+    }
 
     /* Write shadow pass. */
     if (kernel_data.film.pass_shadow != PASS_UNUSED && (path_flag & PATH_RAY_SHADOW_FOR_LIGHT) &&
-        (path_flag & PATH_RAY_CAMERA)) {
+        (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
       const float3 unshadowed_throughput = INTEGRATOR_STATE(
           state, shadow_path, unshadowed_throughput);
       const float3 shadowed_throughput = INTEGRATOR_STATE(state, shadow_path, throughput);
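The shadow pass write reads both the unshadowed and the shadowed throughput. A common way to turn those into a shadow value, and to my understanding what happens downstream of this hunk (the division itself is not shown here), is a per-channel safe ratio. A sketch under that assumption, with one plausible convention for the zero-denominator case:

/* Illustrative only: derives a per-channel shadow factor from the shadowed
 * and unshadowed throughputs read from the shadow path state above. */
struct float3 {
  float x, y, z;
};

/* Convention assumed here: where no light arrived at all, report "unshadowed". */
static float safe_div(float a, float b) { return (b != 0.0f) ? a / b : 1.0f; }

static float3 shadow_factor(float3 shadowed, float3 unshadowed)
{
  /* 1.0 where the light was fully visible, 0.0 where fully occluded. */
  return {safe_div(shadowed.x, unshadowed.x),
          safe_div(shadowed.y, unshadowed.y),
          safe_div(shadowed.z, unshadowed.z)};
}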
@@ -519,7 +553,7 @@ ccl_device_inline void kernel_accum_background(KernelGlobals kg,
                                                const bool is_transparent_background_ray,
                                                ccl_global float *ccl_restrict render_buffer)
 {
-  float3 contribution = INTEGRATOR_STATE(state, path, throughput) * L;
+  float3 contribution = float3(INTEGRATOR_STATE(state, path, throughput)) * L;
   kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1);
 
   ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer);
@@ -540,11 +574,10 @@ ccl_device_inline void kernel_accum_background(KernelGlobals kg,
 /* Write emission to render buffer. */
 ccl_device_inline void kernel_accum_emission(KernelGlobals kg,
                                              ConstIntegratorState state,
-                                             const float3 throughput,
                                              const float3 L,
                                              ccl_global float *ccl_restrict render_buffer)
 {
-  float3 contribution = throughput * L;
+  float3 contribution = L;
   kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1);
 
   ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer);
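Dropping the `throughput` parameter implies the callers now fold the path throughput into `L` before calling, so `kernel_accum_emission` only clamps and writes. A sketch of what that call-site change would look like, with stub types and hypothetical function names standing in for the kernel's:

struct float3 {
  float x, y, z;
};
static float3 operator*(float3 a, float3 b) { return {a.x * b.x, a.y * b.y, a.z * b.z}; }

/* Old shape of the call: throughput and emitted radiance passed separately,
 * multiplied inside the accumulation function. */
static void accum_emission_old(float3 throughput, float3 L)
{
  float3 contribution = throughput * L; /* what the removed line computed */
  (void)contribution;
}

/* New shape: the caller pre-multiplies once and passes the final contribution. */
static void accum_emission_new(float3 contribution)
{
  (void)contribution; /* the function now only clamps and writes this value */
}

static void caller(float3 throughput, float3 emission)
{
  accum_emission_old(throughput, emission);
  accum_emission_new(throughput * emission); /* fold throughput at the call site */
}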
@@ -160,40 +160,6 @@ ccl_device_forceinline void kernel_write_denoising_features_volume(KernelGlobals
 }
 #endif /* __DENOISING_FEATURES__ */
 
-#ifdef __SHADOW_CATCHER__
-
-/* Write shadow catcher passes on a bounce from the shadow catcher object. */
-ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
-    KernelGlobals kg,
-    IntegratorState state,
-    ccl_private const ShaderData *sd,
-    ccl_global float *ccl_restrict render_buffer)
-{
-  if (!kernel_data.integrator.has_shadow_catcher) {
-    return;
-  }
-
-  kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED);
-  kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
-
-  if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, sd->object_flag)) {
-    return;
-  }
-
-  ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer);
-
-  /* Count sample for the shadow catcher object. */
-  kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f);
-
-  /* Since the split is done, the sample does not contribute to the matte, so accumulate it as
-   * transparency to the matte. */
-  const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
-  kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3,
-                          average(throughput));
-}
-
-#endif /* __SHADOW_CATCHER__ */
-
 ccl_device_inline size_t kernel_write_id_pass(ccl_global float *ccl_restrict buffer,
                                               size_t depth,
                                               float id,
@@ -211,7 +177,7 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals kg,
 #ifdef __PASSES__
   const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
 
-  if (!(path_flag & PATH_RAY_CAMERA)) {
+  if (!(path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
     return;
   }
 
@@ -27,7 +27,12 @@ CCL_NAMESPACE_BEGIN
  * Lookup of attributes is different between OSL and SVM, as OSL is ustring
  * based while for SVM we use integer ids. */
 
-ccl_device_inline uint subd_triangle_patch(KernelGlobals kg, ccl_private const ShaderData *sd);
+/* Patch index for triangle, -1 if not subdivision triangle */
+ccl_device_inline uint subd_triangle_patch(KernelGlobals kg, ccl_private const ShaderData *sd)
+{
+  return (sd->prim != PRIM_NONE) ? kernel_tex_fetch(__tri_patch, sd->prim) : ~0;
+}
+
 
 ccl_device_inline uint attribute_primitive_type(KernelGlobals kg, ccl_private const ShaderData *sd)
 {
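Turning `subd_triangle_patch` from a forward declaration into an inline definition also documents its sentinel: `~0` marks a primitive that is not a subdivision triangle. A hypothetical caller-side sketch of how such a sentinel is typically tested (plain C++, not kernel code):

#include <cstdint>

/* Hypothetical stand-in for the kernel lookup: returns the patch index, or
 * ~0u when the primitive is not a subdivision triangle. */
static uint32_t subd_triangle_patch_example(int prim, const uint32_t *tri_patch)
{
  const int PRIM_NONE_EXAMPLE = -1; /* mirrors the kernel's "no primitive" sentinel */
  return (prim != PRIM_NONE_EXAMPLE) ? tri_patch[prim] : ~0u;
}

/* Caller side: skip patch attribute lookups for regular triangles. */
static bool is_subd_triangle(int prim, const uint32_t *tri_patch)
{
  return subd_triangle_patch_example(prim, tri_patch) != ~0u;
}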
@@ -106,9 +111,9 @@ ccl_device Transform primitive_attribute_matrix(KernelGlobals kg,
 {
   Transform tfm;
 
-  tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0);
-  tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1);
-  tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2);
+  tfm.x = kernel_tex_fetch(__attributes_float4, desc.offset + 0);
+  tfm.y = kernel_tex_fetch(__attributes_float4, desc.offset + 1);
+  tfm.z = kernel_tex_fetch(__attributes_float4, desc.offset + 2);
 
   return tfm;
 }
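This hunk and the curve hunks below point at one storage change: transforms and float4 attributes move to a dedicated `__attributes_float4` array, while `__attributes_float3` appears to now hold tightly packed float3 data, which is why the `float4_to_float3` conversions disappear. A sketch of the two layouts under that reading (plain C++, illustrative names):

#include <vector>

struct float3 { float x, y, z; };
struct float4 { float x, y, z, w; };

/* Assumed layout: before the change, float3 attributes were stored padded to
 * float4 and converted on every fetch. Keeping separate, tightly packed
 * arrays removes the per-fetch conversion and saves memory for float3 data. */
struct AttributeStorageExample {
  std::vector<float3> attributes_float3; /* packed float3, no conversion needed */
  std::vector<float4> attributes_float4; /* transform rows, float4 attributes */
};

static float3 fetch_float3(const AttributeStorageExample &s, int offset)
{
  return s.attributes_float3[offset]; /* was: float4_to_float3(fetch(...)) */
}

static float4 fetch_float4(const AttributeStorageExample &s, int offset)
{
  return s.attributes_float4[offset];
}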
@@ -126,8 +126,8 @@ ccl_device float3 curve_attribute_float3(KernelGlobals kg,
     int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type);
     int k1 = k0 + 1;
 
-    float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0));
-    float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1));
+    float3 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + k0);
+    float3 f1 = kernel_tex_fetch(__attributes_float3, desc.offset + k1);
 
 # ifdef __RAY_DIFFERENTIALS__
     if (dx)
@@ -149,7 +149,7 @@ ccl_device float3 curve_attribute_float3(KernelGlobals kg,
   if (desc.element & (ATTR_ELEMENT_CURVE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) {
     const int offset = (desc.element == ATTR_ELEMENT_CURVE) ? desc.offset + sd->prim :
                                                               desc.offset;
-    return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset));
+    return kernel_tex_fetch(__attributes_float3, offset);
   }
   else {
     return make_float3(0.0f, 0.0f, 0.0f);
@@ -168,8 +168,8 @@ ccl_device float4 curve_attribute_float4(KernelGlobals kg,
     int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type);
     int k1 = k0 + 1;
 
-    float4 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + k0);
-    float4 f1 = kernel_tex_fetch(__attributes_float3, desc.offset + k1);
+    float4 f0 = kernel_tex_fetch(__attributes_float4, desc.offset + k0);
+    float4 f1 = kernel_tex_fetch(__attributes_float4, desc.offset + k1);
 
 # ifdef __RAY_DIFFERENTIALS__
     if (dx)
@@ -191,7 +191,7 @@ ccl_device float4 curve_attribute_float4(KernelGlobals kg,
   if (desc.element & (ATTR_ELEMENT_CURVE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) {
     const int offset = (desc.element == ATTR_ELEMENT_CURVE) ? desc.offset + sd->prim :
                                                               desc.offset;
-    return kernel_tex_fetch(__attributes_float3, offset);
+    return kernel_tex_fetch(__attributes_float4, offset);
   }
   else {
     return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
Some files were not shown because too many files have changed in this diff.