Compare commits

455 commits — comparing node-tree-… with soc-2020-i…
Author | SHA1 | Date
---|---|---
(455 rows; only bare SHA1 hashes were captured in this export — the author, date and message columns are empty)
@@ -440,11 +440,7 @@ mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
mark_as_advanced(WITH_CUDA_DYNLOAD)

# AMD HIP
if(WIN32)
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
else()
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" OFF)
endif()
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" OFF)
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 CACHE STRING "AMD HIP architectures to build binaries for")
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)

@@ -646,7 +642,7 @@ if(WIN32)
option(WITH_WINDOWS_PDB "Generate a pdb file for client side stacktraces" ON)
mark_as_advanced(WITH_WINDOWS_PDB)

option(WITH_WINDOWS_STRIPPED_PDB "Use a stripped PDB file" ON)
option(WITH_WINDOWS_STRIPPED_PDB "Use a stripped PDB file" On)
mark_as_advanced(WITH_WINDOWS_STRIPPED_PDB)

endif()
@@ -168,7 +168,7 @@ def function_parm_wash_tokens(parm):
# if tokens[-1].kind == To
# remove trailing char
if tokens[-1].kind == TokenKind.PUNCTUATION:
if tokens[-1].spelling in {",", ")", ";"}:
if tokens[-1].spelling in (",", ")", ";"):
tokens.pop()
# else:
# print(tokens[-1].spelling)

@@ -179,7 +179,7 @@ def function_parm_wash_tokens(parm):
t_spelling = t.spelling
ok = True
if t_kind == TokenKind.KEYWORD:
if t_spelling in {"const", "restrict", "volatile"}:
if t_spelling in ("const", "restrict", "volatile"):
ok = False
elif t_spelling.startswith("__"):
ok = False # __restrict
@@ -81,5 +81,4 @@ if(NOT APPLE)
set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
endif()
@@ -27,7 +27,7 @@ if(NOT MSVC)
endif()

if(CMAKE_C_COMPILER_ID MATCHES "Clang")
set(MSVC_CLANG ON)
set(MSVC_CLANG On)
set(VC_TOOLS_DIR $ENV{VCToolsRedistDir} CACHE STRING "Location of the msvc redistributables")
set(MSVC_REDIST_DIR ${VC_TOOLS_DIR})
if(DEFINED MSVC_REDIST_DIR)

@@ -53,7 +53,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "Clang")
endif()
if(WITH_WINDOWS_STRIPPED_PDB)
message(WARNING "stripped pdb not supported with clang, disabling..")
set(WITH_WINDOWS_STRIPPED_PDB OFF)
set(WITH_WINDOWS_STRIPPED_PDB Off)
endif()
endif()

@@ -159,7 +159,7 @@ endif()
if(WITH_COMPILER_ASAN AND MSVC AND NOT MSVC_CLANG)
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 19.28.29828)
#set a flag so we don't have to do this comparison all the time
SET(MSVC_ASAN ON)
SET(MSVC_ASAN On)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fsanitize=address")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /fsanitize=address")
string(APPEND CMAKE_EXE_LINKER_FLAGS_DEBUG " /INCREMENTAL:NO")
@@ -179,22 +179,22 @@ endif()

if(WITH_WINDOWS_SCCACHE AND CMAKE_VS_MSBUILD_COMMAND)
message(WARNING "Disabling sccache, sccache is not supported with msbuild")
set(WITH_WINDOWS_SCCACHE OFF)
set(WITH_WINDOWS_SCCACHE Off)
endif()

# Debug Symbol format
# sccache # MSVC_ASAN # format # why
# ON # ON # Z7 # sccache will only play nice with Z7
# ON # OFF # Z7 # sccache will only play nice with Z7
# OFF # ON # Zi # Asan will not play nice with Edit and Continue
# OFF # OFF # ZI # Neither asan nor sscache is enabled Edit and Continue is available
# On # On # Z7 # sccache will only play nice with Z7
# On # Off # Z7 # sccache will only play nice with Z7
# Off # On # Zi # Asan will not play nice with Edit and Continue
# Off # Off # ZI # Neither asan nor sscache is enabled Edit and Continue is available

# Release Symbol format
# sccache # MSVC_ASAN # format # why
# ON # ON # Z7 # sccache will only play nice with Z7
# ON # OFF # Z7 # sccache will only play nice with Z7
# OFF # ON # Zi # Asan will not play nice with Edit and Continue
# OFF # OFF # Zi # Edit and Continue disables some optimizations
# On # On # Z7 # sccache will only play nice with Z7
# On # Off # Z7 # sccache will only play nice with Z7
# Off # On # Zi # Asan will not play nice with Edit and Continue
# Off # Off # Zi # Edit and Continue disables some optimizations

if(WITH_WINDOWS_SCCACHE)
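The comment table in this hunk encodes how the MSVC debug-symbol format is chosen from the sccache and MSVC_ASAN switches. A minimal sketch of that decision logic, written in plain Python for illustration only (it is not part of the build scripts):

```python
def debug_symbol_format(sccache: bool, msvc_asan: bool) -> str:
    # sccache only works with /Z7; ASAN breaks Edit and Continue (/ZI).
    if sccache:
        return "Z7"
    if msvc_asan:
        return "Zi"
    return "ZI"  # Edit and Continue is available when neither is enabled

def release_symbol_format(sccache: bool, msvc_asan: bool) -> str:
    # Same table, except Edit and Continue is never used for release builds.
    return "Z7" if sccache else "Zi"

assert debug_symbol_format(True, True) == "Z7"
assert debug_symbol_format(False, False) == "ZI"
assert release_symbol_format(False, False) == "Zi"
```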
@@ -288,7 +288,7 @@ if(CMAKE_GENERATOR MATCHES "^Visual Studio.+" AND # Only supported in the VS IDE
"EnableMicrosoftCodeAnalysis=false"
"EnableClangTidyCodeAnalysis=true"
)
set(VS_CLANG_TIDY ON)
set(VS_CLANG_TIDY On)
endif()

# Mark libdir as system headers with a lower warn level, to resolve some warnings
@@ -469,7 +469,7 @@ if(WITH_PYTHON)

set(PYTHON_INCLUDE_DIR ${LIBDIR}/python/${_PYTHON_VERSION_NO_DOTS}/include)
set(PYTHON_NUMPY_INCLUDE_DIRS ${LIBDIR}/python/${_PYTHON_VERSION_NO_DOTS}/lib/site-packages/numpy/core/include)
set(NUMPY_FOUND ON)
set(NUMPY_FOUND On)
unset(_PYTHON_VERSION_NO_DOTS)
# uncached vars
set(PYTHON_INCLUDE_DIRS "${PYTHON_INCLUDE_DIR}")
@@ -853,18 +853,18 @@ if(WITH_GMP)
set(GMP_INCLUDE_DIRS ${LIBDIR}/gmp/include)
set(GMP_LIBRARIES ${LIBDIR}/gmp/lib/libgmp-10.lib optimized ${LIBDIR}/gmp/lib/libgmpxx.lib debug ${LIBDIR}/gmp/lib/libgmpxx_d.lib)
set(GMP_ROOT_DIR ${LIBDIR}/gmp)
set(GMP_FOUND ON)
set(GMP_FOUND On)
endif()

if(WITH_POTRACE)
set(POTRACE_INCLUDE_DIRS ${LIBDIR}/potrace/include)
set(POTRACE_LIBRARIES ${LIBDIR}/potrace/lib/potrace.lib)
set(POTRACE_FOUND ON)
set(POTRACE_FOUND On)
endif()

if(WITH_HARU)
if(EXISTS ${LIBDIR}/haru)
set(HARU_FOUND ON)
set(HARU_FOUND On)
set(HARU_ROOT_DIR ${LIBDIR}/haru)
set(HARU_INCLUDE_DIRS ${HARU_ROOT_DIR}/include)
set(HARU_LIBRARIES ${HARU_ROOT_DIR}/lib/libhpdfs.lib)
@@ -2254,7 +2254,7 @@ def main():
# First monkey patch to load in fake members.
setup_monkey_patch()

# Perform changes to Blender itself.
# Perform changes to Blender it's self.
setup_data = setup_blender()

# eventually, create the dirs
@@ -138,6 +138,11 @@ endif()

blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")

# avoid link failure with clang 3.4 debug
if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND NOT ${CMAKE_C_COMPILER_VERSION} VERSION_LESS '3.4')
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -gline-tables-only")
endif()

add_dependencies(bf_intern_cycles bf_rna)

delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${ADDON_FILES}" ${CYCLES_INSTALL_PATH})
@@ -233,7 +233,6 @@ def list_render_passes(scene, srl):
if crl.denoising_store_passes:
yield ("Denoising Normal", "XYZ", 'VECTOR')
yield ("Denoising Albedo", "RGB", 'COLOR')
yield ("Denoising Depth", "Z", 'VALUE')

# Custom AOV passes.
for aov in srl.aovs:
@@ -40,10 +40,10 @@ class AddPresetIntegrator(AddPresetBase, Operator):
"cycles.transparent_max_bounces",
"cycles.caustics_reflective",
"cycles.caustics_refractive",
"cycles.blur_glossy",
"cycles.use_fast_gi",
"cycles.ao_bounces",
"cycles.ao_bounces_render",
"cycles.blur_glossy"
"cycles.use_fast_gi"
"cycles.ao_bounces"
"cycles.ao_bounces_render"
]

preset_subdir = "cycles/integrator"
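One side of this hunk lists the preset values with trailing commas while the other omits them; a quick illustration of why that matters in Python (adjacent string literals are concatenated into a single list entry):

```python
with_commas = [
    "cycles.blur_glossy",
    "cycles.use_fast_gi",
]
without_commas = [
    "cycles.blur_glossy"
    "cycles.use_fast_gi"
]
print(len(with_commas))     # 2
print(len(without_commas))  # 1 -> 'cycles.blur_glossycycles.use_fast_gi'
```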
@@ -87,7 +87,7 @@ enum_use_layer_samples = (

enum_sampling_pattern = (
('SOBOL', "Sobol", "Use Sobol random sampling pattern", 0),
('PROGRESSIVE_MULTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern", 1),
('PROGRESSIVE_MUTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern", 1),
)

enum_volume_sampling = (
@@ -325,13 +325,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default=1024,
)

sample_offset: IntProperty(
name="Sample Offset",
description="Number of samples to skip when starting render",
min=0, max=(1 << 24),
default=0,
)

time_limit: FloatProperty(
name="Time Limit",
description="Limit the render time (excluding synchronization time)."
@@ -346,7 +339,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
name="Sampling Pattern",
description="Random sampling pattern used by the integrator. When adaptive sampling is enabled, Progressive Multi-Jitter is always used instead of Sobol",
items=enum_sampling_pattern,
default='PROGRESSIVE_MULTI_JITTER',
default='PROGRESSIVE_MUTI_JITTER',
)

scrambling_distance: FloatProperty(
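The difference in this hunk is only the spelling of the default identifier, but an EnumProperty default must match one of the item identifiers exactly, which is why each side keeps its own spelling consistent with its enum_sampling_pattern tuple. A minimal sketch of how the items and default wire together (illustration only, runs inside Blender because of bpy):

```python
import bpy
from bpy.props import EnumProperty

# Identifiers are arbitrary strings; the default must be one of them verbatim.
enum_sampling_pattern = (
    ('SOBOL', "Sobol", "Use Sobol random sampling pattern", 0),
    ('PROGRESSIVE_MULTI_JITTER', "Progressive Multi-Jitter",
     "Use Progressive Multi-Jitter random sampling pattern", 1),
)

class DemoSettings(bpy.types.PropertyGroup):
    sampling_pattern: EnumProperty(
        name="Sampling Pattern",
        items=enum_sampling_pattern,
        default='PROGRESSIVE_MULTI_JITTER',  # must match an identifier above
    )
```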
@@ -1367,7 +1360,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
elif entry.type == 'CPU':
cpu_devices.append(entry)
# Extend all GPU devices with CPU.
if len(devices) and compute_device_type != 'CPU':
if compute_device_type != 'CPU':
devices.extend(cpu_devices)
return devices
@@ -1385,18 +1378,12 @@ class CyclesPreferences(bpy.types.AddonPreferences):
self.refresh_devices()
return None

def get_compute_device_type(self):
if self.compute_device_type == '':
return 'NONE'
return self.compute_device_type

def get_num_gpu_devices(self):
import _cycles
compute_device_type = self.get_compute_device_type()
device_list = _cycles.available_devices(compute_device_type)
device_list = _cycles.available_devices(self.compute_device_type)
num = 0
for device in device_list:
if device[1] != compute_device_type:
if device[1] != self.compute_device_type:
continue
for dev in self.devices:
if dev.use and dev.id == device[2]:
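The change in this hunk is whether callers read self.compute_device_type directly or go through a helper that maps the unset value to an explicit 'NONE'. A minimal sketch of that helper pattern, with the class reduced to plain Python for illustration:

```python
class PreferencesSketch:
    compute_device_type = ''  # unset enum value as stored in preferences

    def get_compute_device_type(self):
        # Map the empty/unset enum value to an explicit 'NONE'.
        if self.compute_device_type == '':
            return 'NONE'
        return self.compute_device_type

prefs = PreferencesSketch()
assert prefs.get_compute_device_type() == 'NONE'
prefs.compute_device_type = 'CUDA'
assert prefs.get_compute_device_type() == 'CUDA'
```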
@@ -1426,9 +1413,9 @@ class CyclesPreferences(bpy.types.AddonPreferences):
col.label(text="and NVIDIA driver version 470 or newer", icon='BLANK1')
elif device_type == 'HIP':
import sys
col.label(text="Requires discrete AMD GPU with RDNA architecture", icon='BLANK1')
col.label(text="Requires discrete AMD GPU with ??? architecture", icon='BLANK1')
if sys.platform[:3] == "win":
col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
col.label(text="and AMD driver version ??? or newer", icon='BLANK1')
return

for device in devices:
@@ -1438,16 +1425,15 @@ class CyclesPreferences(bpy.types.AddonPreferences):
row = layout.row()
row.prop(self, "compute_device_type", expand=True)

compute_device_type = self.get_compute_device_type()
if compute_device_type == 'NONE':
if self.compute_device_type == 'NONE':
return
row = layout.row()
devices = self.get_devices_for_type(compute_device_type)
self._draw_devices(row, compute_device_type, devices)
devices = self.get_devices_for_type(self.compute_device_type)
self._draw_devices(row, self.compute_device_type, devices)

import _cycles
has_peer_memory = 0
for device in _cycles.available_devices(compute_device_type):
for device in _cycles.available_devices(self.compute_device_type):
if device[3] and self.find_existing_device_entry(device).use:
has_peer_memory += 1
if has_peer_memory > 1:
@@ -290,9 +290,6 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
col.active = not (cscene.use_adaptive_sampling and cscene.use_preview_adaptive_sampling)
col.prop(cscene, "sampling_pattern", text="Pattern")

col = layout.column(align=True)
col.prop(cscene, "sample_offset")

layout.separator()

col = layout.column(align=True)
@@ -1054,7 +1051,7 @@ class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel):

def has_geometry_visibility(ob):
return ob and ((ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LIGHT', 'VOLUME', 'POINTCLOUD', 'HAIR'}) or
return ob and ((ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LIGHT'}) or
(ob.instance_type == 'COLLECTION' and ob.instance_collection))
@@ -86,7 +86,7 @@ def do_versions(self):
# Device might not currently be available so this can fail
try:
if system.legacy_compute_device_type == 1:
prop.compute_device_type = 'NONE' # Was OpenCL
prop.compute_device_type = 'OPENCL'
elif system.legacy_compute_device_type == 2:
prop.compute_device_type = 'CUDA'
else:
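On one side of this hunk, legacy value 1 (formerly OpenCL) now falls back to 'NONE' because OpenCL support was dropped, while value 2 still maps to 'CUDA'. A condensed sketch of that mapping; the default for other legacy values is an assumption, not shown in the hunk:

```python
# Hypothetical condensed form of the mapping shown in the hunk above.
LEGACY_COMPUTE_DEVICE_TYPES = {
    1: 'NONE',   # was OpenCL; OpenCL support removed on one side of the diff
    2: 'CUDA',
}

def migrate_compute_device_type(legacy_value):
    # Fallback to 'NONE' is assumed for any other legacy value.
    return LEGACY_COMPUTE_DEVICE_TYPES.get(legacy_value, 'NONE')

assert migrate_compute_device_type(1) == 'NONE'
assert migrate_compute_device_type(2) == 'CUDA'
```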
@@ -97,12 +97,6 @@ def do_versions(self):
# Init device list for UI
prop.get_devices(prop.compute_device_type)

if bpy.context.preferences.version <= (3, 0, 40):
# Disable OpenCL device
prop = bpy.context.preferences.addons[__package__].preferences
if prop.is_property_set("compute_device_type") and prop['compute_device_type'] == 4:
prop.compute_device_type = 'NONE'

# We don't modify startup file because it assumes to
# have all the default values only.
if not bpy.data.is_saved:
@@ -243,7 +237,7 @@ def do_versions(self):
cscene.use_preview_denoising = False
if not cscene.is_property_set("sampling_pattern") or \
cscene.get('sampling_pattern') >= 2:
cscene.sampling_pattern = 'PROGRESSIVE_MULTI_JITTER'
cscene.sampling_pattern = 'PROGRESSIVE_MUTI_JITTER'

# Removal of square samples.
cscene = scene.cycles
@@ -639,7 +639,7 @@ void BlenderSync::sync_camera_motion(
/* TODO(sergey): De-duplicate calculation with camera sync. */
float fov = 2.0f * atanf((0.5f * sensor_size) / bcam.lens / aspectratio);
if (fov != cam->get_fov()) {
VLOG(3) << "Camera " << b_ob.name() << " FOV change detected.";
VLOG(1) << "Camera " << b_ob.name() << " FOV change detected.";
if (motion_time == 0.0f) {
cam->set_fov(fov);
}
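Written out, the field of view computed in this hunk is (symbol names taken from the code above):

$$\mathrm{fov} = 2\,\arctan\!\left(\frac{0.5\cdot\mathrm{sensor\_size}}{\mathrm{lens}\cdot\mathrm{aspectratio}}\right)$$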
@@ -304,6 +304,10 @@ static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CDa
}
}

if (num_curves > 0) {
VLOG(1) << "Exporting curve segments for mesh " << hair->name;
}

hair->reserve_curves(hair->num_curves() + num_curves, hair->get_curve_keys().size() + num_keys);

num_keys = 0;

@@ -352,7 +356,7 @@ static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CDa

/* check allocation */
if ((hair->get_curve_keys().size() != num_keys) || (hair->num_curves() != num_curves)) {
VLOG(1) << "Hair memory allocation failed, clearing data.";
VLOG(1) << "Allocation failed, clearing data";
hair->clear(true);
}
}
@@ -408,11 +412,16 @@ static void export_hair_motion_validate_attribute(Hair *hair,
if (num_motion_keys != num_keys || !have_motion) {
/* No motion or hair "topology" changed, remove attributes again. */
if (num_motion_keys != num_keys) {
VLOG(1) << "Hair topology changed, removing motion attribute.";
VLOG(1) << "Hair topology changed, removing attribute.";
}
else {
VLOG(1) << "No motion, removing attribute.";
}
hair->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
}
else if (motion_step > 0) {
VLOG(1) << "Filling in new motion vertex position for motion_step " << motion_step;

/* Motion, fill up previous steps that we might have skipped because
* they had no motion, but we need them anyway now. */
for (int step = 0; step < motion_step; step++) {
@@ -428,12 +437,16 @@ static void export_hair_motion_validate_attribute(Hair *hair,

static void ExportCurveSegmentsMotion(Hair *hair, ParticleCurveData *CData, int motion_step)
{
VLOG(1) << "Exporting curve motion segments for hair " << hair->name << ", motion step "
<< motion_step;

/* find attribute */
Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
bool new_attribute = false;

/* add new attribute if it doesn't exist already */
if (!attr_mP) {
VLOG(1) << "Creating new motion vertex position attribute";
attr_mP = hair->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
new_attribute = true;
}
@@ -669,6 +682,10 @@ static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair)
const int num_keys = b_hair.points.length();
const int num_curves = b_hair.curves.length();

if (num_curves > 0) {
VLOG(1) << "Exporting curve segments for hair " << hair->name;
}

hair->reserve_curves(num_curves, num_keys);

/* Export curves and points. */
@@ -726,11 +743,15 @@ static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair)

static void export_hair_curves_motion(Hair *hair, BL::Hair b_hair, int motion_step)
{
VLOG(1) << "Exporting curve motion segments for hair " << hair->name << ", motion step "
<< motion_step;

/* Find or add attribute. */
Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
bool new_attribute = false;

if (!attr_mP) {
VLOG(1) << "Creating new motion vertex position attribute";
attr_mP = hair->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
new_attribute = true;
}
@@ -62,46 +62,31 @@ bool BlenderSync::BKE_object_is_modified(BL::Object &b_ob)
return false;
}

bool BlenderSync::object_is_geometry(BObjectInfo &b_ob_info)
bool BlenderSync::object_is_geometry(BL::Object &b_ob)
{
BL::ID b_ob_data = b_ob_info.object_data;
BL::ID b_ob_data = b_ob.data();

if (!b_ob_data) {
return false;
}

BL::Object::type_enum type = b_ob_info.iter_object.type();
BL::Object::type_enum type = b_ob.type();

if (type == BL::Object::type_VOLUME || type == BL::Object::type_HAIR) {
/* Will be exported attached to mesh. */
return true;
}
else if (type == BL::Object::type_CURVE) {
/* Skip exporting curves without faces, overhead can be
* significant if there are many for path animation. */
BL::Curve b_curve(b_ob_data);

/* Other object types that are not meshes but evaluate to meshes are presented to render engines
* as separate instance objects. Metaballs and surface objects have not been affected by that
* change yet. */
if (type == BL::Object::type_SURFACE || type == BL::Object::type_META) {
return true;
return (b_curve.bevel_object() || b_curve.extrude() != 0.0f || b_curve.bevel_depth() != 0.0f ||
b_curve.dimensions() == BL::Curve::dimensions_2D || b_ob.modifiers.length());
}

return b_ob_data.is_a(&RNA_Mesh);
}

bool BlenderSync::object_can_have_geometry(BL::Object &b_ob)
{
BL::Object::type_enum type = b_ob.type();
switch (type) {
case BL::Object::type_MESH:
case BL::Object::type_CURVE:
case BL::Object::type_SURFACE:
case BL::Object::type_META:
case BL::Object::type_FONT:
case BL::Object::type_HAIR:
case BL::Object::type_POINTCLOUD:
case BL::Object::type_VOLUME:
return true;
default:
return false;
else {
return (b_ob_data.is_a(&RNA_Mesh) || b_ob_data.is_a(&RNA_Curve) ||
b_ob_data.is_a(&RNA_MetaBall));
}
}
@@ -176,11 +161,6 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
if (is_instance) {
persistent_id_array = b_instance.persistent_id();
persistent_id = persistent_id_array.data;
if (!b_ob_info.is_real_object_data()) {
/* Remember which object data the geometry is coming from, so that we can sync it when the
* object has changed. */
instance_geometries_by_object[b_ob_info.real_object.ptr.data].insert(b_ob_info.object_data);
}
}

/* light is handled separately */

@@ -207,7 +187,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
}

/* only interested in object that we can create meshes from */
if (!object_is_geometry(b_ob_info)) {
if (!object_is_geometry(b_ob)) {
return NULL;
}

@@ -294,7 +274,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,

object->set_visibility(visibility);

object->set_is_shadow_catcher(b_ob.is_shadow_catcher() || b_parent.is_shadow_catcher());
object->set_is_shadow_catcher(b_ob.is_shadow_catcher());

float shadow_terminator_shading_offset = get_float(cobject, "shadow_terminator_offset");
object->set_shadow_terminator_shading_offset(shadow_terminator_shading_offset);

@@ -580,7 +560,6 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph,
else {
geometry_motion_synced.clear();
}
instance_geometries_by_object.clear();

/* initialize culling */
BlenderObjectCulling culling(scene, b_scene);
@@ -157,6 +157,8 @@ static PyObject *init_func(PyObject * /*self*/, PyObject *args)

DebugFlags().running_inside_blender = true;

VLOG(2) << "Debug flags initialized to:\n" << DebugFlags();

Py_RETURN_NONE;
}

@@ -883,6 +885,8 @@ static PyObject *debug_flags_update_func(PyObject * /*self*/, PyObject *args)

debug_flags_sync_from_scene(b_scene);

VLOG(2) << "Debug flags set to:\n" << DebugFlags();

debug_flags_set = true;

Py_RETURN_NONE;

@@ -892,6 +896,7 @@ static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/
{
debug_flags_reset();
if (debug_flags_set) {
VLOG(2) << "Debug flags reset to:\n" << DebugFlags();
debug_flags_set = false;
}
Py_RETURN_NONE;
@@ -606,19 +606,6 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
pass->set_type(bake_type_to_pass(bake_type, bake_filter));
pass->set_include_albedo((bake_filter & BL::BakeSettings::pass_filter_COLOR));

if (pass->get_type() == PASS_COMBINED) {
/* Filtering settings for combined pass. */
Integrator *integrator = scene->integrator;
integrator->set_use_direct_light((bake_filter & BL::BakeSettings::pass_filter_DIRECT) != 0);
integrator->set_use_indirect_light((bake_filter & BL::BakeSettings::pass_filter_INDIRECT) !=
0);
integrator->set_use_diffuse((bake_filter & BL::BakeSettings::pass_filter_DIFFUSE) != 0);
integrator->set_use_glossy((bake_filter & BL::BakeSettings::pass_filter_GLOSSY) != 0);
integrator->set_use_transmission((bake_filter & BL::BakeSettings::pass_filter_TRANSMISSION) !=
0);
integrator->set_use_emission((bake_filter & BL::BakeSettings::pass_filter_EMIT) != 0);
}

session->set_display_driver(nullptr);
session->set_output_driver(make_unique<BlenderOutputDriver>(b_engine));
@@ -162,19 +162,19 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d
/* Object */
else if (b_id.is_a(&RNA_Object)) {
BL::Object b_ob(b_id);
const bool can_have_geometry = object_can_have_geometry(b_ob);
const bool is_light = !can_have_geometry && object_is_light(b_ob);
const bool is_geometry = object_is_geometry(b_ob);
const bool is_light = !is_geometry && object_is_light(b_ob);

if (b_ob.is_instancer() && b_update.is_updated_shading()) {
/* Needed for e.g. object color updates on instancer. */
object_map.set_recalc(b_ob);
}

if (can_have_geometry || is_light) {
if (is_geometry || is_light) {
const bool updated_geometry = b_update.is_updated_geometry();

/* Geometry (mesh, hair, volume). */
if (can_have_geometry) {
if (is_geometry) {
if (b_update.is_updated_transform() || b_update.is_updated_shading()) {
object_map.set_recalc(b_ob);
}
@@ -183,15 +183,6 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d
(object_subdivision_type(b_ob, preview, experimental) != Mesh::SUBDIVISION_NONE)) {
BL::ID key = BKE_object_is_modified(b_ob) ? b_ob : b_ob.data();
geometry_map.set_recalc(key);

/* Sync all contained geometry instances as well when the object changed.. */
map<void *, set<BL::ID>>::const_iterator instance_geometries =
instance_geometries_by_object.find(b_ob.ptr.data);
if (instance_geometries != instance_geometries_by_object.end()) {
for (BL::ID geometry : instance_geometries->second) {
geometry_map.set_recalc(geometry);
}
}
}

if (updated_geometry) {
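One side of this hunk keeps a map from an object to the set of geometry data blocks instanced under it, so that a change to the object can mark all of those geometries for re-sync. A minimal sketch of that bookkeeping pattern, reduced to plain Python for illustration:

```python
from collections import defaultdict

# object key -> set of geometry data blocks instanced by that object
instance_geometries_by_object = defaultdict(set)

def remember_instance(obj_key, geometry_key):
    instance_geometries_by_object[obj_key].add(geometry_key)

def tag_for_resync(obj_key, set_recalc):
    # When the object changes, flag every geometry it instanced.
    for geometry_key in instance_geometries_by_object.get(obj_key, ()):
        set_recalc(geometry_key)
```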
@@ -375,9 +366,7 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
if ((preview && !preview_scrambling_distance) || use_adaptive_sampling)
scrambling_distance = 1.0f;

if (scrambling_distance != 1.0f) {
VLOG(3) << "Using scrambling distance: " << scrambling_distance;
}
VLOG(1) << "Used Scrambling Distance: " << scrambling_distance;
integrator->set_scrambling_distance(scrambling_distance);

if (get_boolean(cscene, "use_fast_gi")) {
@@ -835,25 +824,18 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
/* samples */
int samples = get_int(cscene, "samples");
int preview_samples = get_int(cscene, "preview_samples");
int sample_offset = get_int(cscene, "sample_offset");

if (background) {
params.samples = samples;
params.sample_offset = sample_offset;
}
else {
params.samples = preview_samples;
if (params.samples == 0) {
if (params.samples == 0)
params.samples = INT_MAX;
}
params.sample_offset = 0;
}

/* Clamp sample offset. */
params.sample_offset = clamp(params.sample_offset, 0, Integrator::MAX_SAMPLES);

/* Clamp samples. */
params.samples = clamp(params.samples, 0, Integrator::MAX_SAMPLES - params.sample_offset);
params.samples = min(params.samples, Integrator::MAX_SAMPLES);

/* Viewport Performance */
params.pixel_size = b_engine.get_preview_pixel_size(b_scene);
@@ -208,8 +208,7 @@ class BlenderSync {
/* util */
void find_shader(BL::ID &id, array<Node *> &used_shaders, Shader *default_shader);
bool BKE_object_is_modified(BL::Object &b_ob);
bool object_is_geometry(BObjectInfo &b_ob_info);
bool object_can_have_geometry(BL::Object &b_ob);
bool object_is_geometry(BL::Object &b_ob);
bool object_is_light(BL::Object &b_ob);

/* variables */

@@ -226,8 +225,6 @@ class BlenderSync {
set<Geometry *> geometry_synced;
set<Geometry *> geometry_motion_synced;
set<Geometry *> geometry_motion_attribute_synced;
/** Remember which geometries come from which objects to be able to sync them after changes. */
map<void *, set<BL::ID>> instance_geometries_by_object;
set<float> motion_times;
void *world_map;
bool world_recalc;
@@ -38,6 +38,7 @@ void device_cpu_info(vector<DeviceInfo> &devices)
info.id = "CPU";
info.num = 0;
info.has_osl = true;
info.has_half_images = true;
info.has_nanovdb = true;
info.has_profiling = true;
if (openimagedenoise_supported()) {
@@ -68,8 +68,8 @@ CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_
{
/* Pick any kernel, all of them are supposed to have same level of microarchitecture
* optimization. */
VLOG(1) << "Using " << get_cpu_kernels().integrator_init_from_camera.get_uarch_name()
<< " CPU kernels.";
VLOG(1) << "Will be using " << kernels.integrator_init_from_camera.get_uarch_name()
<< " kernels.";

if (info.cpu_threads == 0) {
info.cpu_threads = TaskScheduler::num_threads();

@@ -297,6 +297,11 @@ void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
Device::build_bvh(bvh, progress, refit);
}

const CPUKernels *CPUDevice::get_cpu_kernels() const
{
return &kernels;
}

void CPUDevice::get_cpu_kernel_thread_globals(
vector<CPUKernelThreadGlobals> &kernel_thread_globals)
{
@@ -57,6 +57,8 @@ class CPUDevice : public Device {
RTCDevice embree_device;
#endif

CPUKernels kernels;

CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_);
~CPUDevice();

@@ -88,6 +90,7 @@ class CPUDevice : public Device {

void build_bvh(BVH *bvh, Progress &progress, bool refit) override;

virtual const CPUKernels *get_cpu_kernels() const override;
virtual void get_cpu_kernel_thread_globals(
vector<CPUKernelThreadGlobals> &kernel_thread_globals) override;
virtual void *get_cpu_osl_memory() override;
@@ -26,9 +26,6 @@ CCL_NAMESPACE_BEGIN
KERNEL_NAME_EVAL(cpu_avx, name), KERNEL_NAME_EVAL(cpu_avx2, name)

#define REGISTER_KERNEL(name) name(KERNEL_FUNCTIONS(name))
#define REGISTER_KERNEL_FILM_CONVERT(name) \
film_convert_##name(KERNEL_FUNCTIONS(film_convert_##name)), \
film_convert_half_rgba_##name(KERNEL_FUNCTIONS(film_convert_half_rgba_##name))

CPUKernels::CPUKernels()
: /* Integrator. */

@@ -53,25 +50,11 @@ CPUKernels::CPUKernels()
REGISTER_KERNEL(adaptive_sampling_filter_x),
REGISTER_KERNEL(adaptive_sampling_filter_y),
/* Cryptomatte. */
REGISTER_KERNEL(cryptomatte_postprocess),
/* Film Convert. */
REGISTER_KERNEL_FILM_CONVERT(depth),
REGISTER_KERNEL_FILM_CONVERT(mist),
REGISTER_KERNEL_FILM_CONVERT(sample_count),
REGISTER_KERNEL_FILM_CONVERT(float),
REGISTER_KERNEL_FILM_CONVERT(light_path),
REGISTER_KERNEL_FILM_CONVERT(float3),
REGISTER_KERNEL_FILM_CONVERT(motion),
REGISTER_KERNEL_FILM_CONVERT(cryptomatte),
REGISTER_KERNEL_FILM_CONVERT(shadow_catcher),
REGISTER_KERNEL_FILM_CONVERT(shadow_catcher_matte_with_shadow),
REGISTER_KERNEL_FILM_CONVERT(combined),
REGISTER_KERNEL_FILM_CONVERT(float4)
REGISTER_KERNEL(cryptomatte_postprocess)
{
}

#undef REGISTER_KERNEL
#undef REGISTER_KERNEL_FILM_CONVERT
#undef KERNEL_FUNCTIONS

CCL_NAMESPACE_END
@@ -17,13 +17,11 @@
#pragma once

#include "device/cpu/kernel_function.h"
#include "util/half.h"
#include "util/types.h"

CCL_NAMESPACE_BEGIN

struct KernelGlobalsCPU;
struct KernelFilmConvert;
struct IntegratorStateCPU;
struct TileInfo;

@@ -42,7 +40,7 @@ class CPUKernels {

IntegratorInitFunction integrator_init_from_camera;
IntegratorInitFunction integrator_init_from_bake;
IntegratorShadeFunction integrator_intersect_closest;
IntegratorFunction integrator_intersect_closest;
IntegratorFunction integrator_intersect_shadow;
IntegratorFunction integrator_intersect_subsurface;
IntegratorFunction integrator_intersect_volume_stack;

@@ -104,41 +102,6 @@ class CPUKernels {

CryptomattePostprocessFunction cryptomatte_postprocess;

/* Film Convert. */
using FilmConvertFunction = CPUKernelFunction<void (*)(const KernelFilmConvert *kfilm_convert,
const float *buffer,
float *pixel,
const int width,
const int buffer_stride,
const int pixel_stride)>;
using FilmConvertHalfRGBAFunction =
CPUKernelFunction<void (*)(const KernelFilmConvert *kfilm_convert,
const float *buffer,
half4 *pixel,
const int width,
const int buffer_stride)>;

#define KERNEL_FILM_CONVERT_FUNCTION(name) \
FilmConvertFunction film_convert_##name; \
FilmConvertHalfRGBAFunction film_convert_half_rgba_##name;

KERNEL_FILM_CONVERT_FUNCTION(depth)
KERNEL_FILM_CONVERT_FUNCTION(mist)
KERNEL_FILM_CONVERT_FUNCTION(sample_count)
KERNEL_FILM_CONVERT_FUNCTION(float)

KERNEL_FILM_CONVERT_FUNCTION(light_path)
KERNEL_FILM_CONVERT_FUNCTION(float3)

KERNEL_FILM_CONVERT_FUNCTION(motion)
KERNEL_FILM_CONVERT_FUNCTION(cryptomatte)
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher)
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow)
KERNEL_FILM_CONVERT_FUNCTION(combined)
KERNEL_FILM_CONVERT_FUNCTION(float4)

#undef KERNEL_FILM_CONVERT_FUNCTION

CPUKernels();
};
@@ -144,6 +144,7 @@ void device_cuda_info(vector<DeviceInfo> &devices)
info.description = string(name);
info.num = num;

info.has_half_images = (major >= 3);
info.has_nanovdb = true;
info.denoisers = 0;

@@ -378,9 +378,7 @@ string CUDADevice::compile_kernel(const uint kernel_features,
cubin.c_str(),
common_cflags.c_str());

printf("Compiling %sCUDA kernel ...\n%s\n",
(use_adaptive_compilation()) ? "adaptive " : "",
command.c_str());
printf("Compiling CUDA kernel ...\n%s\n", command.c_str());

# ifdef _WIN32
command = "call " + command;

@@ -407,15 +405,13 @@ string CUDADevice::compile_kernel(const uint kernel_features,

bool CUDADevice::load_kernels(const uint kernel_features)
{
/* TODO(sergey): Support kernels re-load for CUDA devices adaptive compile.
/* TODO(sergey): Support kernels re-load for CUDA devices.
*
* Currently re-loading kernel will invalidate memory pointers,
* causing problems in cuCtxSynchronize.
*/
if (cuModule) {
if (use_adaptive_compilation()) {
VLOG(1) << "Skipping CUDA kernel reload for adaptive compilation, not currently supported.";
}
VLOG(1) << "Skipping kernel reload, not currently supported.";
return true;
}

@@ -931,6 +927,7 @@ void CUDADevice::tex_alloc(device_texture &mem)
{
CUDAContextScope scope(this);

/* General variables for both architectures */
string bind_name = mem.name;
size_t dsize = datatype_size(mem.data_type);
size_t size = mem.memory_size();

@@ -1093,6 +1090,7 @@ void CUDADevice::tex_alloc(device_texture &mem)

if (mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
/* Kepler+, bindless textures. */
CUDA_RESOURCE_DESC resDesc;
memset(&resDesc, 0, sizeof(resDesc));
@@ -23,7 +23,6 @@
#include "device/queue.h"

#include "device/cpu/device.h"
#include "device/cpu/kernel.h"
#include "device/cuda/device.h"
#include "device/dummy/device.h"
#include "device/hip/device.h"

@@ -286,6 +285,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.description = "Multi Device";
info.num = 0;

info.has_half_images = true;
info.has_nanovdb = true;
info.has_osl = true;
info.has_profiling = true;

@@ -332,6 +332,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
}

/* Accumulate device info. */
info.has_half_images &= device.has_half_images;
info.has_nanovdb &= device.has_nanovdb;
info.has_osl &= device.has_osl;
info.has_profiling &= device.has_profiling;

@@ -362,11 +363,10 @@ unique_ptr<DeviceQueue> Device::gpu_queue_create()
return nullptr;
}

const CPUKernels &Device::get_cpu_kernels()
const CPUKernels *Device::get_cpu_kernels() const
{
/* Initialize CPU kernels once and reuse. */
static CPUKernels kernels;
return kernels;
LOG(FATAL) << "Device does not support CPU kernels.";
return nullptr;
}

void Device::get_cpu_kernel_thread_globals(
@@ -73,6 +73,7 @@ class DeviceInfo {
int num;
bool display_device; /* GPU is used as a display device. */
bool has_nanovdb; /* Support NanoVDB volumes. */
bool has_half_images; /* Support half-float textures. */
bool has_osl; /* Support Open Shading Language. */
bool has_profiling; /* Supports runtime collection of profiling info. */
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */

@@ -89,6 +90,7 @@ class DeviceInfo {
num = 0;
cpu_threads = 0;
display_device = false;
has_half_images = false;
has_nanovdb = false;
has_osl = false;
has_profiling = false;

@@ -178,7 +180,7 @@ class Device {
* These may not be used on GPU or multi-devices. */

/* Get CPU kernel functions for native instruction set. */
static const CPUKernels &get_cpu_kernels();
virtual const CPUKernels *get_cpu_kernels() const;
/* Get kernel globals to pass to kernels. */
virtual void get_cpu_kernel_thread_globals(
vector<CPUKernelThreadGlobals> & /*kernel_thread_globals*/);
@@ -131,9 +131,9 @@ void device_hip_info(vector<DeviceInfo> &devices)
continue;
}

if (!hipSupportsDevice(num)) {
continue;
}
int major;
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, num);
// TODO : (Arya) What is the last major version we are supporting?

DeviceInfo info;

@@ -141,6 +141,7 @@ void device_hip_info(vector<DeviceInfo> &devices)
info.description = string(name);
info.num = num;

info.has_half_images = (major >= 3);
info.has_nanovdb = true;
info.denoisers = 0;
@@ -146,18 +146,12 @@ HIPDevice::~HIPDevice()

bool HIPDevice::support_device(const uint /*kernel_features*/)
{
if (hipSupportsDevice(hipDevId)) {
return true;
}
else {
/* We only support Navi and above. */
hipDeviceProp_t props;
hipGetDeviceProperties(&props, hipDevId);
int major, minor;
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);

set_error(string_printf("HIP backend requires AMD RDNA graphics card or up, but found %s.",
props.name));
return false;
}
// TODO : (Arya) What versions do we plan to support?
return true;
}

bool HIPDevice::check_peer_access(Device *peer_device)
@@ -222,6 +216,7 @@ string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
const string include_path = source_path;
string cflags = string_printf(
"-m%d "
"--ptxas-options=\"-v\" "
"--use_fast_math "
"-DHIPCC "
"-I\"%s\"",

@@ -233,7 +228,10 @@ string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
return cflags;
}

string HIPDevice::compile_kernel(const uint kernel_features, const char *name, const char *base)
string HIPDevice::compile_kernel(const uint kernel_features,
const char *name,
const char *base,
bool force_ptx)
{
/* Compute kernel name. */
int major, minor;
@@ -242,20 +240,35 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c
hipDeviceProp_t props;
hipGetDeviceProperties(&props, hipDevId);

/* gcnArchName can contain tokens after the arch name with features, ie.
* `gfx1010:sramecc-:xnack-` so we tokenize it to get the first part. */
char *arch = strtok(props.gcnArchName, ":");
if (arch == NULL) {
arch = props.gcnArchName;
}

/* Attempt to use kernel provided with Blender. */
if (!use_adaptive_compilation()) {
const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, arch));
VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
if (path_exists(fatbin)) {
VLOG(1) << "Using precompiled kernel.";
return fatbin;
if (!force_ptx) {
const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, props.gcnArchName));
VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
if (path_exists(fatbin)) {
VLOG(1) << "Using precompiled kernel.";
return fatbin;
}
}

/* The driver can JIT-compile PTX generated for older generations, so find the closest one. */
int ptx_major = major, ptx_minor = minor;
while (ptx_major >= 3) {
const string ptx = path_get(
string_printf("lib/%s_compute_%d%d.ptx", name, ptx_major, ptx_minor));
VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
if (path_exists(ptx)) {
VLOG(1) << "Using precompiled kernel.";
return ptx;
}

if (ptx_minor > 0) {
ptx_minor--;
}
else {
ptx_major--;
ptx_minor = 9;
}
}
}
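The comment in this hunk explains that gcnArchName may carry feature suffixes after the architecture name, so only the first token is used. A quick illustration of that stripping in plain Python (not the strtok code from the file):

```python
def base_arch(gcn_arch_name: str) -> str:
    # "gfx1010:sramecc-:xnack-" -> "gfx1010"; names without ":" pass through unchanged.
    return gcn_arch_name.split(":", 1)[0]

assert base_arch("gfx1010:sramecc-:xnack-") == "gfx1010"
assert base_arch("gfx1030") == "gfx1030"
```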
@@ -279,10 +292,12 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c
# ifdef _DEBUG
options.append(" -save-temps");
# endif
options.append(" --amdgpu-target=").append(arch);
options.append(" --amdgpu-target=").append(props.gcnArchName);

const string include_path = source_path;
const string fatbin_file = string_printf("cycles_%s_%s_%s", name, arch, kernel_md5.c_str());
const char *const kernel_arch = props.gcnArchName;
const string fatbin_file = string_printf(
"cycles_%s_%s_%s", name, kernel_arch, kernel_md5.c_str());
const string fatbin = path_cache_get(path_join("kernels", fatbin_file));
VLOG(1) << "Testing for locally compiled kernel " << fatbin << ".";
if (path_exists(fatbin)) {

@@ -292,9 +307,9 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c

# ifdef _WIN32
if (!use_adaptive_compilation() && have_precompiled_kernels()) {
if (!hipSupportsDevice(hipDevId)) {
if (major < 3) {
set_error(
string_printf("HIP backend requires compute capability 10.1 or up, but found %d.%d. "
string_printf("HIP backend requires compute capability 3.0 or up, but found %d.%d. "
"Your GPU is not supported.",
major,
minor));

@@ -345,9 +360,7 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c
source_path.c_str(),
fatbin.c_str());

printf("Compiling %sHIP kernel ...\n%s\n",
(use_adaptive_compilation()) ? "adaptive " : "",
command.c_str());
printf("Compiling HIP kernel ...\n%s\n", command.c_str());

# ifdef _WIN32
command = "call " + command;
@@ -374,14 +387,13 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c

bool HIPDevice::load_kernels(const uint kernel_features)
{
/* TODO(sergey): Support kernels re-load for HIP devices adaptive compile.
/* TODO(sergey): Support kernels re-load for HIP devices.
*
* Currently re-loading kernels will invalidate memory pointers.
* Currently re-loading kernel will invalidate memory pointers,
* causing problems in hipCtxSynchronize.
*/
if (hipModule) {
if (use_adaptive_compilation()) {
VLOG(1) << "Skipping HIP kernel reload for adaptive compilation, not currently supported.";
}
VLOG(1) << "Skipping kernel reload, not currently supported.";
return true;
}

@@ -390,9 +402,8 @@ bool HIPDevice::load_kernels(const uint kernel_features)
return false;

/* check if GPU is supported */
if (!support_device(kernel_features)) {
if (!support_device(kernel_features))
return false;
}

/* get kernel */
const char *kernel_name = "kernel";
@@ -897,6 +908,7 @@ void HIPDevice::tex_alloc(device_texture &mem)
{
HIPContextScope scope(this);

/* General variables for both architectures */
string bind_name = mem.name;
size_t dsize = datatype_size(mem.data_type);
size_t size = mem.memory_size();

@@ -1061,6 +1073,7 @@ void HIPDevice::tex_alloc(device_texture &mem)

if (mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
/* Kepler+, bindless textures. */
hipResourceDesc resDesc;
memset(&resDesc, 0, sizeof(resDesc));

@@ -1151,8 +1164,6 @@ bool HIPDevice::should_use_graphics_interop()
* possible, but from the empiric measurements it can be considerably slower than using naive
* pixels copy. */

/* Disable graphics interop for now, because of driver bug in 21.40. See T92972 */
# if 0
HIPContextScope scope(this);

int num_all_devices = 0;

@@ -1171,7 +1182,6 @@ bool HIPDevice::should_use_graphics_interop()
return true;
}
}
# endif

return false;
}
@@ -93,7 +93,10 @@ class HIPDevice : public Device {

virtual string compile_kernel_get_common_cflags(const uint kernel_features);

string compile_kernel(const uint kernel_features, const char *name, const char *base = "hip");
string compile_kernel(const uint kernel_features,
const char *name,
const char *base = "hip",
bool force_ptx = false);

virtual bool load_kernels(const uint kernel_features) override;
void reserve_local_memory(const uint kernel_features);

@@ -48,7 +48,7 @@ class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop {
HIPDeviceQueue *queue_ = nullptr;
HIPDevice *device_ = nullptr;

/* OpenGL PBO which is currently registered as the destination for the HIP buffer. */
/* OpenGL PBO which is currently registered as the destination for the CUDA buffer. */
uint opengl_pbo_id_ = 0;
/* Buffer area in pixels of the corresponding PBO. */
int64_t buffer_area_ = 0;
@@ -58,15 +58,6 @@ const char *hipewCompilerPath();
int hipewCompilerVersion();
# endif /* WITH_HIP_DYNLOAD */

static inline bool hipSupportsDevice(const int hipDevId)
{
int major, minor;
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);

return (major > 10) || (major == 10 && minor >= 1);
}

CCL_NAMESPACE_END

#endif /* WITH_HIP */
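hipSupportsDevice above gates Cycles HIP support on a minimum compute capability of 10.1 (i.e. RDNA hardware); the same comparison, restated in plain Python for illustration:

```python
def hip_supports_device(major: int, minor: int) -> bool:
    # Equivalent to (major > 10) or (major == 10 and minor >= 1).
    return (major, minor) >= (10, 1)

assert hip_supports_device(10, 1)      # RDNA-class devices pass
assert not hip_supports_device(9, 6)   # older GCN generations are rejected
```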
@@ -48,6 +48,14 @@ OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
{
}

OptiXDevice::Denoiser::~Denoiser()
{
const CUDAContextScope scope(device);
if (optix_denoiser != nullptr) {
optixDenoiserDestroy(optix_denoiser);
}
}

OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: CUDADevice(info, stats, profiler),
sbt_data(this, "__sbt", MEM_READ_ONLY),

@@ -83,7 +91,6 @@ OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
};
# endif
if (DebugFlags().optix.use_debug) {
VLOG(1) << "Using OptiX debug mode.";
options.validationMode = OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL;
}
optix_assert(optixDeviceContextCreate(cuContext, &options, &context));

@@ -125,11 +132,6 @@ OptiXDevice::~OptiXDevice()
}
}

/* Make sure denoiser is destroyed before device context! */
if (denoiser_.optix_denoiser != nullptr) {
optixDenoiserDestroy(denoiser_.optix_denoiser);
}

optixDeviceContextDestroy(context);
}
@@ -881,31 +883,27 @@ bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
optix_assert(optixDenoiserComputeMemoryResources(
denoiser_.optix_denoiser, buffer_params.width, buffer_params.height, &sizes));

/* Denoiser is invoked on whole images only, so no overlap needed (would be used for tiling). */
denoiser_.scratch_size = sizes.withoutOverlapScratchSizeInBytes;
denoiser_.scratch_size = sizes.withOverlapScratchSizeInBytes;
denoiser_.scratch_offset = sizes.stateSizeInBytes;

/* Allocate denoiser state if tile size has changed since last setup. */
denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size);

/* Initialize denoiser state for the current tile size. */
const OptixResult result = optixDenoiserSetup(
denoiser_.optix_denoiser,
0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
on a stream that is not the default stream */
buffer_params.width,
buffer_params.height,
denoiser_.state.device_pointer,
denoiser_.scratch_offset,
denoiser_.state.device_pointer + denoiser_.scratch_offset,
denoiser_.scratch_size);
const OptixResult result = optixDenoiserSetup(denoiser_.optix_denoiser,
denoiser_.queue.stream(),
buffer_params.width,
buffer_params.height,
denoiser_.state.device_pointer,
denoiser_.scratch_offset,
denoiser_.state.device_pointer +
denoiser_.scratch_offset,
denoiser_.scratch_size);
if (result != OPTIX_SUCCESS) {
set_error("Failed to set up OptiX denoiser");
return false;
}

cuda_assert(cuCtxSynchronize());

denoiser_.is_configured = true;
denoiser_.configured_size.x = buffer_params.width;
denoiser_.configured_size.y = buffer_params.height;
@@ -940,6 +938,8 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}

device_vector<float> fake_albedo(this, "fake_albedo", MEM_READ_WRITE);

/* Optional albedo and color passes. */
if (context.num_input_passes > 1) {
const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
@@ -970,7 +970,6 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)

/* Finally run denoising. */
OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */

OptixDenoiserLayer image_layers = {};
image_layers.input = color_layer;
image_layers.output = output_layer;
@@ -82,6 +82,7 @@ class OptiXDevice : public CUDADevice {
class Denoiser {
public:
explicit Denoiser(OptiXDevice *device);
~Denoiser();

OptiXDevice *device;
OptiXDeviceQueue queue;
@@ -73,8 +73,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *a
sizeof(device_ptr),
cuda_stream_));

if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
if (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
cuda_device_assert(
cuda_device_,
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
@@ -29,14 +29,23 @@ unique_ptr<Denoiser> Denoiser::create(Device *path_trace_device, const DenoisePa
{
DCHECK(params.use);

if (params.type == DENOISER_OPTIX && Device::available_devices(DEVICE_MASK_OPTIX).size()) {
return make_unique<OptiXDenoiser>(path_trace_device, params);
switch (params.type) {
case DENOISER_OPTIX:
return make_unique<OptiXDenoiser>(path_trace_device, params);

case DENOISER_OPENIMAGEDENOISE:
return make_unique<OIDNDenoiser>(path_trace_device, params);

case DENOISER_NUM:
case DENOISER_NONE:
case DENOISER_ALL:
/* pass */
break;
}

/* Always fallback to OIDN. */
DenoiseParams oidn_params = params;
oidn_params.type = DENOISER_OPENIMAGEDENOISE;
return make_unique<OIDNDenoiser>(path_trace_device, oidn_params);
LOG(FATAL) << "Unhandled denoiser type " << params.type << ", should never happen.";

return nullptr;
}

Denoiser::Denoiser(Device *path_trace_device, const DenoiseParams &params)
@@ -138,6 +138,10 @@ bool PassAccessor::get_render_tile_pixels(const RenderBuffers *render_buffers,
return false;
}

if (pass_access_info_.offset == PASS_UNUSED) {
return false;
}

const PassType type = pass_access_info_.type;
const PassMode mode = pass_access_info_.mode;
const PassInfo pass_info = Pass::get_info(type, pass_access_info_.include_albedo);
@@ -14,12 +14,9 @@
* limitations under the License.
*/

#include "device/device.h"

#include "integrator/pass_accessor_cpu.h"

#include "session/buffers.h"

#include "util/log.h"
#include "util/tbb.h"
@@ -36,16 +33,70 @@ CCL_NAMESPACE_BEGIN
* Kernel processing.
*/

template<typename Processor>
inline void PassAccessorCPU::run_get_pass_kernel_processor(const RenderBuffers *render_buffers,
const BufferParams &buffer_params,
const Destination &destination,
const Processor &processor) const
{
KernelFilmConvert kfilm_convert;
init_kernel_film_convert(&kfilm_convert, buffer_params, destination);

if (destination.pixels) {
/* NOTE: No overlays are applied since they are not used for final renders.
* Can be supported via some sort of specialization to avoid code duplication. */

run_get_pass_kernel_processor_float(
&kfilm_convert, render_buffers, buffer_params, destination, processor);
}

if (destination.pixels_half_rgba) {
/* TODO(sergey): Consider adding specialization to avoid per-pixel overlay check. */

if (destination.num_components == 1) {
run_get_pass_kernel_processor_half_rgba(&kfilm_convert,
render_buffers,
buffer_params,
destination,
[&processor](const KernelFilmConvert *kfilm_convert,
ccl_global const float *buffer,
float *pixel_rgba) {
float pixel;
processor(kfilm_convert, buffer, &pixel);

pixel_rgba[0] = pixel;
pixel_rgba[1] = pixel;
pixel_rgba[2] = pixel;
pixel_rgba[3] = 1.0f;
});
}
else if (destination.num_components == 3) {
run_get_pass_kernel_processor_half_rgba(&kfilm_convert,
render_buffers,
buffer_params,
destination,
[&processor](const KernelFilmConvert *kfilm_convert,
ccl_global const float *buffer,
float *pixel_rgba) {
processor(kfilm_convert, buffer, pixel_rgba);
pixel_rgba[3] = 1.0f;
});
}
else if (destination.num_components == 4) {
run_get_pass_kernel_processor_half_rgba(
&kfilm_convert, render_buffers, buffer_params, destination, processor);
}
}
}

template<typename Processor>
inline void PassAccessorCPU::run_get_pass_kernel_processor_float(
const KernelFilmConvert *kfilm_convert,
const RenderBuffers *render_buffers,
const BufferParams &buffer_params,
const Destination &destination,
const CPUKernels::FilmConvertFunction func) const
const Processor &processor) const
{
/* NOTE: No overlays are applied since they are not used for final renders.
* Can be supported via some sort of specialization to avoid code duplication. */

DCHECK_EQ(destination.stride, 0) << "Custom stride for float destination is not implemented.";

const int64_t pass_stride = buffer_params.pass_stride;
@@ -61,16 +112,21 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_float(
const float *buffer = window_data + y * buffer_row_stride;
float *pixel = destination.pixels +
(y * buffer_params.width + destination.offset) * pixel_stride;
func(kfilm_convert, buffer, pixel, buffer_params.window_width, pass_stride, pixel_stride);

for (int64_t x = 0; x < buffer_params.window_width;
++x, buffer += pass_stride, pixel += pixel_stride) {
processor(kfilm_convert, buffer, pixel);
}
});
}

template<typename Processor>
inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
const KernelFilmConvert *kfilm_convert,
const RenderBuffers *render_buffers,
const BufferParams &buffer_params,
const Destination &destination,
const CPUKernels::FilmConvertHalfRGBAFunction func) const
const Processor &processor) const
{
const int64_t pass_stride = buffer_params.pass_stride;
const int64_t buffer_row_stride = buffer_params.stride * buffer_params.pass_stride;
@@ -85,7 +141,16 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
tbb::parallel_for(0, buffer_params.window_height, [&](int64_t y) {
const float *buffer = window_data + y * buffer_row_stride;
half4 *pixel = dst_start + y * destination_stride;
func(kfilm_convert, buffer, pixel, buffer_params.window_width, pass_stride);
for (int64_t x = 0; x < buffer_params.window_width; ++x, buffer += pass_stride, ++pixel) {

float pixel_rgba[4];
processor(kfilm_convert, buffer, pixel_rgba);

film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel_rgba);

*pixel = float4_to_half4_display(
make_float4(pixel_rgba[0], pixel_rgba[1], pixel_rgba[2], pixel_rgba[3]));
}
});
}

@@ -98,25 +163,8 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
const BufferParams &buffer_params, \
const Destination &destination) const \
{ \
const CPUKernels &kernels = Device::get_cpu_kernels(); \
KernelFilmConvert kfilm_convert; \
init_kernel_film_convert(&kfilm_convert, buffer_params, destination); \
\
if (destination.pixels) { \
run_get_pass_kernel_processor_float(&kfilm_convert, \
render_buffers, \
buffer_params, \
destination, \
kernels.film_convert_##pass); \
} \
\
if (destination.pixels_half_rgba) { \
run_get_pass_kernel_processor_half_rgba(&kfilm_convert, \
render_buffers, \
buffer_params, \
destination, \
kernels.film_convert_half_rgba_##pass); \
} \
run_get_pass_kernel_processor( \
render_buffers, buffer_params, destination, film_get_pass_pixel_##pass); \
}

/* Float (scalar) passes. */
@@ -16,8 +16,6 @@

#pragma once

#include "device/cpu/kernel.h"

#include "integrator/pass_accessor.h"

CCL_NAMESPACE_BEGIN
@@ -30,19 +28,25 @@ class PassAccessorCPU : public PassAccessor {
using PassAccessor::PassAccessor;

protected:
inline void run_get_pass_kernel_processor_float(
const KernelFilmConvert *kfilm_convert,
const RenderBuffers *render_buffers,
const BufferParams &buffer_params,
const Destination &destination,
const CPUKernels::FilmConvertFunction func) const;
template<typename Processor>
inline void run_get_pass_kernel_processor(const RenderBuffers *render_buffers,
const BufferParams &buffer_params,
const Destination &destination,
const Processor &processor) const;

inline void run_get_pass_kernel_processor_half_rgba(
const KernelFilmConvert *kfilm_convert,
const RenderBuffers *render_buffers,
const BufferParams &buffer_params,
const Destination &destination,
const CPUKernels::FilmConvertHalfRGBAFunction func) const;
template<typename Processor>
inline void run_get_pass_kernel_processor_float(const KernelFilmConvert *kfilm_convert,
const RenderBuffers *render_buffers,
const BufferParams &buffer_params,
const Destination &destination,
const Processor &processor) const;

template<typename Processor>
inline void run_get_pass_kernel_processor_half_rgba(const KernelFilmConvert *kfilm_convert,
const RenderBuffers *render_buffers,
const BufferParams &buffer_params,
const Destination &destination,
const Processor &processor) const;

#define DECLARE_PASS_ACCESSOR(pass) \
virtual void get_pass_##pass(const RenderBuffers *render_buffers, \
@@ -380,10 +380,7 @@ void PathTrace::path_trace(RenderWork &render_work)
PathTraceWork *path_trace_work = path_trace_works_[i].get();

PathTraceWork::RenderStatistics statistics;
path_trace_work->render_samples(statistics,
render_work.path_trace.start_sample,
num_samples,
render_work.path_trace.sample_offset);
path_trace_work->render_samples(statistics, render_work.path_trace.start_sample, num_samples);

const double work_time = time_dt() - work_start_time;
work_balance_infos_[i].time_spent += work_time;
@@ -852,8 +849,7 @@ void PathTrace::progress_update_if_needed(const RenderWork &render_work)
const int2 tile_size = get_render_tile_size();
const int num_samples_added = tile_size.x * tile_size.y * render_work.path_trace.num_samples;
const int current_sample = render_work.path_trace.start_sample +
render_work.path_trace.num_samples -
render_work.path_trace.sample_offset;
render_work.path_trace.num_samples;
progress_->add_samples(num_samples_added, current_sample);
}

@@ -75,10 +75,7 @@ class PathTraceWork {

/* Render given number of samples as a synchronous blocking call.
* The samples are added to the render buffer associated with this work. */
virtual void render_samples(RenderStatistics &statistics,
int start_sample,
int samples_num,
int sample_offset) = 0;
virtual void render_samples(RenderStatistics &statistics, int start_sample, int samples_num) = 0;

/* Copy render result from this work to the corresponding place of the GPU display.
*
@@ -58,7 +58,7 @@ PathTraceWorkCPU::PathTraceWorkCPU(Device *device,
DeviceScene *device_scene,
bool *cancel_requested_flag)
: PathTraceWork(device, film, device_scene, cancel_requested_flag),
kernels_(Device::get_cpu_kernels())
kernels_(*(device->get_cpu_kernels()))
{
DCHECK_EQ(device->info.type, DEVICE_CPU);
}
@@ -71,17 +71,14 @@ void PathTraceWorkCPU::init_execution()

void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
int start_sample,
int samples_num,
int sample_offset)
int samples_num)
{
const int64_t image_width = effective_buffer_params_.width;
const int64_t image_height = effective_buffer_params_.height;
const int64_t total_pixels_num = image_width * image_height;

if (device_->profiler.active()) {
for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
kernel_globals.start_profiling();
}
for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
kernel_globals.start_profiling();
}

tbb::task_arena local_arena = local_tbb_arena_create(device_);
@@ -100,7 +97,6 @@ void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
work_tile.w = 1;
work_tile.h = 1;
work_tile.start_sample = start_sample;
work_tile.sample_offset = sample_offset;
work_tile.num_samples = 1;
work_tile.offset = effective_buffer_params_.offset;
work_tile.stride = effective_buffer_params_.stride;
@@ -110,10 +106,9 @@ void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
render_samples_full_pipeline(kernel_globals, work_tile, samples_num);
});
});
if (device_->profiler.active()) {
for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
kernel_globals.stop_profiling();
}

for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
kernel_globals.stop_profiling();
}

statistics.occupancy = 1.0f;
@@ -48,8 +48,7 @@ class PathTraceWorkCPU : public PathTraceWork {

virtual void render_samples(RenderStatistics &statistics,
int start_sample,
int samples_num,
int sample_offset) override;
int samples_num) override;

virtual void copy_to_display(PathTraceDisplay *display,
PassMode pass_mode,
@@ -250,8 +250,7 @@ void PathTraceWorkGPU::init_execution()

void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
int start_sample,
int samples_num,
int sample_offset)
int samples_num)
{
/* Limit number of states for the tile and rely on a greedy scheduling of tiles. This allows to
* add more work (because tiles are smaller, so there is higher chance that more paths will
@@ -262,7 +261,6 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
work_tile_scheduler_.reset(effective_buffer_params_,
start_sample,
samples_num,
sample_offset,
device_scene_->data.integrator.scrambling_distance);

enqueue_reset();
@@ -439,15 +437,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
DCHECK_LE(work_size, max_num_paths_);

switch (kernel) {
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: {
/* Closest ray intersection kernels with integrator state and render buffer. */
void *d_render_buffer = (void *)buffers_->buffer.device_pointer;
void *args[] = {&d_path_index, &d_render_buffer, const_cast<int *>(&work_size)};

queue_->enqueue(kernel, work_size, args);
break;
}

case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: {
@@ -817,10 +807,10 @@ bool PathTraceWorkGPU::should_use_graphics_interop()
interop_use_ = device->should_use_graphics_interop();

if (interop_use_) {
VLOG(2) << "Using graphics interop GPU display update.";
VLOG(2) << "Will be using graphics interop GPU display update.";
}
else {
VLOG(2) << "Using naive GPU display update.";
VLOG(2) << "Will be using naive GPU display update.";
}

interop_use_checked_ = true;
@@ -46,8 +46,7 @@ class PathTraceWorkGPU : public PathTraceWork {

virtual void render_samples(RenderStatistics &statistics,
int start_sample,
int samples_num,
int sample_offset) override;
int samples_num) override;

virtual void copy_to_display(PathTraceDisplay *display,
PassMode pass_mode,
@@ -88,16 +88,6 @@ int RenderScheduler::get_num_samples() const
return num_samples_;
}

void RenderScheduler::set_sample_offset(int sample_offset)
{
sample_offset_ = sample_offset;
}

int RenderScheduler::get_sample_offset() const
{
return sample_offset_;
}

void RenderScheduler::set_time_limit(double time_limit)
{
time_limit_ = time_limit;
@@ -120,15 +110,13 @@ int RenderScheduler::get_num_rendered_samples() const
return state_.num_rendered_samples;
}

void RenderScheduler::reset(const BufferParams &buffer_params, int num_samples, int sample_offset)
void RenderScheduler::reset(const BufferParams &buffer_params, int num_samples)
{
buffer_params_ = buffer_params;

update_start_resolution_divider();

set_num_samples(num_samples);
set_start_sample(sample_offset);
set_sample_offset(sample_offset);

/* In background mode never do lower resolution render preview, as it is not really supported
* by the software. */
@@ -183,7 +171,7 @@ void RenderScheduler::reset(const BufferParams &buffer_params, int num_samples,

void RenderScheduler::reset_for_next_tile()
{
reset(buffer_params_, num_samples_, sample_offset_);
reset(buffer_params_, num_samples_);
}

bool RenderScheduler::render_work_reschedule_on_converge(RenderWork &render_work)
@@ -329,7 +317,6 @@ RenderWork RenderScheduler::get_render_work()

render_work.path_trace.start_sample = get_start_sample_to_path_trace();
render_work.path_trace.num_samples = get_num_samples_to_path_trace();
render_work.path_trace.sample_offset = get_sample_offset();

render_work.init_render_buffers = (render_work.path_trace.start_sample == get_start_sample());

@@ -39,7 +39,6 @@ class RenderWork {
struct {
int start_sample = 0;
int num_samples = 0;
int sample_offset = 0;
} path_trace;

struct {
@@ -126,9 +125,6 @@ class RenderScheduler {
void set_num_samples(int num_samples);
int get_num_samples() const;

void set_sample_offset(int sample_offset);
int get_sample_offset() const;

/* Time limit for the path tracing tasks, in minutes.
* Zero disables the limit. */
void set_time_limit(double time_limit);
@@ -154,7 +150,7 @@ class RenderScheduler {

/* Reset scheduler, indicating that rendering will happen from scratch.
* Resets current rendered state, as well as scheduling information. */
void reset(const BufferParams &buffer_params, int num_samples, int sample_offset);
void reset(const BufferParams &buffer_params, int num_samples);

/* Reset scheduler upon switching to a next tile.
* Will keep the same number of samples and full-frame render parameters, but will reset progress
@@ -423,8 +419,6 @@ class RenderScheduler {
int start_sample_ = 0;
int num_samples_ = 0;

int sample_offset_ = 0;

/* Limit in seconds for how long path tracing is allowed to happen.
* Zero means no limit is applied. */
double time_limit_ = 0.0;
@@ -96,7 +96,7 @@ bool ShaderEval::eval_cpu(Device *device,
device->get_cpu_kernel_thread_globals(kernel_thread_globals);

/* Find required kernel function. */
const CPUKernels &kernels = Device::get_cpu_kernels();
const CPUKernels &kernels = *(device->get_cpu_kernels());

/* Simple parallel_for over all work items. */
KernelShaderEvalInput *input_data = input.data();
@@ -36,7 +36,6 @@ void WorkTileScheduler::set_max_num_path_states(int max_num_path_states)
void WorkTileScheduler::reset(const BufferParams &buffer_params,
int sample_start,
int samples_num,
int sample_offset,
float scrambling_distance)
{
/* Image buffer parameters. */
@@ -52,7 +51,6 @@ void WorkTileScheduler::reset(const BufferParams &buffer_params,
/* Samples parameters. */
sample_start_ = sample_start;
samples_num_ = samples_num;
sample_offset_ = sample_offset;

/* Initialize new scheduling. */
reset_scheduler_state();
@@ -113,7 +111,6 @@ bool WorkTileScheduler::get_work(KernelWorkTile *work_tile_, const int max_work_
work_tile.h = tile_size_.height;
work_tile.start_sample = sample_start_ + start_sample;
work_tile.num_samples = min(tile_size_.num_samples, samples_num_ - start_sample);
work_tile.sample_offset = sample_offset_;
work_tile.offset = offset_;
work_tile.stride = stride_;

@@ -41,7 +41,6 @@ class WorkTileScheduler {
void reset(const BufferParams &buffer_params,
int sample_start,
int samples_num,
int sample_offset,
float scrambling_distance);

/* Get work for a device.
@@ -80,7 +79,6 @@ class WorkTileScheduler {
* (splitting into a smaller work tiles). */
int sample_start_ = 0;
int samples_num_ = 0;
int sample_offset_ = 0;

/* Tile size which be scheduled for rendering. */
TileSize tile_size_;
@@ -39,10 +39,6 @@ set(SRC_KERNEL_DEVICE_HIP
device/hip/kernel.cpp
)

set(SRC_KERNEL_DEVICE_METAL
device/metal/kernel.metal
)

set(SRC_KERNEL_DEVICE_OPTIX
device/optix/kernel.cu
device/optix/kernel_shader_raytrace.cu
@@ -83,13 +79,6 @@ set(SRC_KERNEL_DEVICE_OPTIX_HEADERS
device/optix/globals.h
)

set(SRC_KERNEL_DEVICE_METAL_HEADERS
device/metal/compat.h
device/metal/context_begin.h
device/metal/context_end.h
device/metal/globals.h
)

set(SRC_KERNEL_CLOSURE_HEADERS
closure/alloc.h
closure/bsdf.h
@@ -734,14 +723,12 @@ cycles_add_library(cycles_kernel "${LIB}"
${SRC_KERNEL_DEVICE_CUDA}
${SRC_KERNEL_DEVICE_HIP}
${SRC_KERNEL_DEVICE_OPTIX}
${SRC_KERNEL_DEVICE_METAL}
${SRC_KERNEL_HEADERS}
${SRC_KERNEL_DEVICE_CPU_HEADERS}
${SRC_KERNEL_DEVICE_GPU_HEADERS}
${SRC_KERNEL_DEVICE_CUDA_HEADERS}
${SRC_KERNEL_DEVICE_HIP_HEADERS}
${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
${SRC_KERNEL_DEVICE_METAL_HEADERS}
)

source_group("bake" FILES ${SRC_KERNEL_BAKE_HEADERS})
@@ -753,7 +740,6 @@ source_group("device\\cuda" FILES ${SRC_KERNEL_DEVICE_CUDA} ${SRC_KERNEL_DEVICE_
source_group("device\\gpu" FILES ${SRC_KERNEL_DEVICE_GPU_HEADERS})
source_group("device\\hip" FILES ${SRC_KERNEL_DEVICE_HIP} ${SRC_KERNEL_DEVICE_HIP_HEADERS})
source_group("device\\optix" FILES ${SRC_KERNEL_DEVICE_OPTIX} ${SRC_KERNEL_DEVICE_OPTIX_HEADERS})
source_group("device\\metal" FILES ${SRC_KERNEL_DEVICE_METAL} ${SRC_KERNEL_DEVICE_METAL_HEADERS})
source_group("film" FILES ${SRC_KERNEL_FILM_HEADERS})
source_group("geom" FILES ${SRC_KERNEL_GEOM_HEADERS})
source_group("integrator" FILES ${SRC_KERNEL_INTEGRATOR_HEADERS})
@@ -786,8 +772,6 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP}" ${CYCLES_
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_METAL}" ${CYCLES_INSTALL_PATH}/source/kernel/device/metal)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_METAL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/metal)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_FILM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/film)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/geom)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_INTEGRATOR_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/integrator)
@@ -18,7 +18,6 @@

/* CPU Kernel Interface */

#include "util/half.h"
#include "util/types.h"

#include "kernel/types.h"
@@ -37,7 +37,7 @@

KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_camera);
KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_bake);
KERNEL_INTEGRATOR_SHADE_FUNCTION(intersect_closest);
KERNEL_INTEGRATOR_FUNCTION(intersect_closest);
KERNEL_INTEGRATOR_FUNCTION(intersect_shadow);
KERNEL_INTEGRATOR_FUNCTION(intersect_subsurface);
KERNEL_INTEGRATOR_FUNCTION(intersect_volume_stack);
@@ -52,37 +52,6 @@ KERNEL_INTEGRATOR_SHADE_FUNCTION(megakernel);
#undef KERNEL_INTEGRATOR_INIT_FUNCTION
#undef KERNEL_INTEGRATOR_SHADE_FUNCTION

#define KERNEL_FILM_CONVERT_FUNCTION(name) \
void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \
const float *buffer, \
float *pixel, \
const int width, \
const int buffer_stride, \
const int pixel_stride); \
void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \
const KernelFilmConvert *kfilm_convert, \
const float *buffer, \
half4 *pixel, \
const int width, \
const int buffer_stride);

KERNEL_FILM_CONVERT_FUNCTION(depth)
KERNEL_FILM_CONVERT_FUNCTION(mist)
KERNEL_FILM_CONVERT_FUNCTION(sample_count)
KERNEL_FILM_CONVERT_FUNCTION(float)

KERNEL_FILM_CONVERT_FUNCTION(light_path)
KERNEL_FILM_CONVERT_FUNCTION(float3)

KERNEL_FILM_CONVERT_FUNCTION(motion)
KERNEL_FILM_CONVERT_FUNCTION(cryptomatte)
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher)
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow)
KERNEL_FILM_CONVERT_FUNCTION(combined)
KERNEL_FILM_CONVERT_FUNCTION(float4)

#undef KERNEL_FILM_CONVERT_FUNCTION

/* --------------------------------------------------------------------
* Shader evaluation.
*/
@@ -47,8 +47,8 @@
# include "kernel/integrator/megakernel.h"

# include "kernel/film/adaptive_sampling.h"
# include "kernel/film/id_passes.h"
# include "kernel/film/read.h"
# include "kernel/film/id_passes.h"

# include "kernel/bake/bake.h"

@@ -112,7 +112,7 @@ CCL_NAMESPACE_BEGIN

DEFINE_INTEGRATOR_INIT_KERNEL(init_from_camera)
DEFINE_INTEGRATOR_INIT_KERNEL(init_from_bake)
DEFINE_INTEGRATOR_SHADE_KERNEL(intersect_closest)
DEFINE_INTEGRATOR_KERNEL(intersect_closest)
DEFINE_INTEGRATOR_KERNEL(intersect_subsurface)
DEFINE_INTEGRATOR_KERNEL(intersect_volume_stack)
DEFINE_INTEGRATOR_SHADE_KERNEL(shade_background)
@@ -232,85 +232,6 @@ void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU *
#endif
}

/* --------------------------------------------------------------------
* Film Convert.
*/

#ifdef KERNEL_STUB

# define KERNEL_FILM_CONVERT_FUNCTION(name, is_float) \
void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \
const float *buffer, \
float *pixel, \
const int width, \
const int buffer_stride, \
const int pixel_stride) \
{ \
STUB_ASSERT(KERNEL_ARCH, film_convert_##name); \
} \
void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \
const KernelFilmConvert *kfilm_convert, \
const float *buffer, \
half4 *pixel, \
const int width, \
const int buffer_stride) \
{ \
STUB_ASSERT(KERNEL_ARCH, film_convert_##name); \
}

#else

# define KERNEL_FILM_CONVERT_FUNCTION(name, is_float) \
void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \
const float *buffer, \
float *pixel, \
const int width, \
const int buffer_stride, \
const int pixel_stride) \
{ \
for (int i = 0; i < width; i++, buffer += buffer_stride, pixel += pixel_stride) { \
film_get_pass_pixel_##name(kfilm_convert, buffer, pixel); \
} \
} \
void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \
const KernelFilmConvert *kfilm_convert, \
const float *buffer, \
half4 *pixel, \
const int width, \
const int buffer_stride) \
{ \
for (int i = 0; i < width; i++, buffer += buffer_stride, pixel++) { \
float pixel_rgba[4] = {0.0f, 0.0f, 0.0f, 1.0f}; \
film_get_pass_pixel_##name(kfilm_convert, buffer, pixel_rgba); \
if (is_float) { \
pixel_rgba[1] = pixel_rgba[0]; \
pixel_rgba[2] = pixel_rgba[0]; \
} \
film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel_rgba); \
*pixel = float4_to_half4_display( \
make_float4(pixel_rgba[0], pixel_rgba[1], pixel_rgba[2], pixel_rgba[3])); \
} \
}

#endif

KERNEL_FILM_CONVERT_FUNCTION(depth, true)
KERNEL_FILM_CONVERT_FUNCTION(mist, true)
KERNEL_FILM_CONVERT_FUNCTION(sample_count, true)
KERNEL_FILM_CONVERT_FUNCTION(float, true)

KERNEL_FILM_CONVERT_FUNCTION(light_path, false)
KERNEL_FILM_CONVERT_FUNCTION(float3, false)

KERNEL_FILM_CONVERT_FUNCTION(motion, false)
KERNEL_FILM_CONVERT_FUNCTION(cryptomatte, false)
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher, false)
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow, false)
KERNEL_FILM_CONVERT_FUNCTION(combined, false)
KERNEL_FILM_CONVERT_FUNCTION(float4, false)

#undef KERNEL_FILM_CONVERT_FUNCTION

#undef KERNEL_INVOKE
#undef DEFINE_INTEGRATOR_KERNEL
#undef DEFINE_INTEGRATOR_SHADE_KERNEL
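Editor's note: as a reading aid, here is roughly what one instantiation of the non-stub macro above expands to, using the depth pass purely as an illustration (a sketch of the expansion, not verbatim generated code):

    /* Sketch of KERNEL_FILM_CONVERT_FUNCTION(depth, true), float-output variant:
     * walk the row and let the per-pass helper convert each pixel. */
    void KERNEL_FUNCTION_FULL_NAME(film_convert_depth)(const KernelFilmConvert *kfilm_convert,
                                                       const float *buffer,
                                                       float *pixel,
                                                       const int width,
                                                       const int buffer_stride,
                                                       const int pixel_stride)
    {
      for (int i = 0; i < width; i++, buffer += buffer_stride, pixel += pixel_stride) {
        film_get_pass_pixel_depth(kfilm_convert, buffer, pixel);
      }
    }

The half_rgba variant of the same macro additionally broadcasts the scalar result to RGB (because is_float is true) and applies the pass overlays before converting to half4.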
@@ -75,7 +75,6 @@ typedef unsigned long long uint64_t;
#define ccl_gpu_block_idx_x (blockIdx.x)
#define ccl_gpu_grid_dim_x (gridDim.x)
#define ccl_gpu_warp_size (warpSize)
#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))

#define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
#define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
@@ -92,29 +92,12 @@

/* Compute number of threads per block and minimum blocks per multiprocessor
* given the maximum number of registers per thread. */

#define ccl_gpu_kernel(block_num_threads, thread_num_registers) \
extern "C" __global__ void __launch_bounds__(block_num_threads, \
GPU_MULTIPRESSOR_MAX_REGISTERS / \
(block_num_threads * thread_num_registers))

#define ccl_gpu_kernel_threads(block_num_threads) \
extern "C" __global__ void __launch_bounds__(block_num_threads)

#define ccl_gpu_kernel_signature(name, ...) kernel_gpu_##name(__VA_ARGS__)

#define ccl_gpu_kernel_call(x) x

/* Define a function object where "func" is the lambda body, and additional parameters are used to
* specify captured state */
#define ccl_gpu_kernel_lambda(func, ...) \
struct KernelLambda { \
__VA_ARGS__; \
__device__ int operator()(const int state) \
{ \
return (func); \
} \
} ccl_gpu_kernel_lambda_pass

/* sanity checks */

#if GPU_KERNEL_BLOCK_NUM_THREADS > GPU_BLOCK_MAX_THREADS
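Editor's note: a hedged example of how the launch-bounds macro above composes with ccl_gpu_kernel_signature; the values 512 and 32 below are placeholders chosen for illustration, not numbers taken from this patch:

    /* Sketch: ccl_gpu_kernel(512, 32) ccl_gpu_kernel_signature(example, int arg)
     * expands approximately to the following, capping register usage so that
     * GPU_MULTIPRESSOR_MAX_REGISTERS / (512 * 32) blocks can stay resident per multiprocessor. */
    extern "C" __global__ void __launch_bounds__(512,
                                                 GPU_MULTIPRESSOR_MAX_REGISTERS / (512 * 32))
        kernel_gpu_example(int arg);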
@@ -65,9 +65,7 @@ ccl_device float cubic_h1(float a)

/* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */
template<typename T>
ccl_device_noinline T kernel_tex_image_interp_bicubic(ccl_global const TextureInfo &info,
float x,
float y)
ccl_device_noinline T kernel_tex_image_interp_bicubic(const TextureInfo &info, float x, float y)
{
ccl_gpu_tex_object tex = (ccl_gpu_tex_object)info.data;

@@ -96,7 +94,7 @@ ccl_device_noinline T kernel_tex_image_interp_bicubic(ccl_global const TextureIn
/* Fast tricubic texture lookup using 8 trilinear lookups. */
template<typename T>
ccl_device_noinline T
kernel_tex_image_interp_tricubic(ccl_global const TextureInfo &info, float x, float y, float z)
kernel_tex_image_interp_tricubic(const TextureInfo &info, float x, float y, float z)
{
ccl_gpu_tex_object tex = (ccl_gpu_tex_object)info.data;

@@ -171,7 +169,7 @@ ccl_device T kernel_tex_image_interp_tricubic_nanovdb(S &s, float x, float y, fl

template<typename T>
ccl_device_noinline T kernel_tex_image_interp_nanovdb(
ccl_global const TextureInfo &info, float x, float y, float z, uint interpolation)
const TextureInfo &info, float x, float y, float z, uint interpolation)
{
using namespace nanovdb;

@@ -193,7 +191,7 @@ ccl_device_noinline T kernel_tex_image_interp_nanovdb(

ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int id, float x, float y)
{
ccl_global const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
const TextureInfo &info = kernel_tex_fetch(__texture_info, id);

/* float4, byte4, ushort4 and half4 */
const int texture_type = info.data_type;
@@ -228,7 +226,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
float3 P,
InterpolationType interp)
{
ccl_global const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
const TextureInfo &info = kernel_tex_fetch(__texture_info, id);

if (info.use_transform_3d) {
P = transform_point(&info.transform_3d, P);
File diff suppressed because it is too large
@@ -31,43 +31,10 @@ CCL_NAMESPACE_BEGIN
# define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 512
#endif

#ifdef __KERNEL_METAL__
struct ActiveIndexContext {
ActiveIndexContext(int _thread_index,
int _global_index,
int _threadgroup_size,
int _simdgroup_size,
int _simd_lane_index,
int _simd_group_index,
int _num_simd_groups,
threadgroup int *_simdgroup_offset)
: thread_index(_thread_index),
global_index(_global_index),
blocksize(_threadgroup_size),
ccl_gpu_warp_size(_simdgroup_size),
thread_warp(_simd_lane_index),
warp_index(_simd_group_index),
num_warps(_num_simd_groups),
warp_offset(_simdgroup_offset)
{
}

const int thread_index, global_index, blocksize, ccl_gpu_warp_size, thread_warp, warp_index,
num_warps;
threadgroup int *warp_offset;

template<uint blocksizeDummy, typename IsActiveOp>
void active_index_array(const uint num_states,
ccl_global int *indices,
ccl_global int *num_indices,
IsActiveOp is_active_op)
{
const uint state_index = global_index;
#else
template<uint blocksize, typename IsActiveOp>
__device__ void gpu_parallel_active_index_array(const uint num_states,
ccl_global int *indices,
ccl_global int *num_indices,
int *indices,
int *num_indices,
IsActiveOp is_active_op)
{
extern ccl_gpu_shared int warp_offset[];
@@ -78,62 +45,43 @@ __device__ void gpu_parallel_active_index_array(const uint num_states,
const uint warp_index = thread_index / ccl_gpu_warp_size;
const uint num_warps = blocksize / ccl_gpu_warp_size;

/* Test if state corresponding to this thread is active. */
const uint state_index = ccl_gpu_block_idx_x * blocksize + thread_index;
#endif
const uint is_active = (state_index < num_states) ? is_active_op(state_index) : 0;

/* Test if state corresponding to this thread is active. */
const uint is_active = (state_index < num_states) ? is_active_op(state_index) : 0;
/* For each thread within a warp compute how many other active states precede it. */
const uint thread_mask = 0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp);
const uint thread_offset = ccl_gpu_popc(ccl_gpu_ballot(is_active) & thread_mask);

/* For each thread within a warp compute how many other active states precede it. */
const uint thread_offset = ccl_gpu_popc(ccl_gpu_ballot(is_active) &
ccl_gpu_thread_mask(thread_warp));

/* Last thread in warp stores number of active states for each warp. */
if (thread_warp == ccl_gpu_warp_size - 1) {
warp_offset[warp_index] = thread_offset + is_active;
}

ccl_gpu_syncthreads();

/* Last thread in block converts per-warp sizes to offsets, increments global size of
* index array and gets offset to write to. */
if (thread_index == blocksize - 1) {
/* TODO: parallelize this. */
int offset = 0;
for (int i = 0; i < num_warps; i++) {
int num_active = warp_offset[i];
warp_offset[i] = offset;
offset += num_active;
}

const uint block_num_active = warp_offset[warp_index] + thread_offset + is_active;
warp_offset[num_warps] = atomic_fetch_and_add_uint32(num_indices, block_num_active);
}

ccl_gpu_syncthreads();

/* Write to index array. */
if (is_active) {
const uint block_offset = warp_offset[num_warps];
indices[block_offset + warp_offset[warp_index] + thread_offset] = state_index;
}
/* Last thread in warp stores number of active states for each warp. */
if (thread_warp == ccl_gpu_warp_size - 1) {
warp_offset[warp_index] = thread_offset + is_active;
}

#ifdef __KERNEL_METAL__
}; /* end class ActiveIndexContext */
ccl_gpu_syncthreads();

/* inject the required thread params into a struct, and redirect to its templated member function
*/
# define gpu_parallel_active_index_array \
ActiveIndexContext(metal_local_id, \
metal_global_id, \
metal_local_size, \
simdgroup_size, \
simd_lane_index, \
simd_group_index, \
num_simd_groups, \
simdgroup_offset) \
.active_index_array
#endif
/* Last thread in block converts per-warp sizes to offsets, increments global size of
* index array and gets offset to write to. */
if (thread_index == blocksize - 1) {
/* TODO: parallelize this. */
int offset = 0;
for (int i = 0; i < num_warps; i++) {
int num_active = warp_offset[i];
warp_offset[i] = offset;
offset += num_active;
}

const uint block_num_active = warp_offset[warp_index] + thread_offset + is_active;
warp_offset[num_warps] = atomic_fetch_and_add_uint32(num_indices, block_num_active);
}

ccl_gpu_syncthreads();

/* Write to index array. */
if (is_active) {
const uint block_offset = warp_offset[num_warps];
indices[block_offset + warp_offset[warp_index] + thread_offset] = state_index;
}
}

CCL_NAMESPACE_END
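Editor's note: the warp-ballot code above is easier to check against a plain serial reference. The helper below is hypothetical and only restates what the kernel computes (the indices of all active states, plus their count):

    /* Serial reference for gpu_parallel_active_index_array (illustration only). */
    template<typename IsActiveOp>
    inline void serial_active_index_array(const uint num_states,
                                          int *indices,
                                          int *num_indices,
                                          IsActiveOp is_active_op)
    {
      int num_active = 0;
      for (uint state_index = 0; state_index < num_states; state_index++) {
        if (is_active_op(state_index)) {
          indices[num_active++] = state_index;
        }
      }
      *num_indices = num_active;
    }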
@@ -33,12 +33,10 @@ CCL_NAMESPACE_BEGIN
# define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 512
#endif

__device__ void gpu_parallel_prefix_sum(const int global_id,
ccl_global int *counter,
ccl_global int *prefix_sum,
const int num_values)
template<uint blocksize>
__device__ void gpu_parallel_prefix_sum(int *counter, int *prefix_sum, const int num_values)
{
if (global_id != 0) {
if (!(ccl_gpu_block_idx_x == 0 && ccl_gpu_thread_idx_x == 0)) {
return;
}

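Editor's note: the hunk above only changes the signature of gpu_parallel_prefix_sum; its body is not shown here. For orientation, an exclusive prefix sum of the kind this utility is named after can be sketched serially as follows (hypothetical helper, not code from this patch):

    /* prefix_sum[i] receives the sum of values[0..i-1]; the return value is the grand total. */
    inline int serial_exclusive_prefix_sum(const int *values, int *prefix_sum, const int num_values)
    {
      int offset = 0;
      for (int i = 0; i < num_values; i++) {
        prefix_sum[i] = offset;
        offset += values[i];
      }
      return offset;
    }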
@@ -33,16 +33,16 @@ CCL_NAMESPACE_BEGIN
#endif
#define GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY (~0)

template<typename GetKeyOp>
__device__ void gpu_parallel_sorted_index_array(const uint state_index,
const uint num_states,
template<uint blocksize, typename GetKeyOp>
__device__ void gpu_parallel_sorted_index_array(const uint num_states,
const int num_states_limit,
ccl_global int *indices,
ccl_global int *num_indices,
ccl_global int *key_counter,
ccl_global int *key_prefix_sum,
int *indices,
int *num_indices,
int *key_counter,
int *key_prefix_sum,
GetKeyOp get_key_op)
{
const uint state_index = ccl_gpu_block_idx_x * blocksize + ccl_gpu_thread_idx_x;
const int key = (state_index < num_states) ? get_key_op(state_index) :
GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY;

@@ -74,7 +74,6 @@ typedef unsigned long long uint64_t;
#define ccl_gpu_block_idx_x (blockIdx.x)
#define ccl_gpu_grid_dim_x (gridDim.x)
#define ccl_gpu_warp_size (warpSize)
#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))

#define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
#define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
@@ -35,29 +35,12 @@

/* Compute number of threads per block and minimum blocks per multiprocessor
* given the maximum number of registers per thread. */

#define ccl_gpu_kernel(block_num_threads, thread_num_registers) \
extern "C" __global__ void __launch_bounds__(block_num_threads, \
GPU_MULTIPRESSOR_MAX_REGISTERS / \
(block_num_threads * thread_num_registers))

#define ccl_gpu_kernel_threads(block_num_threads) \
extern "C" __global__ void __launch_bounds__(block_num_threads)

#define ccl_gpu_kernel_signature(name, ...) kernel_gpu_##name(__VA_ARGS__)

#define ccl_gpu_kernel_call(x) x

/* Define a function object where "func" is the lambda body, and additional parameters are used to
* specify captured state */
#define ccl_gpu_kernel_lambda(func, ...) \
struct KernelLambda { \
__VA_ARGS__; \
__device__ int operator()(const int state) \
{ \
return (func); \
} \
} ccl_gpu_kernel_lambda_pass

/* sanity checks */

#if GPU_KERNEL_BLOCK_NUM_THREADS > GPU_BLOCK_MAX_THREADS
@@ -58,98 +58,6 @@ using namespace metal;

#define kernel_assert(cond)

#define ccl_gpu_global_id_x() metal_global_id
#define ccl_gpu_warp_size simdgroup_size
#define ccl_gpu_thread_idx_x simd_group_index
#define ccl_gpu_thread_mask(thread_warp) uint64_t((1ull << thread_warp) - 1)

#define ccl_gpu_ballot(predicate) ((uint64_t)((simd_vote::vote_t)simd_ballot(predicate)))
#define ccl_gpu_popc(x) popcount(x)

// clang-format off

/* kernel.h adapters */

#define ccl_gpu_kernel(block_num_threads, thread_num_registers)
#define ccl_gpu_kernel_threads(block_num_threads)

/* Convert a comma-separated list into a semicolon-separated list
* (so that we can generate a struct based on kernel entry-point parameters). */
#define FN0()
#define FN1(p1) p1;
#define FN2(p1, p2) p1; p2;
#define FN3(p1, p2, p3) p1; p2; p3;
#define FN4(p1, p2, p3, p4) p1; p2; p3; p4;
#define FN5(p1, p2, p3, p4, p5) p1; p2; p3; p4; p5;
#define FN6(p1, p2, p3, p4, p5, p6) p1; p2; p3; p4; p5; p6;
#define FN7(p1, p2, p3, p4, p5, p6, p7) p1; p2; p3; p4; p5; p6; p7;
#define FN8(p1, p2, p3, p4, p5, p6, p7, p8) p1; p2; p3; p4; p5; p6; p7; p8;
#define FN9(p1, p2, p3, p4, p5, p6, p7, p8, p9) p1; p2; p3; p4; p5; p6; p7; p8; p9;
#define FN10(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10;
#define FN11(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11;
#define FN12(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12;
#define FN13(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13;
#define FN14(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14;
#define FN15(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14; p15;
#define FN16(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14; p15; p16;
#define GET_LAST_ARG(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, ...) p16
#define PARAMS_MAKER(...) GET_LAST_ARG(__VA_ARGS__, FN16, FN15, FN14, FN13, FN12, FN11, FN10, FN9, FN8, FN7, FN6, FN5, FN4, FN3, FN2, FN1, FN0)

/* Generate a struct containing the entry-point parameters and a "run"
* method which can access them implicitly via this-> */
#define ccl_gpu_kernel_signature(name, ...) \
struct kernel_gpu_##name \
{ \
PARAMS_MAKER(__VA_ARGS__)(__VA_ARGS__) \
void run(thread MetalKernelContext& context, \
threadgroup int *simdgroup_offset, \
const uint metal_global_id, \
const ushort metal_local_id, \
const ushort metal_local_size, \
uint simdgroup_size, \
uint simd_lane_index, \
uint simd_group_index, \
uint num_simd_groups) ccl_global const; \
}; \
kernel void kernel_metal_##name(device const kernel_gpu_##name *params_struct, \
constant KernelParamsMetal &ccl_restrict _launch_params_metal, \
constant MetalAncillaries *_metal_ancillaries, \
threadgroup int *simdgroup_offset[[ threadgroup(0) ]], \
const uint metal_global_id [[thread_position_in_grid]], \
const ushort metal_local_id [[thread_position_in_threadgroup]], \
const ushort metal_local_size [[threads_per_threadgroup]], \
uint simdgroup_size [[threads_per_simdgroup]], \
uint simd_lane_index [[thread_index_in_simdgroup]], \
uint simd_group_index [[simdgroup_index_in_threadgroup]], \
uint num_simd_groups [[simdgroups_per_threadgroup]]) { \
MetalKernelContext context(_launch_params_metal, _metal_ancillaries); \
INIT_DEBUG_BUFFER \
params_struct->run(context, simdgroup_offset, metal_global_id, metal_local_id, metal_local_size, simdgroup_size, simd_lane_index, simd_group_index, num_simd_groups); \
} \
void kernel_gpu_##name::run(thread MetalKernelContext& context, \
threadgroup int *simdgroup_offset, \
const uint metal_global_id, \
const ushort metal_local_id, \
const ushort metal_local_size, \
uint simdgroup_size, \
uint simd_lane_index, \
uint simd_group_index, \
uint num_simd_groups) ccl_global const

#define ccl_gpu_kernel_call(x) context.x

/* define a function object where "func" is the lambda body, and additional parameters are used to specify captured state */
#define ccl_gpu_kernel_lambda(func, ...) \
struct KernelLambda \
{ \
KernelLambda(ccl_private MetalKernelContext &_context) : context(_context) {} \
ccl_private MetalKernelContext &context; \
__VA_ARGS__; \
int operator()(const int state) const { return (func); } \
} ccl_gpu_kernel_lambda_pass(context)

// clang-format on

/* make_type definitions with Metal style element initializers */
#ifdef make_float2
# undef make_float2
@@ -216,38 +124,3 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \
#define logf(x) trigmode::log(float(x))

#define NULL 0

/* texture bindings and sampler setup */

struct Texture2DParamsMetal {
texture2d<float, access::sample> tex;
};
struct Texture3DParamsMetal {
texture3d<float, access::sample> tex;
};

struct MetalAncillaries {
device Texture2DParamsMetal *textures_2d;
device Texture3DParamsMetal *textures_3d;
};

enum SamplerType {
SamplerFilterNearest_AddressRepeat,
SamplerFilterNearest_AddressClampEdge,
SamplerFilterNearest_AddressClampZero,

SamplerFilterLinear_AddressRepeat,
SamplerFilterLinear_AddressClampEdge,
SamplerFilterLinear_AddressClampZero,

SamplerCount
};

constant constexpr array<sampler, SamplerCount> metal_samplers = {
sampler(address::repeat, filter::nearest),
sampler(address::clamp_to_edge, filter::nearest),
sampler(address::clamp_to_zero, filter::nearest),
sampler(address::repeat, filter::linear),
sampler(address::clamp_to_edge, filter::linear),
sampler(address::clamp_to_zero, filter::linear),
};
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// clang-format off
|
||||
|
||||
/* Open the Metal kernel context class
|
||||
* Necessary to access resource bindings */
|
||||
class MetalKernelContext {
|
||||
public:
|
||||
constant KernelParamsMetal &launch_params_metal;
|
||||
constant MetalAncillaries *metal_ancillaries;
|
||||
|
||||
MetalKernelContext(constant KernelParamsMetal &_launch_params_metal, constant MetalAncillaries * _metal_ancillaries)
|
||||
: launch_params_metal(_launch_params_metal), metal_ancillaries(_metal_ancillaries)
|
||||
{}
|
||||
|
||||
/* texture fetch adapter functions */
|
||||
typedef uint64_t ccl_gpu_tex_object;
|
||||
|
||||
template<typename T>
|
||||
inline __attribute__((__always_inline__))
|
||||
T ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object tex, float x, float y) const {
|
||||
kernel_assert(0);
|
||||
return 0;
|
||||
}
|
||||
template<typename T>
|
||||
inline __attribute__((__always_inline__))
|
||||
T ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object tex, float x, float y, float z) const {
|
||||
kernel_assert(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// texture2d
|
||||
template<>
|
||||
inline __attribute__((__always_inline__))
|
||||
float4 ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object tex, float x, float y) const {
|
||||
const uint tid(tex);
|
||||
const uint sid(tex >> 32);
|
||||
return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y));
|
||||
}
|
||||
template<>
|
||||
inline __attribute__((__always_inline__))
|
||||
float ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object tex, float x, float y) const {
|
||||
const uint tid(tex);
|
||||
const uint sid(tex >> 32);
|
||||
return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y)).x;
|
||||
}
|
||||
|
||||
// texture3d
|
||||
template<>
|
||||
inline __attribute__((__always_inline__))
|
||||
float4 ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object tex, float x, float y, float z) const {
|
||||
const uint tid(tex);
|
||||
const uint sid(tex >> 32);
|
||||
return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z));
|
||||
}
|
||||
template<>
|
||||
inline __attribute__((__always_inline__))
|
||||
float ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object tex, float x, float y, float z) const {
|
||||
const uint tid(tex);
|
||||
const uint sid(tex >> 32);
|
||||
return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z)).x;
|
||||
}
|
||||
# include "kernel/device/gpu/image.h"
|
||||
|
||||
// clang-format on
|
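/* Illustrative host-side sketch, not part of the diff above: the 64-bit
 * ccl_gpu_tex_object handle that these adapters consume packs the texture slot
 * in the low 32 bits and the sampler enum in the high 32 bits, which is what
 * the `tid` / `sid` unpacking in the samplers above assumes. Function names
 * here are hypothetical. */
#include <cstdint>

static inline uint64_t pack_metal_tex_handle(uint32_t texture_slot, uint32_t sampler_id)
{
  /* Low word: texture index, high word: sampler index. */
  return (uint64_t(sampler_id) << 32) | uint64_t(texture_slot);
}

static inline void unpack_metal_tex_handle(uint64_t handle, uint32_t &texture_slot, uint32_t &sampler_id)
{
  texture_slot = uint32_t(handle);      /* matches `const uint tid(tex);` */
  sampler_id = uint32_t(handle >> 32);  /* matches `const uint sid(tex >> 32);` */
}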
@@ -1,23 +0,0 @@
/* Copyright 2021 Blender Foundation. Licensed under the Apache License, Version 2.0;
 * see http://www.apache.org/licenses/LICENSE-2.0. */

  }
;  /* end of MetalKernelContext class definition */

/* Silently redirect into the MetalKernelContext instance */
/* NOTE: These macros will need maintaining as entry-points change. */

#undef kernel_integrator_state
#define kernel_integrator_state context.launch_params_metal.__integrator_state
@@ -1,51 +0,0 @@
/* Copyright 2021 Blender Foundation. Licensed under the Apache License, Version 2.0;
 * see http://www.apache.org/licenses/LICENSE-2.0. */

/* Constant Globals */

#include "kernel/types.h"
#include "kernel/util/profiling.h"

#include "kernel/integrator/state.h"

CCL_NAMESPACE_BEGIN

typedef struct KernelParamsMetal {

#define KERNEL_TEX(type, name) ccl_constant type *name;
#include "kernel/textures.h"
#undef KERNEL_TEX

  const IntegratorStateGPU __integrator_state;
  const KernelData data;

} KernelParamsMetal;

typedef struct KernelGlobalsGPU {
  int unused[1];
} KernelGlobalsGPU;

typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals;

#define kernel_data launch_params_metal.data
#define kernel_integrator_state launch_params_metal.__integrator_state

/* data lookup defines */

#define kernel_tex_fetch(tex, index) launch_params_metal.tex[index]
#define kernel_tex_array(tex) launch_params_metal.tex

CCL_NAMESPACE_END
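/* Illustrative sketch of the KERNEL_TEX X-macro pattern used above, not part
 * of the diff: every KERNEL_TEX(type, name) entry listed in kernel/textures.h
 * expands into one constant pointer member of KernelParamsMetal. The entries
 * and macro name below are hypothetical, only to show the expansion. */
#include <cstdint>

#define EXAMPLE_KERNEL_TEX(type, name) const type *name;
struct ExampleParams {
  /* Each listed texture becomes a pointer member, exactly as the
   * "#include kernel/textures.h" line does for KernelParamsMetal. */
  EXAMPLE_KERNEL_TEX(float, example_floats)     /* hypothetical entry */
  EXAMPLE_KERNEL_TEX(uint32_t, example_flags)   /* hypothetical entry */
};
#undef EXAMPLE_KERNEL_TEX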
@@ -1,25 +0,0 @@
/* Copyright 2021 Blender Foundation. Licensed under the Apache License, Version 2.0;
 * see http://www.apache.org/licenses/LICENSE-2.0. */

/* Metal kernel entry points */

// clang-format off

#include "kernel/device/metal/compat.h"
#include "kernel/device/metal/globals.h"
#include "kernel/device/gpu/kernel.h"

// clang-format on
@@ -76,7 +76,6 @@ typedef unsigned long long uint64_t;
#define ccl_gpu_block_idx_x (blockIdx.x)
#define ccl_gpu_grid_dim_x (gridDim.x)
#define ccl_gpu_warp_size (warpSize)
#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))

#define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
#define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
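/* Minimal sketch of the flat index the two macros above compute:
 * global_id = block_index * block_size + thread_index_within_block, and
 * global_size = grid_size * block_size. Plain C++ stand-ins for illustration
 * only; the real macros map to the device's built-in indices. */
static inline unsigned int example_global_id_x(unsigned int block_idx,
                                               unsigned int block_dim,
                                               unsigned int thread_idx)
{
  return block_idx * block_dim + thread_idx;
}

static inline unsigned int example_global_size_x(unsigned int grid_dim, unsigned int block_dim)
{
  return grid_dim * block_dim;
}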
@@ -57,7 +57,7 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest()
  const int global_index = optixGetLaunchIndex().x;
  const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
                                                       global_index;
  integrator_intersect_closest(nullptr, path_index, __params.render_buffer);
  integrator_intersect_closest(nullptr, path_index);
}

extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_shadow()
@@ -33,72 +33,62 @@ CCL_NAMESPACE_BEGIN
 * them separately. */

ccl_device_inline void bsdf_eval_init(ccl_private BsdfEval *eval,
                                      const ClosureType closure_type,
                                      const bool is_diffuse,
                                      float3 value)
{
  eval->diffuse = zero_float3();
  eval->glossy = zero_float3();

  if (CLOSURE_IS_BSDF_DIFFUSE(closure_type)) {
  if (is_diffuse) {
    eval->diffuse = value;
  }
  else if (CLOSURE_IS_BSDF_GLOSSY(closure_type)) {
  else {
    eval->glossy = value;
  }

  eval->sum = value;
}

ccl_device_inline void bsdf_eval_accum(ccl_private BsdfEval *eval,
                                       const ClosureType closure_type,
                                       float3 value)
                                       const bool is_diffuse,
                                       float3 value,
                                       float mis_weight)
{
  if (CLOSURE_IS_BSDF_DIFFUSE(closure_type)) {
  value *= mis_weight;

  if (is_diffuse) {
    eval->diffuse += value;
  }
  else if (CLOSURE_IS_BSDF_GLOSSY(closure_type)) {
  else {
    eval->glossy += value;
  }

  eval->sum += value;
}

ccl_device_inline bool bsdf_eval_is_zero(ccl_private BsdfEval *eval)
{
  return is_zero(eval->sum);
  return is_zero(eval->diffuse) && is_zero(eval->glossy);
}

ccl_device_inline void bsdf_eval_mul(ccl_private BsdfEval *eval, float value)
{
  eval->diffuse *= value;
  eval->glossy *= value;
  eval->sum *= value;
}

ccl_device_inline void bsdf_eval_mul3(ccl_private BsdfEval *eval, float3 value)
{
  eval->diffuse *= value;
  eval->glossy *= value;
  eval->sum *= value;
}

ccl_device_inline float3 bsdf_eval_sum(ccl_private const BsdfEval *eval)
{
  return eval->sum;
  return eval->diffuse + eval->glossy;
}

ccl_device_inline float3 bsdf_eval_pass_diffuse_weight(ccl_private const BsdfEval *eval)
ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(ccl_private const BsdfEval *eval)
{
  /* Ratio of diffuse weight to recover proportions for writing to render pass.
  /* Ratio of diffuse and glossy to recover proportions for writing to render pass.
   * We assume reflection, transmission and volume scatter to be exclusive. */
  return safe_divide_float3_float3(eval->diffuse, eval->sum);
}

ccl_device_inline float3 bsdf_eval_pass_glossy_weight(ccl_private const BsdfEval *eval)
{
  /* Ratio of glossy weight to recover proportions for writing to render pass.
   * We assume reflection, transmission and volume scatter to be exclusive. */
  return safe_divide_float3_float3(eval->glossy, eval->sum);
  return safe_divide_float3_float3(eval->diffuse, eval->diffuse + eval->glossy);
}

/* --------------------------------------------------------------------
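/* Illustrative sketch, not from the diff: both variants above keep separate
 * diffuse and glossy accumulators so the light passes can later be split out
 * of the combined result. A minimal CPU-side analogue with a hypothetical
 * float3 type: */
struct ExampleFloat3 {
  float x, y, z;
};

struct ExampleBsdfEval {
  ExampleFloat3 diffuse{0.0f, 0.0f, 0.0f};
  ExampleFloat3 glossy{0.0f, 0.0f, 0.0f};
};

static inline void example_bsdf_eval_accum(ExampleBsdfEval &eval, bool is_diffuse, ExampleFloat3 value)
{
  /* Pick the bucket; the value used for sampling is always diffuse + glossy. */
  ExampleFloat3 &bucket = is_diffuse ? eval.diffuse : eval.glossy;
  bucket.x += value.x;
  bucket.y += value.y;
  bucket.z += value.z;
}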
@@ -151,8 +141,7 @@ ccl_device_forceinline ccl_global float *kernel_accum_pixel_render_buffer(
ccl_device_inline int kernel_accum_sample(KernelGlobals kg,
                                          ConstIntegratorState state,
                                          ccl_global float *ccl_restrict render_buffer,
                                          int sample,
                                          int sample_offset)
                                          int sample)
{
  if (kernel_data.film.pass_sample_count == PASS_UNUSED) {
    return sample;
@@ -160,8 +149,7 @@ ccl_device_inline int kernel_accum_sample(KernelGlobals kg,

  ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer);

  return atomic_fetch_and_add_uint32((uint *)(buffer) + kernel_data.film.pass_sample_count, 1) +
         sample_offset;
  return atomic_fetch_and_add_uint32((uint *)(buffer) + kernel_data.film.pass_sample_count, 1);
}

ccl_device void kernel_accum_adaptive_buffer(KernelGlobals kg,
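/* Sketch of the per-pixel sample counting above, illustration only: an atomic
 * add on the pass_sample_count channel returns the number of samples that were
 * already accumulated, which becomes this path's sample index. Host-side
 * stand-in using std::atomic; the function name is hypothetical. */
#include <atomic>
#include <cstdint>

static inline uint32_t example_accum_sample(std::atomic<uint32_t> &pixel_sample_count)
{
  /* fetch_add returns the previous value, i.e. the index of this sample. */
  return pixel_sample_count.fetch_add(1u);
}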
@@ -363,48 +351,38 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(KernelGlobals kg
    /* Directly visible, write to emission or background pass. */
    pass_offset = pass;
  }
  else if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
    if (path_flag & PATH_RAY_SURFACE_PASS) {
      /* Indirectly visible through reflection. */
      const float3 diffuse_weight = INTEGRATOR_STATE(state, path, pass_diffuse_weight);
      const float3 glossy_weight = INTEGRATOR_STATE(state, path, pass_glossy_weight);
    else if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) {
      /* Indirectly visible through reflection. */
      const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ?
                                         ((INTEGRATOR_STATE(state, path, bounce) == 1) ?
                                              kernel_data.film.pass_glossy_direct :
                                              kernel_data.film.pass_glossy_indirect) :
                                         ((INTEGRATOR_STATE(state, path, bounce) == 1) ?
                                              kernel_data.film.pass_transmission_direct :
                                              kernel_data.film.pass_transmission_indirect);

      /* Glossy */
      const int glossy_pass_offset = ((INTEGRATOR_STATE(state, path, bounce) == 1) ?
                                          kernel_data.film.pass_glossy_direct :
                                          kernel_data.film.pass_glossy_indirect);
      if (glossy_pass_offset != PASS_UNUSED) {
        kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_weight * contribution);
      }

      /* Transmission */
      const int transmission_pass_offset = ((INTEGRATOR_STATE(state, path, bounce) == 1) ?
                                                kernel_data.film.pass_transmission_direct :
                                                kernel_data.film.pass_transmission_indirect);

      if (transmission_pass_offset != PASS_UNUSED) {
        /* Transmission is what remains if not diffuse and glossy, not stored explicitly to save
         * GPU memory. */
        const float3 transmission_weight = one_float3() - diffuse_weight - glossy_weight;
        kernel_write_pass_float3(buffer + transmission_pass_offset,
                                 transmission_weight * contribution);
      }

      /* Reconstruct diffuse subset of throughput. */
      pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ?
                        kernel_data.film.pass_diffuse_direct :
                        kernel_data.film.pass_diffuse_indirect;
      if (pass_offset != PASS_UNUSED) {
        contribution *= diffuse_weight;
      }
      if (glossy_pass_offset != PASS_UNUSED) {
        /* Glossy is a subset of the throughput, reconstruct it here using the
         * diffuse-glossy ratio. */
        const float3 ratio = INTEGRATOR_STATE(state, path, diffuse_glossy_ratio);
        const float3 glossy_contribution = (one_float3() - ratio) * contribution;
        kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution);
      }
    else if (path_flag & PATH_RAY_VOLUME_PASS) {
      /* Indirectly visible through volume. */
      pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ?
                        kernel_data.film.pass_volume_direct :
                        kernel_data.film.pass_volume_indirect;

      /* Reconstruct diffuse subset of throughput. */
      pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ?
                        kernel_data.film.pass_diffuse_direct :
                        kernel_data.film.pass_diffuse_indirect;
      if (pass_offset != PASS_UNUSED) {
        contribution *= INTEGRATOR_STATE(state, path, diffuse_glossy_ratio);
      }
    }
    else if (path_flag & PATH_RAY_VOLUME_PASS) {
      /* Indirectly visible through volume. */
      pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ?
                        kernel_data.film.pass_volume_direct :
                        kernel_data.film.pass_volume_indirect;
    }

    /* Single write call for GPU coherence. */
    if (pass_offset != PASS_UNUSED) {
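/* Sketch of the reconstruction above, illustration only: with the older
 * diffuse_glossy_ratio representation the diffuse part of a contribution is
 * ratio * contribution and the glossy part is (1 - ratio) * contribution,
 * while the newer code stores pass_diffuse_weight / pass_glossy_weight
 * explicitly and derives transmission as the remainder. The helper name is
 * hypothetical. */
static inline void example_split_contribution(float ratio,
                                              float contribution,
                                              float &diffuse_part,
                                              float &glossy_part)
{
  diffuse_part = ratio * contribution;
  glossy_part = (1.0f - ratio) * contribution;
}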
@@ -448,56 +426,45 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg,
|
||||
#ifdef __PASSES__
|
||||
if (kernel_data.film.light_pass_flag & PASS_ANY) {
|
||||
const uint32_t path_flag = INTEGRATOR_STATE(state, shadow_path, flag);
|
||||
int pass_offset = PASS_UNUSED;
|
||||
|
||||
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
|
||||
int pass_offset = PASS_UNUSED;
|
||||
if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) {
|
||||
/* Indirectly visible through reflection. */
|
||||
const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ?
|
||||
((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
|
||||
kernel_data.film.pass_glossy_direct :
|
||||
kernel_data.film.pass_glossy_indirect) :
|
||||
((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
|
||||
kernel_data.film.pass_transmission_direct :
|
||||
kernel_data.film.pass_transmission_indirect);
|
||||
|
||||
if (path_flag & PATH_RAY_SURFACE_PASS) {
|
||||
/* Indirectly visible through reflection. */
|
||||
const float3 diffuse_weight = INTEGRATOR_STATE(state, shadow_path, pass_diffuse_weight);
|
||||
const float3 glossy_weight = INTEGRATOR_STATE(state, shadow_path, pass_glossy_weight);
|
||||
|
||||
/* Glossy */
|
||||
const int glossy_pass_offset = ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
|
||||
kernel_data.film.pass_glossy_direct :
|
||||
kernel_data.film.pass_glossy_indirect);
|
||||
if (glossy_pass_offset != PASS_UNUSED) {
|
||||
kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_weight * contribution);
|
||||
}
|
||||
|
||||
/* Transmission */
|
||||
const int transmission_pass_offset = ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
|
||||
kernel_data.film.pass_transmission_direct :
|
||||
kernel_data.film.pass_transmission_indirect);
|
||||
|
||||
if (transmission_pass_offset != PASS_UNUSED) {
|
||||
/* Transmission is what remains if not diffuse and glossy, not stored explicitly to save
|
||||
* GPU memory. */
|
||||
const float3 transmission_weight = one_float3() - diffuse_weight - glossy_weight;
|
||||
kernel_write_pass_float3(buffer + transmission_pass_offset,
|
||||
transmission_weight * contribution);
|
||||
}
|
||||
|
||||
/* Reconstruct diffuse subset of throughput. */
|
||||
pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
|
||||
kernel_data.film.pass_diffuse_direct :
|
||||
kernel_data.film.pass_diffuse_indirect;
|
||||
if (pass_offset != PASS_UNUSED) {
|
||||
contribution *= diffuse_weight;
|
||||
}
|
||||
}
|
||||
else if (path_flag & PATH_RAY_VOLUME_PASS) {
|
||||
/* Indirectly visible through volume. */
|
||||
pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
|
||||
kernel_data.film.pass_volume_direct :
|
||||
kernel_data.film.pass_volume_indirect;
|
||||
if (glossy_pass_offset != PASS_UNUSED) {
|
||||
/* Glossy is a subset of the throughput, reconstruct it here using the
|
||||
* diffuse-glossy ratio. */
|
||||
const float3 ratio = INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio);
|
||||
const float3 glossy_contribution = (one_float3() - ratio) * contribution;
|
||||
kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution);
|
||||
}
|
||||
|
||||
/* Single write call for GPU coherence. */
|
||||
/* Reconstruct diffuse subset of throughput. */
|
||||
pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
|
||||
kernel_data.film.pass_diffuse_direct :
|
||||
kernel_data.film.pass_diffuse_indirect;
|
||||
if (pass_offset != PASS_UNUSED) {
|
||||
kernel_write_pass_float3(buffer + pass_offset, contribution);
|
||||
contribution *= INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio);
|
||||
}
|
||||
}
|
||||
else if (path_flag & PATH_RAY_VOLUME_PASS) {
|
||||
/* Indirectly visible through volume. */
|
||||
pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
|
||||
kernel_data.film.pass_volume_direct :
|
||||
kernel_data.film.pass_volume_indirect;
|
||||
}
|
||||
|
||||
/* Single write call for GPU coherence. */
|
||||
if (pass_offset != PASS_UNUSED) {
|
||||
kernel_write_pass_float3(buffer + pass_offset, contribution);
|
||||
}
|
||||
|
||||
/* Write shadow pass. */
|
||||
if (kernel_data.film.pass_shadow != PASS_UNUSED && (path_flag & PATH_RAY_SHADOW_FOR_LIGHT) &&
|
||||
@@ -573,10 +540,11 @@ ccl_device_inline void kernel_accum_background(KernelGlobals kg,
/* Write emission to render buffer. */
ccl_device_inline void kernel_accum_emission(KernelGlobals kg,
                                             ConstIntegratorState state,
                                             const float3 throughput,
                                             const float3 L,
                                             ccl_global float *ccl_restrict render_buffer)
{
  float3 contribution = L;
  float3 contribution = throughput * L;
  kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1);

  ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer);
|
@@ -160,6 +160,40 @@ ccl_device_forceinline void kernel_write_denoising_features_volume(KernelGlobals
|
||||
}
|
||||
#endif /* __DENOISING_FEATURES__ */
|
||||
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
|
||||
/* Write shadow catcher passes on a bounce from the shadow catcher object. */
|
||||
ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
|
||||
KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
ccl_private const ShaderData *sd,
|
||||
ccl_global float *ccl_restrict render_buffer)
|
||||
{
|
||||
if (!kernel_data.integrator.has_shadow_catcher) {
|
||||
return;
|
||||
}
|
||||
|
||||
kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED);
|
||||
kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
|
||||
|
||||
if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, sd->object_flag)) {
|
||||
return;
|
||||
}
|
||||
|
||||
ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer);
|
||||
|
||||
/* Count sample for the shadow catcher object. */
|
||||
kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f);
|
||||
|
||||
/* Since the split is done, the sample does not contribute to the matte, so accumulate it as
|
||||
* transparency to the matte. */
|
||||
const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
|
||||
kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3,
|
||||
average(throughput));
|
||||
}
|
||||
|
||||
#endif /* __SHADOW_CATCHER__ */
|
||||
|
||||
ccl_device_inline size_t kernel_write_id_pass(ccl_global float *ccl_restrict buffer,
|
||||
size_t depth,
|
||||
float id,
|
||||
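/* Sketch of the matte write above, illustration only: the shadow catcher matte
 * pass stores RGBA, so the transparency of a split-off sample is accumulated
 * into component 3 (alpha) as the average of the path throughput. Stand-in
 * code with hypothetical names. */
static inline void example_accum_matte_transparency(float *matte_pass_rgba,
                                                    float tx, float ty, float tz)
{
  const float average_throughput = (tx + ty + tz) * (1.0f / 3.0f);
  matte_pass_rgba[3] += average_throughput; /* matches pass_shadow_catcher_matte + 3 */
}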
|
@@ -65,8 +65,7 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
|
||||
}
|
||||
|
||||
/* Always count the sample, even if the camera sample will reject the ray. */
|
||||
const int sample = kernel_accum_sample(
|
||||
kg, state, render_buffer, scheduled_sample, tile->sample_offset);
|
||||
const int sample = kernel_accum_sample(kg, state, render_buffer, scheduled_sample);
|
||||
|
||||
/* Setup render buffers. */
|
||||
const int index = INTEGRATOR_STATE(state, path, render_pixel_index);
|
||||
|
@@ -89,8 +89,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg,
|
||||
* This logic allows to both count actual number of samples per pixel, and to add samples to this
|
||||
* pixel after it was converged and samples were added somewhere else (in which case the
|
||||
* `scheduled_sample` will be different from actual number of samples in this pixel). */
|
||||
const int sample = kernel_accum_sample(
|
||||
kg, state, render_buffer, scheduled_sample, tile->sample_offset);
|
||||
const int sample = kernel_accum_sample(kg, state, render_buffer, scheduled_sample);
|
||||
|
||||
/* Initialize random number seed for path. */
|
||||
const uint rng_hash = path_rng_hash_init(kg, sample, x, y);
|
||||
|
@@ -31,6 +31,7 @@

CCL_NAMESPACE_BEGIN

template<uint32_t current_kernel>
ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg,
                                                           IntegratorState state,
                                                           const int shader_flags)
@@ -62,7 +63,6 @@ ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg,
   * perform MIS as part of indirect rays. */
  const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
  const float probability = path_state_continuation_probability(kg, state, path_flag);
  INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = probability;

  if (probability != 1.0f) {
    const float terminate = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE);
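/* Sketch of the template pattern above, illustration only: the kernel that is
 * doing the scheduling is passed as a compile-time constant, so the queueing
 * code indexes its counters with a literal instead of a runtime variable; per
 * the note in the diff this keeps the CUDA atomic bookkeeping fast. Names are
 * hypothetical. */
#include <cstdint>

template<uint32_t current_kernel>
static inline void example_schedule_next(int *queue_counters, uint32_t next_kernel)
{
  --queue_counters[current_kernel]; /* compile-time index */
  ++queue_counters[next_kernel];
}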
@@ -85,80 +85,36 @@ ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg,
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
/* Split path if a shadow catcher was hit. */
|
||||
ccl_device_forceinline void integrator_split_shadow_catcher(
|
||||
/* Note that current_kernel is a template value since making this a variable
|
||||
* leads to poor performance with CUDA atomics. */
|
||||
template<uint32_t current_kernel>
|
||||
ccl_device_forceinline void integrator_intersect_shader_next_kernel(
|
||||
KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
ccl_private const Intersection *ccl_restrict isect,
|
||||
ccl_global float *ccl_restrict render_buffer)
|
||||
const int shader,
|
||||
const int shader_flags)
|
||||
{
|
||||
/* Test if we hit a shadow catcher object, and potentially split the path to continue tracing two
|
||||
* paths from here. */
|
||||
const int object_flags = intersection_get_object_flags(kg, isect);
|
||||
if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, object_flags)) {
|
||||
return;
|
||||
}
|
||||
|
||||
kernel_write_shadow_catcher_bounce_data(kg, state, render_buffer);
|
||||
|
||||
/* Mark state as having done a shadow catcher split so that it stops contributing to
|
||||
* the shadow catcher matte pass, but keeps contributing to the combined pass. */
|
||||
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT;
|
||||
|
||||
/* Copy current state to new state. */
|
||||
state = integrator_state_shadow_catcher_split(kg, state);
|
||||
|
||||
/* Initialize new state.
|
||||
/* Note on scheduling.
|
||||
*
|
||||
* When there is no shadow catcher split the scheduling is simple: schedule surface shading with
|
||||
* or without raytrace support, depending on the shader used.
|
||||
*
|
||||
* When there is a shadow catcher split the general idea is to have the following configuration:
|
||||
*
|
||||
* - Schedule surface shading kernel (with corresponding raytrace support) for the ray which
|
||||
* will trace shadow catcher object.
|
||||
*
|
||||
* - When no alpha-over of approximate shadow catcher is needed, schedule surface shading for
|
||||
* the matte ray.
|
||||
*
|
||||
* - Otherwise schedule background shading kernel, so that we have a background to alpha-over
|
||||
* on. The background kernel will then schedule surface shading for the matte ray.
|
||||
*
|
||||
* Note that the splitting leaves kernel and sorting counters as-is, so use INIT semantic for
|
||||
* the matte path. */
|
||||
|
||||
/* Mark current state so that it will only track contribution of shadow catcher objects ignoring
|
||||
* non-catcher objects. */
|
||||
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_PASS;
|
||||
|
||||
if (kernel_data.film.pass_background != PASS_UNUSED && !kernel_data.background.transparent) {
|
||||
/* If using background pass, schedule background shading kernel so that we have a background
|
||||
* to alpha-over on. The background kernel will then continue the path afterwards. */
|
||||
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND;
|
||||
INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!integrator_state_volume_stack_is_empty(kg, state)) {
|
||||
/* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher
|
||||
* objects from it, and then continue shading volume and shadow catcher surface after. */
|
||||
INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Continue with shading shadow catcher surface. */
|
||||
const int shader = intersection_get_shader(kg, isect);
|
||||
const int flags = kernel_tex_fetch(__shaders, shader).flags;
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
|
||||
|
||||
if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
|
||||
}
|
||||
}
|
||||
|
||||
/* Schedule next kernel to be executed after updating volume stack for shadow catcher. */
|
||||
template<uint32_t current_kernel>
|
||||
ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_volume(
|
||||
KernelGlobals kg, IntegratorState state)
|
||||
{
|
||||
/* Continue with shading shadow catcher surface. Same as integrator_split_shadow_catcher, but
|
||||
* using NEXT instead of INIT. */
|
||||
Intersection isect ccl_optional_struct_init;
|
||||
integrator_state_read_isect(kg, state, &isect);
|
||||
|
||||
const int shader = intersection_get_shader(kg, &isect);
|
||||
const int flags = kernel_tex_fetch(__shaders, shader).flags;
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
|
||||
const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);
|
||||
|
||||
if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
@@ -167,141 +123,26 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
|
||||
else {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
|
||||
}
|
||||
}
|
||||
|
||||
/* Schedule next kernel to be executed after executing background shader for shadow catcher. */
|
||||
template<uint32_t current_kernel>
|
||||
ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_background(
|
||||
KernelGlobals kg, IntegratorState state)
|
||||
{
|
||||
/* Same logic as integrator_split_shadow_catcher, but using NEXT instead of INIT. */
|
||||
if (!integrator_state_volume_stack_is_empty(kg, state)) {
|
||||
/* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher
|
||||
* objects from it, and then continue shading volume and shadow catcher surface after. */
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Continue with shading shadow catcher surface. */
|
||||
integrator_intersect_next_kernel_after_shadow_catcher_volume<current_kernel>(kg, state);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Schedule next kernel to be executed after intersect closest.
|
||||
*
|
||||
* Note that current_kernel is a template value since making this a variable
|
||||
* leads to poor performance with CUDA atomics. */
|
||||
template<uint32_t current_kernel>
|
||||
ccl_device_forceinline void integrator_intersect_next_kernel(
|
||||
KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
ccl_private const Intersection *ccl_restrict isect,
|
||||
ccl_global float *ccl_restrict render_buffer,
|
||||
const bool hit)
|
||||
{
|
||||
/* Continue with volume kernel if we are inside a volume, regardless if we hit anything. */
|
||||
#ifdef __VOLUME__
|
||||
if (!integrator_state_volume_stack_is_empty(kg, state)) {
|
||||
const bool hit_surface = hit && !(isect->type & PRIMITIVE_LAMP);
|
||||
const int shader = (hit_surface) ? intersection_get_shader(kg, isect) : SHADER_NONE;
|
||||
const int flags = (hit_surface) ? kernel_tex_fetch(__shaders, shader).flags : 0;
|
||||
|
||||
if (!integrator_intersect_terminate(kg, state, flags)) {
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_TERMINATE(current_kernel);
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (hit) {
|
||||
/* Hit a surface, continue with light or surface kernel. */
|
||||
if (isect->type & PRIMITIVE_LAMP) {
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
|
||||
}
|
||||
else {
|
||||
/* Hit a surface, continue with surface kernel unless terminated. */
|
||||
const int shader = intersection_get_shader(kg, isect);
|
||||
const int flags = kernel_tex_fetch(__shaders, shader).flags;
|
||||
|
||||
if (!integrator_intersect_terminate(kg, state, flags)) {
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
|
||||
if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
|
||||
}
|
||||
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
/* Handle shadow catcher. */
|
||||
integrator_split_shadow_catcher(kg, state, isect, render_buffer);
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_TERMINATE(current_kernel);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Nothing hit, continue with background kernel. */
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
}
|
||||
}
|
||||
const int object_flags = intersection_get_object_flags(kg, isect);
|
||||
if (kernel_shadow_catcher_split(kg, state, object_flags)) {
|
||||
if (kernel_data.film.pass_background != PASS_UNUSED && !kernel_data.background.transparent) {
|
||||
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND;
|
||||
|
||||
/* Schedule next kernel to be executed after shade volume.
|
||||
*
|
||||
* The logic here matches integrator_intersect_next_kernel, except that
|
||||
* volume shading and termination testing have already been done. */
|
||||
template<uint32_t current_kernel>
|
||||
ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
|
||||
KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
ccl_private const Intersection *ccl_restrict isect,
|
||||
ccl_global float *ccl_restrict render_buffer)
|
||||
{
|
||||
if (isect->prim != PRIM_NONE) {
|
||||
/* Hit a surface, continue with light or surface kernel. */
|
||||
if (isect->type & PRIMITIVE_LAMP) {
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
|
||||
return;
|
||||
INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
}
|
||||
else if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
}
|
||||
else {
|
||||
/* Hit a surface, continue with surface kernel unless terminated. */
|
||||
const int shader = intersection_get_shader(kg, isect);
|
||||
const int flags = kernel_tex_fetch(__shaders, shader).flags;
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
|
||||
|
||||
if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
|
||||
}
|
||||
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
/* Handle shadow catcher. */
|
||||
integrator_split_shadow_catcher(kg, state, isect, render_buffer);
|
||||
#endif
|
||||
return;
|
||||
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Nothing hit, continue with background kernel. */
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device void integrator_intersect_closest(KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
ccl_global float *ccl_restrict render_buffer)
|
||||
ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState state)
|
||||
{
|
||||
PROFILING_INIT(kg, PROFILING_INTERSECT_CLOSEST);
|
||||
|
||||
@@ -350,9 +191,56 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg,
  /* Write intersection result into global integrator state memory. */
  integrator_state_write_isect(kg, state, &isect);

  /* Set up the next kernel to be executed. */
  integrator_intersect_next_kernel<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
      kg, state, &isect, render_buffer, hit);
#ifdef __VOLUME__
  if (!integrator_state_volume_stack_is_empty(kg, state)) {
    const bool hit_surface = hit && !(isect.type & PRIMITIVE_LAMP);
    const int shader = (hit_surface) ? intersection_get_shader(kg, &isect) : SHADER_NONE;
    const int flags = (hit_surface) ? kernel_tex_fetch(__shaders, shader).flags : 0;

    if (!integrator_intersect_terminate<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
            kg, state, flags)) {
      /* Continue with volume kernel if we are inside a volume, regardless
       * if we hit anything. */
      INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST,
                           DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
    }
    else {
      INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
    }
    return;
  }
#endif

  if (hit) {
    /* Hit a surface, continue with light or surface kernel. */
    if (isect.type & PRIMITIVE_LAMP) {
      INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST,
                           DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
      return;
    }
    else {
      /* Hit a surface, continue with surface kernel unless terminated. */
      const int shader = intersection_get_shader(kg, &isect);
      const int flags = kernel_tex_fetch(__shaders, shader).flags;

      if (!integrator_intersect_terminate<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
              kg, state, flags)) {
        integrator_intersect_shader_next_kernel<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
            kg, state, &isect, shader, flags);
        return;
      }
      else {
        INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
        return;
      }
    }
  }
  else {
    /* Nothing hit, continue with background kernel. */
    INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST,
                         DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
    return;
  }
}

CCL_NAMESPACE_END
@@ -42,13 +42,10 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
|
||||
/* Store to avoid global fetches on every intersection step. */
|
||||
const uint volume_stack_size = kernel_data.volume_stack_size;
|
||||
|
||||
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
|
||||
const uint32_t visibility = SHADOW_CATCHER_PATH_VISIBILITY(path_flag, PATH_RAY_ALL_VISIBILITY);
|
||||
|
||||
#ifdef __VOLUME_RECORD_ALL__
|
||||
Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1];
|
||||
uint num_hits = scene_intersect_volume_all(
|
||||
kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
|
||||
kg, &volume_ray, hits, 2 * volume_stack_size, PATH_RAY_ALL_VISIBILITY);
|
||||
if (num_hits > 0) {
|
||||
Intersection *isect = hits;
|
||||
|
||||
@@ -63,7 +60,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
|
||||
Intersection isect;
|
||||
int step = 0;
|
||||
while (step < 2 * volume_stack_size &&
|
||||
scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
|
||||
scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) {
|
||||
shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect);
|
||||
volume_stack_enter_exit(kg, state, stack_sd);
|
||||
|
||||
@@ -77,7 +74,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState state)
|
||||
ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorState state)
|
||||
{
|
||||
PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK);
|
||||
|
||||
@@ -86,26 +83,16 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s
|
||||
|
||||
Ray volume_ray ccl_optional_struct_init;
|
||||
integrator_state_read_ray(kg, state, &volume_ray);
|
||||
|
||||
/* Trace ray in random direction. Any direction works, Z up is a guess to get the
|
||||
* fewest hits. */
|
||||
volume_ray.D = make_float3(0.0f, 0.0f, 1.0f);
|
||||
volume_ray.t = FLT_MAX;
|
||||
|
||||
const uint visibility = (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_ALL_VISIBILITY);
|
||||
int stack_index = 0, enclosed_index = 0;
|
||||
|
||||
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
|
||||
const uint32_t visibility = SHADOW_CATCHER_PATH_VISIBILITY(path_flag, PATH_RAY_CAMERA);
|
||||
|
||||
/* Initialize volume stack with background volume For shadow catcher the
|
||||
* background volume is always assumed to be CG. */
|
||||
/* Write background shader. */
|
||||
if (kernel_data.background.volume_shader != SHADER_NONE) {
|
||||
if (!(path_flag & PATH_RAY_SHADOW_CATCHER_PASS)) {
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, stack_index, object) = OBJECT_NONE;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(
|
||||
state, volume_stack, stack_index, shader) = kernel_data.background.volume_shader;
|
||||
stack_index++;
|
||||
}
|
||||
const VolumeStack new_entry = {OBJECT_NONE, kernel_data.background.volume_shader};
|
||||
integrator_state_write_volume_stack(state, stack_index, new_entry);
|
||||
stack_index++;
|
||||
}
|
||||
|
||||
/* Store to avoid global fetches on every intersection step. */
|
||||
@@ -160,7 +147,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s
|
||||
int enclosed_volumes[MAX_VOLUME_STACK_SIZE];
|
||||
int step = 0;
|
||||
|
||||
while (stack_index < volume_stack_size - 1 && enclosed_index < MAX_VOLUME_STACK_SIZE - 1 &&
|
||||
while (stack_index < volume_stack_size - 1 && enclosed_index < volume_stack_size - 1 &&
|
||||
step < 2 * volume_stack_size) {
|
||||
Intersection isect;
|
||||
if (!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
|
||||
@@ -211,22 +198,9 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s
|
||||
/* Write terminator. */
|
||||
const VolumeStack new_entry = {OBJECT_NONE, SHADER_NONE};
|
||||
integrator_state_write_volume_stack(state, stack_index, new_entry);
|
||||
}
|
||||
|
||||
ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorState state)
|
||||
{
|
||||
integrator_volume_stack_init(kg, state);
|
||||
|
||||
if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SHADOW_CATCHER_PASS) {
|
||||
/* Volume stack re-init for shadow catcher, continue with shading of hit. */
|
||||
integrator_intersect_next_kernel_after_shadow_catcher_volume<
|
||||
DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK>(kg, state);
|
||||
}
|
||||
else {
|
||||
/* Volume stack init for camera rays, continue with intersection of camera ray. */
|
||||
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK,
|
||||
DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
|
||||
}
|
||||
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK,
|
||||
DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -76,7 +76,7 @@ ccl_device void integrator_megakernel(KernelGlobals kg,
  if (queued_kernel) {
    switch (queued_kernel) {
      case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
        integrator_intersect_closest(kg, state, render_buffer);
        integrator_intersect_closest(kg, state);
        break;
      case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND:
        integrator_shade_background(kg, state, render_buffer);
@@ -67,7 +67,6 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg,
  INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = 0.0f;
  INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
  INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = FLT_MAX;
  INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = 1.0f;
  INTEGRATOR_STATE_WRITE(state, path, throughput) = make_float3(1.0f, 1.0f, 1.0f);

  if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) {
@@ -185,7 +184,7 @@ ccl_device_inline void path_state_next(KernelGlobals kg, IntegratorState state,

  /* Render pass categories. */
  if (bounce == 1) {
    flag |= PATH_RAY_SURFACE_PASS;
    flag |= (label & LABEL_TRANSMIT) ? PATH_RAY_TRANSMISSION_PASS : PATH_RAY_REFLECT_PASS;
  }
}
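/* Sketch of the pass-category logic above, illustration only: only the first
 * bounce decides which light-pass family an indirect contribution belongs to;
 * transmissive samples go to the transmission passes, everything else to
 * reflection. Flag values here are hypothetical. */
#include <cstdint>

enum : uint32_t {
  EXAMPLE_TRANSMISSION_PASS = 1u << 0,
  EXAMPLE_REFLECT_PASS = 1u << 1,
};

static inline uint32_t example_pass_category(int bounce, bool is_transmit)
{
  if (bounce != 1) {
    return 0u; /* Category was already chosen on the first bounce. */
  }
  return is_transmit ? EXAMPLE_TRANSMISSION_PASS : EXAMPLE_REFLECT_PASS;
}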
@@ -175,7 +175,7 @@ ccl_device_inline void integrate_distant_lights(KernelGlobals kg,

      /* Write to render buffer. */
      const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
      kernel_accum_emission(kg, state, throughput * light_eval, render_buffer);
      kernel_accum_emission(kg, state, throughput, light_eval, render_buffer);
    }
  }
}
@@ -192,11 +192,23 @@ ccl_device void integrator_shade_background(KernelGlobals kg,

#ifdef __SHADOW_CATCHER__
  if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) {
    /* Special case for shadow catcher where we want to fill the background pass
     * behind the shadow catcher but also continue tracing the path. */
    INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_SHADOW_CATCHER_BACKGROUND;
    integrator_intersect_next_kernel_after_shadow_catcher_background<
        DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND>(kg, state);

    const int isect_prim = INTEGRATOR_STATE(state, isect, prim);
    const int isect_type = INTEGRATOR_STATE(state, isect, type);
    const int shader = intersection_get_shader_from_isect_prim(kg, isect_prim, isect_type);
    const int shader_flags = kernel_tex_fetch(__shaders, shader).flags;

    if (shader_flags & SD_HAS_RAYTRACE) {
      INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND,
                                  DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE,
                                  shader);
    }
    else {
      INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND,
                                  DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE,
                                  shader);
    }
    return;
  }
#endif
@@ -90,7 +90,7 @@ ccl_device_inline void integrate_light(KernelGlobals kg,

  /* Write to render buffer. */
  const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
  kernel_accum_emission(kg, state, throughput * light_eval, render_buffer);
  kernel_accum_emission(kg, state, throughput, light_eval, render_buffer);
}

ccl_device void integrator_shade_light(KernelGlobals kg,
@@ -101,7 +101,7 @@ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg,
|
||||
}
|
||||
|
||||
const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
|
||||
kernel_accum_emission(kg, state, throughput * L, render_buffer);
|
||||
kernel_accum_emission(kg, state, throughput, L, render_buffer);
|
||||
}
|
||||
#endif /* __EMISSION__ */
|
||||
|
||||
@@ -191,18 +191,14 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
|
||||
const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce);
|
||||
uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag);
|
||||
shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0;
|
||||
shadow_flag |= PATH_RAY_SURFACE_PASS;
|
||||
shadow_flag |= (is_transmission) ? PATH_RAY_TRANSMISSION_PASS : PATH_RAY_REFLECT_PASS;
|
||||
const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * bsdf_eval_sum(&bsdf_eval);
|
||||
|
||||
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
|
||||
const float3 pass_diffuse_weight = (bounce == 0) ?
|
||||
bsdf_eval_pass_diffuse_weight(&bsdf_eval) :
|
||||
INTEGRATOR_STATE(state, path, pass_diffuse_weight);
|
||||
const float3 pass_glossy_weight = (bounce == 0) ?
|
||||
bsdf_eval_pass_glossy_weight(&bsdf_eval) :
|
||||
INTEGRATOR_STATE(state, path, pass_glossy_weight);
|
||||
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight;
|
||||
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_glossy_weight) = pass_glossy_weight;
|
||||
const float3 diffuse_glossy_ratio = (bounce == 0) ?
|
||||
bsdf_eval_diffuse_glossy_ratio(&bsdf_eval) :
|
||||
INTEGRATOR_STATE(state, path, diffuse_glossy_ratio);
|
||||
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio;
|
||||
}
|
||||
|
||||
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE(
|
||||
@@ -287,9 +283,7 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
|
||||
|
||||
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
|
||||
if (INTEGRATOR_STATE(state, path, bounce) == 0) {
|
||||
INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = bsdf_eval_pass_diffuse_weight(
|
||||
&bsdf_eval);
|
||||
INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = bsdf_eval_pass_glossy_weight(
|
||||
INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = bsdf_eval_diffuse_glossy_ratio(
|
||||
&bsdf_eval);
|
||||
}
|
||||
}
|
||||
@@ -451,7 +445,7 @@ ccl_device bool integrate_surface(KernelGlobals kg,
|
||||
}
|
||||
#endif
|
||||
|
||||
shader_prepare_surface_closures(kg, state, &sd, path_flag);
|
||||
shader_prepare_surface_closures(kg, state, &sd);
|
||||
|
||||
#ifdef __HOLDOUT__
|
||||
/* Evaluate holdout. */
|
||||
@@ -485,7 +479,7 @@ ccl_device bool integrate_surface(KernelGlobals kg,
|
||||
if (!(path_flag & PATH_RAY_SUBSURFACE)) {
|
||||
const float probability = (path_flag & PATH_RAY_TERMINATE_ON_NEXT_SURFACE) ?
|
||||
0.0f :
|
||||
INTEGRATOR_STATE(state, path, continuation_probability);
|
||||
path_state_continuation_probability(kg, state, path_flag);
|
||||
if (probability == 0.0f) {
|
||||
return false;
|
||||
}
|
||||
@@ -498,6 +492,10 @@ ccl_device bool integrate_surface(KernelGlobals kg,
|
||||
kernel_write_denoising_features_surface(kg, state, &sd, render_buffer);
|
||||
#endif
|
||||
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
kernel_write_shadow_catcher_bounce_data(kg, state, &sd, render_buffer);
|
||||
#endif
|
||||
|
||||
/* Direct light. */
|
||||
PROFILING_EVENT(PROFILING_SHADE_SURFACE_DIRECT_LIGHT);
|
||||
integrate_surface_direct_light(kg, state, &sd, &rng_state);
|
||||
|
@@ -608,7 +608,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
|
||||
if (!result.indirect_scatter) {
|
||||
const float3 emission = volume_emission_integrate(
|
||||
&coeff, closure_flag, transmittance, dt);
|
||||
accum_emission += result.indirect_throughput * emission;
|
||||
accum_emission += emission;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -661,7 +661,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
|
||||
|
||||
/* Write accumulated emission. */
|
||||
if (!is_zero(accum_emission)) {
|
||||
kernel_accum_emission(kg, state, accum_emission, render_buffer);
|
||||
kernel_accum_emission(kg, state, result.indirect_throughput, accum_emission, render_buffer);
|
||||
}
|
||||
|
||||
# ifdef __DENOISING_FEATURES__
|
||||
@@ -794,11 +794,10 @@ ccl_device_forceinline void integrate_volume_direct_light(
|
||||
const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval);
|
||||
|
||||
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
|
||||
const float3 pass_diffuse_weight = (bounce == 0) ?
|
||||
one_float3() :
|
||||
INTEGRATOR_STATE(state, path, pass_diffuse_weight);
|
||||
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight;
|
||||
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_glossy_weight) = zero_float3();
|
||||
const float3 diffuse_glossy_ratio = (bounce == 0) ?
|
||||
one_float3() :
|
||||
INTEGRATOR_STATE(state, path, diffuse_glossy_ratio);
|
||||
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio;
|
||||
}
|
||||
|
||||
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE(
|
||||
@@ -877,8 +876,7 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
|
||||
INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput_phase;
|
||||
|
||||
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
|
||||
INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_float3();
|
||||
INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_float3();
|
||||
INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = one_float3();
|
||||
}
|
||||
|
||||
/* Update path state */
|
||||
@@ -943,7 +941,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
|
||||
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
|
||||
const float probability = (path_flag & PATH_RAY_TERMINATE_IN_NEXT_VOLUME) ?
|
||||
0.0f :
|
||||
INTEGRATOR_STATE(state, path, continuation_probability);
|
||||
path_state_continuation_probability(kg, state, path_flag);
|
||||
if (probability == 0.0f) {
|
||||
return VOLUME_PATH_MISSED;
|
||||
}
|
||||
@@ -1025,9 +1023,25 @@ ccl_device void integrator_shade_volume(KernelGlobals kg,
|
||||
}
|
||||
else {
|
||||
/* Continue to background, light or surface. */
|
||||
integrator_intersect_next_kernel_after_volume<DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME>(
|
||||
kg, state, &isect, render_buffer);
|
||||
return;
|
||||
if (isect.prim == PRIM_NONE) {
|
||||
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
return;
|
||||
}
|
||||
else if (isect.type & PRIMITIVE_LAMP) {
|
||||
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
|
||||
return;
|
||||
}
|
||||
else {
|
||||
/* Hit a surface, continue with surface kernel unless terminated. */
|
||||
const int shader = intersection_get_shader(kg, &isect);
|
||||
const int flags = kernel_tex_fetch(__shaders, shader).flags;
|
||||
|
||||
integrator_intersect_shader_next_kernel<DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME>(
|
||||
kg, state, &isect, shader, flags);
|
||||
return;
|
||||
}
|
||||
}
|
||||
#endif /* __VOLUME__ */
|
||||
}
|
||||
|
@@ -105,45 +105,8 @@ ccl_device_inline void shader_copy_volume_phases(ccl_private ShaderVolumePhases
|
||||
|
||||
ccl_device_inline void shader_prepare_surface_closures(KernelGlobals kg,
|
||||
ConstIntegratorState state,
|
||||
ccl_private ShaderData *sd,
|
||||
const uint32_t path_flag)
|
||||
ccl_private ShaderData *sd)
|
||||
{
|
||||
/* Filter out closures. */
|
||||
if (kernel_data.integrator.filter_closures) {
|
||||
if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_EMISSION) {
|
||||
sd->closure_emission_background = zero_float3();
|
||||
}
|
||||
|
||||
if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIRECT_LIGHT) {
|
||||
sd->flag &= ~SD_BSDF_HAS_EVAL;
|
||||
}
|
||||
|
||||
if (path_flag & PATH_RAY_CAMERA) {
|
||||
for (int i = 0; i < sd->num_closure; i++) {
|
||||
ccl_private ShaderClosure *sc = &sd->closure[i];
|
||||
|
||||
if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
|
||||
if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIFFUSE) {
|
||||
sc->type = CLOSURE_NONE_ID;
|
||||
sc->sample_weight = 0.0f;
|
||||
}
|
||||
}
|
||||
else if (CLOSURE_IS_BSDF_GLOSSY(sc->type)) {
|
||||
if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_GLOSSY) {
|
||||
sc->type = CLOSURE_NONE_ID;
|
||||
sc->sample_weight = 0.0f;
|
||||
}
|
||||
}
|
||||
else if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type)) {
|
||||
if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSMISSION) {
|
||||
sc->type = CLOSURE_NONE_ID;
|
||||
sc->sample_weight = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Defensive sampling.
|
||||
*
|
||||
* We can likely also do defensive sampling at deeper bounces, particularly
|
||||
@@ -246,7 +209,8 @@ ccl_device_inline float _shader_bsdf_multi_eval(KernelGlobals kg,
|
||||
float3 eval = bsdf_eval(kg, sd, sc, omega_in, is_transmission, &bsdf_pdf);
|
||||
|
||||
if (bsdf_pdf != 0.0f) {
|
||||
bsdf_eval_accum(result_eval, sc->type, eval * sc->weight);
|
||||
const bool is_diffuse = CLOSURE_IS_BSDF_DIFFUSE(sc->type);
|
||||
bsdf_eval_accum(result_eval, is_diffuse, eval * sc->weight, 1.0f);
|
||||
sum_pdf += bsdf_pdf * sc->sample_weight;
|
||||
}
|
||||
}
|
||||
@@ -271,7 +235,7 @@ ccl_device_inline
|
||||
ccl_private BsdfEval *bsdf_eval,
|
||||
const uint light_shader_flags)
|
||||
{
|
||||
bsdf_eval_init(bsdf_eval, CLOSURE_NONE_ID, zero_float3());
|
||||
bsdf_eval_init(bsdf_eval, false, zero_float3());
|
||||
|
||||
return _shader_bsdf_multi_eval(
|
||||
kg, sd, omega_in, is_transmission, NULL, bsdf_eval, 0.0f, 0.0f, light_shader_flags);
|
||||
@@ -364,7 +328,8 @@ ccl_device int shader_bsdf_sample_closure(KernelGlobals kg,
|
||||
label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
|
||||
|
||||
if (*pdf != 0.0f) {
|
||||
bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight);
|
||||
const bool is_diffuse = CLOSURE_IS_BSDF_DIFFUSE(sc->type);
|
||||
bsdf_eval_init(bsdf_eval, is_diffuse, eval * sc->weight);
|
||||
|
||||
if (sd->num_closure > 1) {
|
||||
const bool is_transmission = shader_bsdf_is_transmission(sd, *omega_in);
|
||||
@@ -690,7 +655,7 @@ ccl_device_inline float _shader_volume_phase_multi_eval(
|
||||
float3 eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf);
|
||||
|
||||
if (phase_pdf != 0.0f) {
|
||||
bsdf_eval_accum(result_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
|
||||
bsdf_eval_accum(result_eval, false, eval, 1.0f);
|
||||
sum_pdf += phase_pdf * svc->sample_weight;
|
||||
}
|
||||
|
||||
@@ -706,7 +671,7 @@ ccl_device float shader_volume_phase_eval(KernelGlobals kg,
|
||||
const float3 omega_in,
|
||||
ccl_private BsdfEval *phase_eval)
|
||||
{
|
||||
bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_float3());
|
||||
bsdf_eval_init(phase_eval, false, zero_float3());
|
||||
|
||||
return _shader_volume_phase_multi_eval(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f);
|
||||
}
|
||||
@@ -764,7 +729,7 @@ ccl_device int shader_volume_phase_sample(KernelGlobals kg,
|
||||
label = volume_phase_sample(sd, svc, randu, randv, &eval, omega_in, domega_in, pdf);
|
||||
|
||||
if (*pdf != 0.0f) {
|
||||
bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
|
||||
bsdf_eval_init(phase_eval, false, eval);
|
||||
}
|
||||
|
||||
return label;
|
||||
@@ -787,7 +752,7 @@ ccl_device int shader_phase_sample_closure(KernelGlobals kg,
|
||||
label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
|
||||
|
||||
if (*pdf != 0.0f)
|
||||
bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
|
||||
bsdf_eval_init(phase_eval, false, eval);
|
||||
|
||||
return label;
|
||||
}
|
||||
|
@@ -16,7 +16,6 @@

#pragma once

-#include "kernel/film/write_passes.h"
#include "kernel/integrator/path_state.h"
#include "kernel/integrator/state_util.h"

@@ -48,7 +47,7 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals
return false;
}

-if (path_flag & PATH_RAY_SHADOW_CATCHER_HIT) {
+if (path_flag & PATH_RAY_SHADOW_CATCHER_PASS) {
return false;
}

@@ -77,6 +76,33 @@ ccl_device_inline bool kernel_shadow_catcher_path_can_split(KernelGlobals kg,
return (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND) != 0;
}

+/* NOTE: Leaves kernel scheduling information untouched. Use INIT semantic for one of the paths
+* after this function. */
+ccl_device_inline bool kernel_shadow_catcher_split(KernelGlobals kg,
+IntegratorState state,
+const int object_flags)
+{
+#ifdef __SHADOW_CATCHER__
+
+if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, object_flags)) {
+return false;
+}
+
+/* The split is to be done. Mark the current state as such, so that it stops contributing to the
+* shadow catcher matte pass, but keeps contributing to the combined pass. */
+INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT;
+
+/* Split new state from the current one. This new state will only track contribution of shadow
+* catcher objects ignoring non-catcher objects. */
+integrator_state_shadow_catcher_split(kg, state);
+
+return true;
+#else
+(void)object_flags;
+return false;
+#endif
+}
+
#ifdef __SHADOW_CATCHER__

ccl_device_forceinline bool kernel_shadow_catcher_is_matte_path(const uint32_t path_flag)
@@ -89,28 +115,6 @@ ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(const uint32_t
return path_flag & PATH_RAY_SHADOW_CATCHER_PASS;
}

-/* Write shadow catcher passes on a bounce from the shadow catcher object. */
-ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
-KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
-{
-kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED);
-kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
-
-const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
-const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
-kernel_data.film.pass_stride;
-ccl_global float *buffer = render_buffer + render_buffer_offset;
-
-/* Count sample for the shadow catcher object. */
-kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f);
-
-/* Since the split is done, the sample does not contribute to the matte, so accumulate it as
-* transparency to the matte. */
-const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
-kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3,
-average(throughput));
-}
-
#endif /* __SHADOW_CATCHER__ */

CCL_NAMESPACE_END
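The split itself is mostly flag bookkeeping: the original path is tagged as having hit a catcher, and the copied path is tagged as the catcher-only pass. A tiny standalone sketch of that bookkeeping (hypothetical flag names, not the Cycles state API):

#include <cstdint>
#include <cstdio>

enum : uint32_t {
  RAY_SHADOW_CATCHER_HIT = 1u << 0,  // original path: stop writing the matte pass
  RAY_SHADOW_CATCHER_PASS = 1u << 1, // split path: only accumulate catcher objects
};

struct PathState {
  uint32_t flag;
};

// Mark the current path and return a copy that represents the catcher-only path.
static PathState split_shadow_catcher(PathState &state)
{
  state.flag |= RAY_SHADOW_CATCHER_HIT;
  PathState split = state; // copy of the path state
  split.flag |= RAY_SHADOW_CATCHER_PASS;
  return split;
}

int main()
{
  PathState state = {0};
  PathState split = split_shadow_catcher(state);
  printf("main=%u split=%u\n", state.flag, split.flag); // prints 1 and 3
  return 0;
}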
@@ -20,7 +20,7 @@ KERNEL_STRUCT_BEGIN(shadow_path)
/* Index of a pixel within the device render buffer. */
KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, render_pixel_index, KERNEL_FEATURE_PATH_TRACING)
/* Current sample number. */
-KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, sample, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, sample, KERNEL_FEATURE_PATH_TRACING)
/* Random number generator seed. */
KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, rng_hash, KERNEL_FEATURE_PATH_TRACING)
/* Random number dimension offset. */
@@ -46,9 +46,8 @@ KERNEL_STRUCT_MEMBER(shadow_path,
float3,
unshadowed_throughput,
KERNEL_FEATURE_SHADOW_PASS | KERNEL_FEATURE_AO_ADDITIVE)
-/* Ratio of throughput to distinguish diffuse / glossy / transmission render passes. */
-KERNEL_STRUCT_MEMBER(shadow_path, float3, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES)
-KERNEL_STRUCT_MEMBER(shadow_path, float3, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES)
+/* Ratio of throughput to distinguish diffuse and glossy render passes. */
+KERNEL_STRUCT_MEMBER(shadow_path, float3, diffuse_glossy_ratio, KERNEL_FEATURE_LIGHT_PASSES)
/* Number of intersections found by ray-tracing. */
KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, num_hits, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_END(shadow_path)
@@ -173,10 +173,10 @@ typedef const IntegratorShadowStateCPU *ccl_restrict ConstIntegratorShadowState;

/* Array access on GPU with Structure-of-Arrays. */

-typedef int IntegratorState;
-typedef int ConstIntegratorState;
-typedef int IntegratorShadowState;
-typedef int ConstIntegratorShadowState;
+typedef const int IntegratorState;
+typedef const int ConstIntegratorState;
+typedef const int IntegratorShadowState;
+typedef const int ConstIntegratorShadowState;

# define INTEGRATOR_STATE_NULL -1

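On the GPU the state handle is a plain integer index into structure-of-arrays buffers, while on the CPU it is a pointer to a per-path struct; the typedefs above only change the GPU handle type. A minimal sketch of the two access patterns (illustrative structs, not the INTEGRATOR_STATE macros):

#include <cstdio>

// CPU-style array-of-structs path state: the handle is a pointer.
struct PathStateAoS {
  float throughput;
  int bounce;
};

// GPU-style structure-of-arrays: one array per member, indexed by an int handle.
struct PathStateSoA {
  float throughput[4];
  int bounce[4];
};

int main()
{
  PathStateAoS cpu_state = {1.0f, 0};
  PathStateAoS *cpu_handle = &cpu_state; // handle is a pointer
  cpu_handle->bounce += 1;

  PathStateSoA gpu_states = {{1.0f, 1.0f, 1.0f, 1.0f}, {0, 0, 0, 0}};
  int gpu_handle = 2; // handle is just an index into the SoA buffers
  gpu_states.bounce[gpu_handle] += 1;

  printf("cpu bounce=%d gpu bounce=%d\n", cpu_state.bounce, gpu_states.bounce[gpu_handle]);
  return 0;
}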
@@ -25,7 +25,7 @@ KERNEL_STRUCT_BEGIN(path)
* The multiplication is delayed for later, so that state can use 32bit integer. */
KERNEL_STRUCT_MEMBER(path, uint32_t, render_pixel_index, KERNEL_FEATURE_PATH_TRACING)
/* Current sample number. */
-KERNEL_STRUCT_MEMBER(path, uint32_t, sample, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(path, uint16_t, sample, KERNEL_FEATURE_PATH_TRACING)
/* Current ray bounce depth. */
KERNEL_STRUCT_MEMBER(path, uint16_t, bounce, KERNEL_FEATURE_PATH_TRACING)
/* Current transparent ray bounce depth. */
@@ -56,13 +56,10 @@ KERNEL_STRUCT_MEMBER(path, float, mis_ray_pdf, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(path, float, mis_ray_t, KERNEL_FEATURE_PATH_TRACING)
/* Filter glossy. */
KERNEL_STRUCT_MEMBER(path, float, min_ray_pdf, KERNEL_FEATURE_PATH_TRACING)
-/* Continuation probability for path termination. */
-KERNEL_STRUCT_MEMBER(path, float, continuation_probability, KERNEL_FEATURE_PATH_TRACING)
/* Throughput. */
KERNEL_STRUCT_MEMBER(path, float3, throughput, KERNEL_FEATURE_PATH_TRACING)
-/* Ratio of throughput to distinguish diffuse / glossy / transmission render passes. */
-KERNEL_STRUCT_MEMBER(path, float3, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES)
-KERNEL_STRUCT_MEMBER(path, float3, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES)
+/* Ratio of throughput to distinguish diffuse and glossy render passes. */
+KERNEL_STRUCT_MEMBER(path, float3, diffuse_glossy_ratio, KERNEL_FEATURE_LIGHT_PASSES)
/* Denoising. */
KERNEL_STRUCT_MEMBER(path, float3, denoising_feature_throughput, KERNEL_FEATURE_DENOISING)
/* Shader sorting. */
@@ -326,8 +326,8 @@ ccl_device_inline void integrator_shadow_state_move(KernelGlobals kg,

/* NOTE: Leaves kernel scheduling information untouched. Use INIT semantic for one of the paths
* after this function. */
-ccl_device_inline IntegratorState integrator_state_shadow_catcher_split(KernelGlobals kg,
-IntegratorState state)
+ccl_device_inline void integrator_state_shadow_catcher_split(KernelGlobals kg,
+IntegratorState state)
{
#if defined(__KERNEL_GPU__)
ConstIntegratorState to_state = atomic_fetch_and_add_uint32(
@@ -337,14 +337,14 @@ ccl_device_inline IntegratorState integrator_state_shadow_catcher_split(KernelGl
#else
IntegratorStateCPU *ccl_restrict to_state = state + 1;

-/* Only copy the required subset for performance. */
+/* Only copy the required subset, since shadow intersections are big and irrelevant here. */
to_state->path = state->path;
to_state->ray = state->ray;
to_state->isect = state->isect;
integrator_state_copy_volume_stack(kg, to_state, state);
#endif

-return to_state;
+INTEGRATOR_STATE_WRITE(to_state, path, flag) |= PATH_RAY_SHADOW_CATCHER_PASS;
}

#ifdef __KERNEL_CPU__
@@ -79,8 +79,7 @@ ccl_device int subsurface_bounce(KernelGlobals kg,

if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
if (INTEGRATOR_STATE(state, path, bounce) == 0) {
-INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_float3();
-INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_float3();
+INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = one_float3();
}
}

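Both variants store a per-path weight at the first bounce that is later used to split light contributions between the diffuse and glossy passes. How exactly the ratio is applied lives elsewhere in the kernel; the sketch below is only an assumed illustration of using such a ratio as a lerp weight:

#include <cstdio>

// Split one radiance contribution into diffuse and glossy passes using a
// per-path ratio initialized at the first bounce (1 = fully diffuse).
static void write_light_passes(float contrib, float diffuse_glossy_ratio,
                               float *pass_diffuse, float *pass_glossy)
{
  *pass_diffuse += contrib * diffuse_glossy_ratio;
  *pass_glossy += contrib * (1.0f - diffuse_glossy_ratio);
}

int main()
{
  float pass_diffuse = 0.0f, pass_glossy = 0.0f;
  // A subsurface entry counts as fully diffuse, hence a ratio of 1 at bounce 0.
  write_light_passes(0.75f, 1.0f, &pass_diffuse, &pass_glossy);
  printf("diffuse=%f glossy=%f\n", pass_diffuse, pass_glossy);
  return 0;
}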
@@ -353,8 +353,8 @@ ccl_device bool light_sample_from_distant_ray(KernelGlobals kg,
/* compute pdf */
float invarea = klight->distant.invarea;
ls->pdf = invarea / (costheta * costheta * costheta);
-ls->eval_fac = ls->pdf;
ls->pdf *= kernel_data.integrator.pdf_lights;
+ls->eval_fac = ls->pdf;

return true;
}
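The only change in this hunk is whether eval_fac is captured before or after the pdf is multiplied by the light-count pdf, which decides whether the light-selection probability ends up in the evaluation factor. A short numeric check (plain arithmetic, not the Cycles light code):

#include <cstdio>

int main()
{
  const float invarea = 2.0f, costheta = 0.9f, pdf_lights = 0.25f;
  float pdf = invarea / (costheta * costheta * costheta);

  const float eval_fac_before = pdf;             // captured before pdf_lights is applied
  const float eval_fac_after = pdf * pdf_lights; // captured after pdf_lights is applied

  pdf *= pdf_lights;
  printf("pdf=%f before=%f after=%f\n", pdf, eval_fac_before, eval_fac_after);
  return 0;
}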
@@ -832,21 +832,16 @@ static bool get_object_attribute(const OSLGlobals::Attribute &attr,
{
if (attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector ||
attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) {
-const float *data = (const float *)attr.value.data();
-return set_attribute_float3(make_float3(data[0], data[1], data[2]), type, derivatives, val);
+return set_attribute_float3(*(float3 *)attr.value.data(), type, derivatives, val);
}
else if (attr.type == TypeFloat2) {
-const float *data = (const float *)attr.value.data();
-return set_attribute_float2(make_float2(data[0], data[1]), type, derivatives, val);
+return set_attribute_float2(*(float2 *)attr.value.data(), type, derivatives, val);
}
else if (attr.type == TypeDesc::TypeFloat) {
-const float *data = (const float *)attr.value.data();
-return set_attribute_float(data[0], type, derivatives, val);
+return set_attribute_float(*(float *)attr.value.data(), type, derivatives, val);
}
else if (attr.type == TypeRGBA || attr.type == TypeDesc::TypeFloat4) {
-const float *data = (const float *)attr.value.data();
-return set_attribute_float4(
-make_float4(data[0], data[1], data[2], data[3]), type, derivatives, val);
+return set_attribute_float4(*(float4 *)attr.value.data(), type, derivatives, val);
}
else if (attr.type == type) {
size_t datasize = attr.value.datasize();
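Both sides read the attribute's raw bytes: one copies component by component through make_float3 and friends, the other reinterprets the buffer pointer, which assumes the stored layout matches the target type exactly. A generic C++ sketch of the two approaches (not the OSL services code):

#include <cstdio>
#include <cstring>

struct float3 {
  float x, y, z;
};

int main()
{
  // Raw attribute storage, e.g. as handed over by an attribute system.
  const float data[3] = {0.1f, 0.2f, 0.3f};
  const void *bytes = data;

  // Variant A: build the vector from individual floats (layout-agnostic).
  const float *f = static_cast<const float *>(bytes);
  float3 a = {f[0], f[1], f[2]};

  // Variant B: treat the bytes as one float3; valid only if the source really
  // stores three tightly packed floats (memcpy avoids strict-aliasing issues).
  float3 b;
  std::memcpy(&b, bytes, sizeof(float3));

  printf("a=(%g %g %g) b=(%g %g %g)\n", a.x, a.y, a.z, b.x, b.y, b.z);
  return 0;
}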
@@ -132,12 +132,10 @@ static void shaderdata_to_shaderglobals(const KernelGlobalsCPU *kg,
/* Used by render-services. */
sd->osl_globals = kg;
if (path_flag & PATH_RAY_SHADOW) {
sd->osl_path_state = nullptr;
sd->osl_shadow_path_state = (const IntegratorShadowStateCPU *)state;
}
else {
sd->osl_path_state = (const IntegratorStateCPU *)state;
sd->osl_shadow_path_state = nullptr;
}
}

@@ -697,7 +697,7 @@ shader node_musgrave_texture(
output float Fac = 0.0)
{
float dimension = max(Dimension, 1e-5);
-float octaves = clamp(Detail, 0.0, 15.0);
+float octaves = clamp(Detail, 0.0, 16.0);
float lacunarity = max(Lacunarity, 1e-5);

vector3 s = Vector;
@@ -90,7 +90,7 @@ float fractal_noise(float p, float details, float roughness)
float amp = 1.0;
float maxamp = 0.0;
float sum = 0.0;
-float octaves = clamp(details, 0.0, 15.0);
+float octaves = clamp(details, 0.0, 16.0);
int n = (int)octaves;
for (int i = 0; i <= n; i++) {
float t = safe_noise(fscale * p);
@@ -119,7 +119,7 @@ float fractal_noise(vector2 p, float details, float roughness)
float amp = 1.0;
float maxamp = 0.0;
float sum = 0.0;
-float octaves = clamp(details, 0.0, 15.0);
+float octaves = clamp(details, 0.0, 16.0);
int n = (int)octaves;
for (int i = 0; i <= n; i++) {
float t = safe_noise(fscale * p);
@@ -148,7 +148,7 @@ float fractal_noise(vector3 p, float details, float roughness)
float amp = 1.0;
float maxamp = 0.0;
float sum = 0.0;
-float octaves = clamp(details, 0.0, 15.0);
+float octaves = clamp(details, 0.0, 16.0);
int n = (int)octaves;
for (int i = 0; i <= n; i++) {
float t = safe_noise(fscale * p);
@@ -177,7 +177,7 @@ float fractal_noise(vector4 p, float details, float roughness)
float amp = 1.0;
float maxamp = 0.0;
float sum = 0.0;
-float octaves = clamp(details, 0.0, 15.0);
+float octaves = clamp(details, 0.0, 16.0);
int n = (int)octaves;
for (int i = 0; i <= n; i++) {
float t = safe_noise(fscale * p);
@@ -27,7 +27,7 @@ ccl_device_noinline float fractal_noise_1d(float p, float octaves, float roughne
float amp = 1.0f;
float maxamp = 0.0f;
float sum = 0.0f;
-octaves = clamp(octaves, 0.0f, 15.0f);
+octaves = clamp(octaves, 0.0f, 16.0f);
int n = float_to_int(octaves);
for (int i = 0; i <= n; i++) {
float t = noise_1d(fscale * p);
@@ -56,7 +56,7 @@ ccl_device_noinline float fractal_noise_2d(float2 p, float octaves, float roughn
float amp = 1.0f;
float maxamp = 0.0f;
float sum = 0.0f;
-octaves = clamp(octaves, 0.0f, 15.0f);
+octaves = clamp(octaves, 0.0f, 16.0f);
int n = float_to_int(octaves);
for (int i = 0; i <= n; i++) {
float t = noise_2d(fscale * p);
@@ -85,7 +85,7 @@ ccl_device_noinline float fractal_noise_3d(float3 p, float octaves, float roughn
float amp = 1.0f;
float maxamp = 0.0f;
float sum = 0.0f;
-octaves = clamp(octaves, 0.0f, 15.0f);
+octaves = clamp(octaves, 0.0f, 16.0f);
int n = float_to_int(octaves);
for (int i = 0; i <= n; i++) {
float t = noise_3d(fscale * p);
@@ -114,7 +114,7 @@ ccl_device_noinline float fractal_noise_4d(float4 p, float octaves, float roughn
float amp = 1.0f;
float maxamp = 0.0f;
float sum = 0.0f;
-octaves = clamp(octaves, 0.0f, 15.0f);
+octaves = clamp(octaves, 0.0f, 16.0f);
int n = float_to_int(octaves);
for (int i = 0; i <= n; i++) {
float t = noise_4d(fscale * p);
@@ -737,7 +737,7 @@ ccl_device_noinline int svm_node_tex_musgrave(KernelGlobals kg,
float gain = stack_load_float_default(stack, gain_stack_offset, defaults2.z);

dimension = fmaxf(dimension, 1e-5f);
-detail = clamp(detail, 0.0f, 15.0f);
+detail = clamp(detail, 0.0f, 16.0f);
lacunarity = fmaxf(lacunarity, 1e-5f);

float fac;
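Every hunk in this group only changes the upper clamp of the octave count (15 vs 16), which bounds the fBm loop that follows. A self-contained sketch of such a loop, with a stand-in noise function rather than the kernel's Perlin noise:

#include <cmath>
#include <cstdio>

// Stand-in 1D noise; the real kernels use Perlin-style noise.
static float fake_noise(float p)
{
  return 0.5f * std::sin(p * 12.9898f) + 0.5f;
}

// fBm: sum octaves of noise with decreasing amplitude and increasing frequency.
// The clamp on `octaves` (15 vs 16 in the diff) bounds the loop count.
static float fractal_noise(float p, float octaves, float roughness)
{
  float fscale = 1.0f, amp = 1.0f, maxamp = 0.0f, sum = 0.0f;
  octaves = std::fmin(std::fmax(octaves, 0.0f), 16.0f);
  const int n = static_cast<int>(octaves);
  for (int i = 0; i <= n; i++) {
    sum += fake_noise(fscale * p) * amp;
    maxamp += amp;
    amp *= roughness;
    fscale *= 2.0f;
  }
  return sum / maxamp; // normalize by the accumulated amplitude
}

int main()
{
  printf("fbm = %f\n", fractal_noise(0.37f, 16.0f, 0.5f));
  return 0;
}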
@@ -286,26 +286,27 @@ enum PathRayFlag {
PATH_RAY_DENOISING_FEATURES = (1U << 23U),

/* Render pass categories. */
-PATH_RAY_SURFACE_PASS = (1U << 24U),
-PATH_RAY_VOLUME_PASS = (1U << 25U),
-PATH_RAY_ANY_PASS = (PATH_RAY_SURFACE_PASS | PATH_RAY_VOLUME_PASS),
+PATH_RAY_REFLECT_PASS = (1U << 24U),
+PATH_RAY_TRANSMISSION_PASS = (1U << 25U),
+PATH_RAY_VOLUME_PASS = (1U << 26U),
+PATH_RAY_ANY_PASS = (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS | PATH_RAY_VOLUME_PASS),

/* Shadow ray is for a light or surface, or AO. */
-PATH_RAY_SHADOW_FOR_LIGHT = (1U << 26U),
-PATH_RAY_SHADOW_FOR_AO = (1U << 27U),
+PATH_RAY_SHADOW_FOR_LIGHT = (1U << 27U),
+PATH_RAY_SHADOW_FOR_AO = (1U << 28U),

/* A shadow catcher object was hit and the path was split into two. */
-PATH_RAY_SHADOW_CATCHER_HIT = (1U << 28U),
+PATH_RAY_SHADOW_CATCHER_HIT = (1U << 29U),

/* A shadow catcher object was hit and this path traces only shadow catchers, writing them into
* their dedicated pass for later division.
*
* NOTE: Is not covered with `PATH_RAY_ANY_PASS` because shadow catcher does special handling
* which is separate from the light passes. */
-PATH_RAY_SHADOW_CATCHER_PASS = (1U << 29U),
+PATH_RAY_SHADOW_CATCHER_PASS = (1U << 30U),

/* Path is evaluating background for an approximate shadow catcher with non-transparent film. */
-PATH_RAY_SHADOW_CATCHER_BACKGROUND = (1U << 30U),
+PATH_RAY_SHADOW_CATCHER_BACKGROUND = (1U << 31U),
};

/* Configure ray visibility bits for rays and objects respectively,
@@ -427,19 +428,8 @@ typedef enum CryptomatteType {
typedef struct BsdfEval {
float3 diffuse;
float3 glossy;
float3 sum;
} BsdfEval;
-
-/* Closure Filter */
-
-typedef enum FilterClosures {
-FILTER_CLOSURE_EMISSION = (1 << 0),
-FILTER_CLOSURE_DIFFUSE = (1 << 1),
-FILTER_CLOSURE_GLOSSY = (1 << 2),
-FILTER_CLOSURE_TRANSMISSION = (1 << 3),
-FILTER_CLOSURE_DIRECT_LIGHT = (1 << 4),
-} FilterClosures;
-
/* Shader Flag */

typedef enum ShaderFlag {
@@ -1196,11 +1186,7 @@ typedef struct KernelIntegrator {
int has_shadow_catcher;
float scrambling_distance;
-
-/* Closure filter. */
-int filter_closures;
-
/* padding */
int pad1, pad2, pad3;
} KernelIntegrator;
static_assert_align(KernelIntegrator, 16);

@@ -1424,7 +1410,6 @@ typedef struct KernelWorkTile {

uint start_sample;
uint num_samples;
-uint sample_offset;

int offset;
uint stride;
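The renumbering matters because all of these flags share one 32-bit field: splitting the surface pass into reflect and transmission passes shifts every later flag up by one bit and occupies the last free bit. A tiny sketch of packing and testing such flags (generic enum, not the kernel's):

#include <cstdint>
#include <cstdio>

enum PathFlag : uint32_t {
  FLAG_REFLECT_PASS = 1u << 24,
  FLAG_TRANSMISSION_PASS = 1u << 25,
  FLAG_VOLUME_PASS = 1u << 26,
  FLAG_ANY_PASS = FLAG_REFLECT_PASS | FLAG_TRANSMISSION_PASS | FLAG_VOLUME_PASS,
  FLAG_CATCHER_BACKGROUND = 1u << 31, // the highest bit a 32-bit flag field can hold
};

int main()
{
  uint32_t path_flag = FLAG_REFLECT_PASS | FLAG_CATCHER_BACKGROUND;
  printf("any pass:   %d\n", (path_flag & FLAG_ANY_PASS) != 0);
  printf("background: %d\n", (path_flag & FLAG_CATCHER_BACKGROUND) != 0);
  return 0;
}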
@@ -43,7 +43,7 @@ bool ConstantFolder::all_inputs_constant() const

void ConstantFolder::make_constant(float value) const
{
-VLOG(3) << "Folding " << node->name << "::" << output->name() << " to constant (" << value
+VLOG(1) << "Folding " << node->name << "::" << output->name() << " to constant (" << value
<< ").";

foreach (ShaderInput *sock, output->links) {
@@ -56,7 +56,7 @@ void ConstantFolder::make_constant(float value) const

void ConstantFolder::make_constant(float3 value) const
{
-VLOG(3) << "Folding " << node->name << "::" << output->name() << " to constant " << value << ".";
+VLOG(1) << "Folding " << node->name << "::" << output->name() << " to constant " << value << ".";

foreach (ShaderInput *sock, output->links) {
sock->set(value);
@@ -112,7 +112,7 @@ void ConstantFolder::bypass(ShaderOutput *new_output) const
{
assert(new_output);

-VLOG(3) << "Folding " << node->name << "::" << output->name() << " to socket "
+VLOG(1) << "Folding " << node->name << "::" << output->name() << " to socket "
<< new_output->parent->name << "::" << new_output->name() << ".";

/* Remove all outgoing links from socket and connect them to new_output instead.
@@ -131,7 +131,7 @@ void ConstantFolder::discard() const
{
assert(output->type() == SocketType::CLOSURE);

-VLOG(3) << "Discarding closure " << node->name << ".";
+VLOG(1) << "Discarding closure " << node->name << ".";

graph->disconnect(output);
}
@@ -187,6 +187,8 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
kfilm->pass_transmission_indirect = PASS_UNUSED;
kfilm->pass_volume_direct = PASS_UNUSED;
kfilm->pass_volume_indirect = PASS_UNUSED;
+kfilm->pass_volume_direct = PASS_UNUSED;
+kfilm->pass_volume_indirect = PASS_UNUSED;
kfilm->pass_shadow = PASS_UNUSED;

/* Mark passes as unused so that the kernel knows the pass is inaccessible. */
@@ -671,12 +673,13 @@ uint Film::get_kernel_features(const Scene *scene) const
kernel_features |= KERNEL_FEATURE_DENOISING;
}

-if (pass_type >= PASS_DIFFUSE && pass_type <= PASS_VOLUME_INDIRECT) {
+if (pass_type != PASS_NONE && pass_type != PASS_COMBINED &&
+pass_type <= PASS_CATEGORY_LIGHT_END) {
kernel_features |= KERNEL_FEATURE_LIGHT_PASSES;
-}

-if (pass_type == PASS_SHADOW) {
-kernel_features |= KERNEL_FEATURE_SHADOW_PASS;
+if (pass_type == PASS_SHADOW) {
+kernel_features |= KERNEL_FEATURE_SHADOW_PASS;
+}
}

if (pass_type == PASS_AO) {
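The feature check moves from an explicit pass range to a category boundary, so any light-related pass enables KERNEL_FEATURE_LIGHT_PASSES. A compact sketch of a category-boundary test with hypothetical enum values (the real PassType ordering lives in the kernel headers):

#include <cstdio>

// Hypothetical pass enum where all light-related passes sort before the
// category end marker, so one comparison covers the whole group.
enum PassType {
  PASS_NONE = 0,
  PASS_COMBINED,
  PASS_DIFFUSE,
  PASS_GLOSSY,
  PASS_SHADOW,
  PASS_CATEGORY_LIGHT_END,
  PASS_DEPTH, // not a light pass
};

static bool needs_light_passes(PassType pass_type)
{
  return pass_type != PASS_NONE && pass_type != PASS_COMBINED &&
         pass_type <= PASS_CATEGORY_LIGHT_END;
}

int main()
{
  printf("diffuse: %d\n", needs_light_passes(PASS_DIFFUSE)); // 1
  printf("depth:   %d\n", needs_light_passes(PASS_DEPTH));   // 0
  return 0;
}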
@@ -1588,20 +1588,9 @@ void GeometryManager::device_update_displacement_images(Device *device,
set<int> bump_images;
foreach (Geometry *geom, scene->geometry) {
if (geom->is_modified()) {
-/* Geometry-level check for hair shadow transparency.
-* This matches the logic in the `Hair::update_shadow_transparency()`, avoiding access to
-* possible non-loaded images. */
-bool need_shadow_transparency = false;
-if (geom->geometry_type == Geometry::HAIR) {
-Hair *hair = static_cast<Hair *>(geom);
-need_shadow_transparency = hair->need_shadow_transparency();
-}
-
foreach (Node *node, geom->get_used_shaders()) {
Shader *shader = static_cast<Shader *>(node);
-const bool is_true_displacement = (shader->has_displacement &&
-shader->get_displacement_method() != DISPLACE_BUMP);
-if (!is_true_displacement && !need_shadow_transparency) {
+if (!shader->has_displacement || shader->get_displacement_method() == DISPLACE_BUMP) {
continue;
}
foreach (ShaderNode *node, shader->graph->nodes) {
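The removed side additionally preloads images when a hair geometry needs shadow transparency, not only for true displacement. A condensed sketch of that combined condition (hypothetical structs, not the scene classes):

#include <cstdio>

struct ShaderInfo {
  bool has_displacement;
  bool displace_bump_only; // displacement method is bump-only
};

// Decide whether a shader's images must be loaded up front, combining true
// displacement with a geometry-level hair-transparency need.
static bool needs_image_preload(const ShaderInfo &shader, bool need_shadow_transparency)
{
  const bool is_true_displacement = shader.has_displacement && !shader.displace_bump_only;
  return is_true_displacement || need_shadow_transparency;
}

int main()
{
  printf("%d\n", needs_image_preload({true, false}, false)); // 1: true displacement
  printf("%d\n", needs_image_preload({false, false}, true)); // 1: hair transparency
  printf("%d\n", needs_image_preload({true, true}, false));  // 0: bump only
  return 0;
}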
Some files were not shown because too many files have changed in this diff.