Compare commits
303 Commits
temp-T9593
...
file-brows
Author | SHA1 | Date | |
---|---|---|---|
3a97c4056f | |||
9dbcefb10e | |||
85f0b2ef5d | |||
c4d8e28aa7 | |||
0c6ae51d9f | |||
712960cefd | |||
bffc1fbf31 | |||
3ea91cecc9 | |||
![]() |
7561183830 | ||
6bae10ef45 | |||
0f8e58e7b8 | |||
29c68e2523 | |||
a814c7091b | |||
7f8d05131a | |||
8d69c6c4e7 | |||
e6855507a5 | |||
054a169be0 | |||
ae49e0e8be | |||
![]() |
c48dc61749 | ||
![]() |
92ca920c52 | ||
ecf4f4a71f | |||
c3b9a4e001 | |||
215f805ce6 | |||
2551cf9087 | |||
5d6e4822d8 | |||
40ffb94ab4 | |||
410a6efb74 | |||
e9f82d3dc7 | |||
![]() |
28985ccc05 | ||
43d04c7eeb | |||
![]() |
3b7ac10d62 | ||
75e62df429 | |||
cd478fbfb3 | |||
edc89c7f46 | |||
d14c2d549b | |||
597955d0a8 | |||
16b145bc62 | |||
fb9dc810f1 | |||
![]() |
801513efa0 | ||
87ae10a050 | |||
35b4e3a350 | |||
c771dd5e9c | |||
6a1ab4747b | |||
9246ff373a | |||
![]() |
9835d5e58b | ||
![]() |
6bba4d864e | ||
3370c1a8a7 | |||
ad5e3d30a2 | |||
2e3fb58128 | |||
5bee991132 | |||
348ec37f52 | |||
![]() |
2f834bfc14 | ||
![]() |
4812eda3c5 | ||
2232855b50 | |||
95fd7c3679 | |||
bc6b612d8b | |||
![]() |
44f1495b57 | ||
e8465f941c | |||
835203fde8 | |||
73f8a7ca0a | |||
6d2100f7de | |||
99faebfca6 | |||
2280a71f90 | |||
9b2b61a07b | |||
d3c063188e | |||
ff98b5eaa8 | |||
e00a027c1e | |||
533a5a6a8c | |||
bbf87c4f75 | |||
7ebd1f4b79 | |||
37922eab90 | |||
135e530356 | |||
![]() |
d175eb6c30 | ||
8358cc7963 | |||
cd21022b78 | |||
935b7a6f65 | |||
1f8567ac68 | |||
757041560f | |||
3407ed5f9b | |||
![]() |
dec8854bf3 | ||
![]() |
47f3b53756 | ||
c7f788b877 | |||
3c016fbfd0 | |||
cd1e4ae448 | |||
9a14887905 | |||
d136a996ca | |||
![]() |
1e4c557d82 | ||
0a8d21e0c9 | |||
49babc7caa | |||
f76b537d48 | |||
bf49e6040c | |||
b985437283 | |||
92a99c1496 | |||
5152c7c152 | |||
bb376da6df | |||
03aeef64d5 | |||
c505f19efe | |||
1cf465bbc3 | |||
5e1229f253 | |||
82f65d8971 | |||
011d3c75a7 | |||
00dc747702 | |||
![]() |
180db0f752 | ||
914617f8fd | |||
523bbf7065 | |||
da4ef05e4d | |||
5653c5fcdd | |||
79da7f2a8f | |||
![]() |
2e70d5cb98 | ||
5ddbc14bb2 | |||
9ea1b88f0f | |||
b8ffd43bd2 | |||
4d7c990180 | |||
e69c4482f1 | |||
![]() |
8e1323f633 | ||
f4d7ea2cf6 | |||
98f688ac42 | |||
862170c0b1 | |||
ca1daf4cda | |||
c2715dc416 | |||
63ea0f7581 | |||
8fd2b79ca1 | |||
d8094f9212 | |||
60f260eb6a | |||
d14d570580 | |||
c8e8f107bf | |||
675f6ef089 | |||
178868cf42 | |||
0e9367fc29 | |||
b1329d7eaa | |||
9fedcde750 | |||
bdd0ac5bce | |||
1ef686bd26 | |||
b6de6da59a | |||
3b15467e97 | |||
![]() |
1b5db02a02 | ||
28c3739a9b | |||
5539fb3121 | |||
02ce29c6ee | |||
4b1d315017 | |||
47d4ce498e | |||
2d04012e57 | |||
9dfabc1de3 | |||
93f74299f0 | |||
cdd8b96e3b | |||
eb3e56a36e | |||
db80cf6ad7 | |||
64e196422e | |||
7fa7722350 | |||
6cd30d5ff0 | |||
9024ac31be | |||
cb62095c1c | |||
96cc603037 | |||
c8a07ef663 | |||
bcdce4ffd8 | |||
44e530e1b1 | |||
![]() |
9d73bbd966 | ||
![]() |
8e3879ab52 | ||
![]() |
77df9d788a | ||
![]() |
f48fadc953 | ||
![]() |
1571ee66b5 | ||
3935bf255e | |||
411bcf1fe7 | |||
b35e33317d | |||
d6fef73ef1 | |||
816a73891b | |||
d3374e5337 | |||
931779197a | |||
09a74ff8b6 | |||
50d832634e | |||
144d9f2b2e | |||
88fbf0a8fc | |||
ccdf189d3c | |||
441dd08dba | |||
c484599687 | |||
c8be3d3b27 | |||
74888cdbfd | |||
a084839605 | |||
b3913d7551 | |||
9422627155 | |||
8f543a73ab | |||
d0a552b5c6 | |||
5f09440d5a | |||
9f153949f9 | |||
1c382a4940 | |||
02aefa7659 | |||
b767628173 | |||
![]() |
93253d5dcc | ||
d58072caf4 | |||
47dd42485e | |||
4a445c8dc0 | |||
f72cedffb6 | |||
2d1fe736fa | |||
57097e9a85 | |||
![]() |
4344b2bf19 | ||
52b7f2b089 | |||
bb3a538843 | |||
8bd32019ca | |||
6e6da22eb0 | |||
b8d1e576bc | |||
ae6a4fcc7a | |||
00c7e760b3 | |||
2a1d12d7a0 | |||
995c904d00 | |||
8ca09e6c5e | |||
![]() |
76d8614236 | ||
6e426259b4 | |||
6ca5ac2084 | |||
275419f6fd | |||
1c4c904786 | |||
da101118d4 | |||
7357176b57 | |||
cb39058f2f | |||
133d398120 | |||
d51bc8215f | |||
d4a4691c0c | |||
a83502f05f | |||
7f4ee97b9e | |||
6543290116 | |||
fad857f473 | |||
4114ace616 | |||
443690604f | |||
1de14061cb | |||
9a1d772339 | |||
ef970b7756 | |||
b5d22a8134 | |||
80f8b7cbbb | |||
e3801a2bd4 | |||
d9e00fbbf6 | |||
bc2121147f | |||
2ee6891728 | |||
d8e980a4a6 | |||
8159e0a666 | |||
f639b59a29 | |||
b876ce2a4a | |||
f391e8f316 | |||
0f50ae131f | |||
5723bf926d | |||
becb1530b1 | |||
2c55d8c1cf | |||
05b38ecc78 | |||
aa78278ef6 | |||
155bb95353 | |||
![]() |
a8f7d41d38 | ||
![]() |
2c4dfe3453 | ||
754dae6c76 | |||
47616992f8 | |||
418d82af28 | |||
6fc388743d | |||
66204860ca | |||
e1ced645fa | |||
5162632a20 | |||
3f7015a79f | |||
560c20e067 | |||
63cc972402 | |||
c8c8783088 | |||
1049b0c818 | |||
46934eaf25 | |||
c67a650718 | |||
065bc42ce5 | |||
063689b8a8 | |||
eaa58a1607 | |||
72e67691ed | |||
e1a8a15945 | |||
e91e4bf8e5 | |||
9ae2259db5 | |||
9bac0894f6 | |||
731c1be92a | |||
3d31ad823a | |||
fdc5301205 | |||
2ce7f02a06 | |||
91853d95a9 | |||
eafd98c920 | |||
39eab45c8e | |||
ef58467594 | |||
8f48dd8f72 | |||
d46357dd25 | |||
696295f849 | |||
c9c332f422 | |||
18f8749fb7 | |||
947c73578c | |||
f41368cf02 | |||
f29fa9895f | |||
ff213c802c | |||
5fcf6822dd | |||
9530fb60ad | |||
86ea1ad5df | |||
2011d1f6d0 | |||
f17ea3da02 | |||
400d7235c3 | |||
29fdd43605 | |||
eab2a8479a | |||
ee013f44b5 | |||
f20814892a | |||
39e3580065 | |||
d2c4918d77 | |||
11d0c91ba5 | |||
925b82efb0 | |||
3df2e4e888 | |||
a54bd5fe19 | |||
4b43bd820e | |||
b8b7b0af70 | |||
b3597b1128 |
@@ -565,15 +565,19 @@ endif()
|
||||
option(WITH_OPENGL "When off limits visibility of the opengl headers to just bf_gpu and gawain (temporary option for development purposes)" ON)
|
||||
option(WITH_GLEW_ES "Switches to experimental copy of GLEW that has support for OpenGL ES. (temporary option for development purposes)" OFF)
|
||||
option(WITH_GL_PROFILE_ES20 "Support using OpenGL ES 2.0. (through either EGL or the AGL/WGL/XGL 'es20' profile)" OFF)
|
||||
option(WITH_GPU_SHADER_BUILDER "Shader builder is a developer option enabling linting on GLSL during compilation" OFF)
|
||||
option(WITH_GPU_BUILDTIME_SHADER_BUILDER "Shader builder is a developer option enabling linting on GLSL during compilation" OFF)
|
||||
|
||||
mark_as_advanced(
|
||||
WITH_OPENGL
|
||||
WITH_GLEW_ES
|
||||
WITH_GL_PROFILE_ES20
|
||||
WITH_GPU_SHADER_BUILDER
|
||||
WITH_GPU_BUILDTIME_SHADER_BUILDER
|
||||
)
|
||||
|
||||
if(WITH_HEADLESS)
|
||||
set(WITH_OPENGL OFF)
|
||||
endif()
|
||||
|
||||
# Metal
|
||||
|
||||
if (APPLE)
|
||||
|
@@ -63,6 +63,8 @@ set(DPCPP_EXTRA_ARGS
|
||||
-DPython3_ROOT_DIR=${LIBDIR}/python/
|
||||
-DPython3_EXECUTABLE=${PYTHON_BINARY}
|
||||
-DPYTHON_EXECUTABLE=${PYTHON_BINARY}
|
||||
-DLLDB_ENABLE_CURSES=OFF
|
||||
-DLLVM_ENABLE_TERMINFO=OFF
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
|
@@ -10,18 +10,12 @@ set(EMBREE_EXTRA_ARGS
|
||||
-DEMBREE_RAY_MASK=ON
|
||||
-DEMBREE_FILTER_FUNCTION=ON
|
||||
-DEMBREE_BACKFACE_CULLING=OFF
|
||||
-DEMBREE_MAX_ISA=AVX2
|
||||
-DEMBREE_TASKING_SYSTEM=TBB
|
||||
-DEMBREE_TBB_ROOT=${LIBDIR}/tbb
|
||||
-DTBB_ROOT=${LIBDIR}/tbb
|
||||
-DTBB_STATIC_LIB=${TBB_STATIC_LIBRARY}
|
||||
)
|
||||
|
||||
if(BLENDER_PLATFORM_ARM)
|
||||
set(EMBREE_EXTRA_ARGS
|
||||
${EMBREE_EXTRA_ARGS}
|
||||
-DEMBREE_MAX_ISA=NEON)
|
||||
else()
|
||||
if (NOT BLENDER_PLATFORM_ARM)
|
||||
set(EMBREE_EXTRA_ARGS
|
||||
${EMBREE_EXTRA_ARGS}
|
||||
-DEMBREE_MAX_ISA=AVX2)
|
||||
@@ -30,23 +24,10 @@ endif()
|
||||
if(TBB_STATIC_LIBRARY)
|
||||
set(EMBREE_EXTRA_ARGS
|
||||
${EMBREE_EXTRA_ARGS}
|
||||
-DEMBREE_TBB_LIBRARY_NAME=tbb_static
|
||||
-DEMBREE_TBBMALLOC_LIBRARY_NAME=tbbmalloc_static
|
||||
-DEMBREE_TBB_COMPONENT=tbb_static
|
||||
)
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
set(EMBREE_BUILD_DIR ${BUILD_MODE}/)
|
||||
if(BUILD_MODE STREQUAL Debug)
|
||||
list(APPEND EMBREE_EXTRA_ARGS
|
||||
-DEMBREE_TBBMALLOC_LIBRARY_NAME=tbbmalloc_debug
|
||||
-DEMBREE_TBB_LIBRARY_NAME=tbb_debug
|
||||
)
|
||||
endif()
|
||||
else()
|
||||
set(EMBREE_BUILD_DIR)
|
||||
endif()
|
||||
|
||||
ExternalProject_Add(external_embree
|
||||
URL file://${PACKAGE_DIR}/${EMBREE_FILE}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
|
@@ -410,9 +410,9 @@ set(SQLITE_HASH fb558c49ee21a837713c4f1e7e413309aabdd9c7)
|
||||
set(SQLITE_HASH_TYPE SHA1)
|
||||
set(SQLITE_FILE sqlite-src-3240000.zip)
|
||||
|
||||
set(EMBREE_VERSION 3.13.3)
|
||||
set(EMBREE_VERSION 3.13.4)
|
||||
set(EMBREE_URI https://github.com/embree/embree/archive/v${EMBREE_VERSION}.zip)
|
||||
set(EMBREE_HASH f62766ba54e48a2f327c3a22596e7133)
|
||||
set(EMBREE_HASH 52d0be294d6c88ba7a6c9e046796e7be)
|
||||
set(EMBREE_HASH_TYPE MD5)
|
||||
set(EMBREE_FILE embree-v${EMBREE_VERSION}.zip)
|
||||
|
||||
|
@@ -567,7 +567,7 @@ OPENCOLLADA_FORCE_BUILD=false
|
||||
OPENCOLLADA_FORCE_REBUILD=false
|
||||
OPENCOLLADA_SKIP=false
|
||||
|
||||
EMBREE_VERSION="3.13.3"
|
||||
EMBREE_VERSION="3.13.4"
|
||||
EMBREE_VERSION_SHORT="3.13"
|
||||
EMBREE_VERSION_MIN="3.13"
|
||||
EMBREE_VERSION_MEX="4.0"
|
||||
|
@@ -1,30 +1,37 @@
|
||||
diff -Naur orig/common/sys/platform.h external_embree/common/sys/platform.h
|
||||
--- orig/common/sys/platform.h 2020-05-13 23:08:53 -0600
|
||||
+++ external_embree/common/sys/platform.h 2020-06-13 17:40:26 -0600
|
||||
@@ -84,8 +84,8 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
diff -Naur org/kernels/rtcore_config.h.in embree-3.13.4/kernels/rtcore_config.h.in
|
||||
--- org/kernels/rtcore_config.h.in 2022-06-14 22:13:52 -0600
|
||||
+++ embree-3.13.4/kernels/rtcore_config.h.in 2022-06-24 15:20:12 -0600
|
||||
@@ -14,6 +14,7 @@
|
||||
#cmakedefine01 EMBREE_MIN_WIDTH
|
||||
#define RTC_MIN_WIDTH EMBREE_MIN_WIDTH
|
||||
|
||||
+#cmakedefine EMBREE_STATIC_LIB
|
||||
#cmakedefine EMBREE_API_NAMESPACE
|
||||
|
||||
#if defined(EMBREE_API_NAMESPACE)
|
||||
diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt
|
||||
index 7c2f43d..106b1d5 100644
|
||||
--- a/kernels/CMakeLists.txt
|
||||
+++ b/kernels/CMakeLists.txt
|
||||
@@ -201,6 +201,12 @@ embree_files(EMBREE_LIBRARY_FILES_AVX512 ${AVX512})
|
||||
#message("AVX2: ${EMBREE_LIBRARY_FILES_AVX2}")
|
||||
#message("AVX512: ${EMBREE_LIBRARY_FILES_AVX512}")
|
||||
|
||||
#ifdef __WIN32__
|
||||
-#define dll_export __declspec(dllexport)
|
||||
-#define dll_import __declspec(dllimport)
|
||||
+#define dll_export
|
||||
+#define dll_import
|
||||
#else
|
||||
#define dll_export __attribute__ ((visibility ("default")))
|
||||
#define dll_import
|
||||
diff --git orig/common/tasking/CMakeLists.txt external_embree/common/tasking/CMakeLists.txt
|
||||
--- orig/common/tasking/CMakeLists.txt
|
||||
+++ external_embree/common/tasking/CMakeLists.txt
|
||||
@@ -27,7 +27,11 @@
|
||||
else()
|
||||
# If not found try getting older TBB via module (FindTBB.cmake)
|
||||
unset(TBB_DIR CACHE)
|
||||
- find_package(TBB 4.1 REQUIRED tbb)
|
||||
+ if (TBB_STATIC_LIB)
|
||||
+ find_package(TBB 4.1 REQUIRED tbb_static)
|
||||
+ else()
|
||||
+ find_package(TBB 4.1 REQUIRED tbb)
|
||||
+ endif()
|
||||
if (TBB_FOUND)
|
||||
TARGET_LINK_LIBRARIES(tasking PUBLIC TBB)
|
||||
TARGET_INCLUDE_DIRECTORIES(tasking PUBLIC "${TBB_INCLUDE_DIRS}")
|
||||
+# Bundle Neon2x into the main static library.
|
||||
+IF(EMBREE_ISA_NEON2X AND EMBREE_STATIC_LIB)
|
||||
+ LIST(APPEND EMBREE_LIBRARY_FILES ${EMBREE_LIBRARY_FILES_AVX2})
|
||||
+ LIST(REMOVE_DUPLICATES EMBREE_LIBRARY_FILES)
|
||||
+ENDIF()
|
||||
+
|
||||
# replaces all .cpp files with a dummy file that includes that .cpp file
|
||||
# this is to work around an ICC name mangling issue related to lambda functions under windows
|
||||
MACRO (CreateISADummyFiles list isa)
|
||||
@@ -277,7 +283,7 @@ IF (EMBREE_ISA_AVX AND EMBREE_LIBRARY_FILES_AVX)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
-IF (EMBREE_ISA_AVX2 AND EMBREE_LIBRARY_FILES_AVX2)
|
||||
+IF (EMBREE_ISA_AVX2 AND EMBREE_LIBRARY_FILES_AVX2 AND NOT (EMBREE_ISA_NEON2X AND EMBREE_STATIC_LIB))
|
||||
DISABLE_STACK_PROTECTOR_FOR_INTERSECTORS(${EMBREE_LIBRARY_FILES_AVX2})
|
||||
ADD_LIBRARY(embree_avx2 STATIC ${EMBREE_LIBRARY_FILES_AVX2})
|
||||
TARGET_LINK_LIBRARIES(embree_avx2 PRIVATE tasking)
|
||||
|
@@ -71,7 +71,6 @@ if(NOT WIN32)
|
||||
endif()
|
||||
if(WIN32)
|
||||
set(WITH_WASAPI ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_DEVICE_ONEAPI ON CACHE BOOL "" FORCE)
|
||||
endif()
|
||||
if(UNIX AND NOT APPLE)
|
||||
set(WITH_DOC_MANPAGE ON CACHE BOOL "" FORCE)
|
||||
@@ -92,6 +91,7 @@ if(NOT APPLE)
|
||||
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_DEVICE_ONEAPI ON CACHE BOOL "" FORCE)
|
||||
|
||||
# Disable AoT kernels compilations until buildbot can deliver them in a reasonabel time.
|
||||
set(WITH_CYCLES_ONEAPI_BINARIES OFF CACHE BOOL "" FORCE)
|
||||
|
@@ -1529,7 +1529,8 @@ def pyrna2sphinx(basepath):
|
||||
else:
|
||||
fw(".. class:: %s\n\n" % struct_id)
|
||||
|
||||
fw(" %s\n\n" % struct.description)
|
||||
write_indented_lines(" ", fw, struct.description, False)
|
||||
fw("\n")
|
||||
|
||||
# Properties sorted in alphabetical order.
|
||||
sorted_struct_properties = struct.properties[:]
|
||||
|
22
extern/audaspace/bindings/C/AUD_Special.cpp
vendored
22
extern/audaspace/bindings/C/AUD_Special.cpp
vendored
@@ -270,7 +270,7 @@ AUD_API int AUD_readSound(AUD_Sound* sound, float* buffer, int length, int sampl
|
||||
return length;
|
||||
}
|
||||
|
||||
AUD_API const char* AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data)
|
||||
AUD_API int AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data, char* error, size_t errorsize)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -282,15 +282,20 @@ AUD_API const char* AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned i
|
||||
std::shared_ptr<IWriter> writer = FileWriter::createWriter(filename, convCToDSpec(specs), static_cast<Container>(format), static_cast<Codec>(codec), bitrate);
|
||||
FileWriter::writeReader(reader, writer, length, buffersize, callback, data);
|
||||
|
||||
return nullptr;
|
||||
return true;
|
||||
}
|
||||
catch(Exception& e)
|
||||
{
|
||||
return e.getMessage().c_str();
|
||||
if(error && errorsize)
|
||||
{
|
||||
std::strncpy(error, e.getMessage().c_str(), errorsize);
|
||||
error[errorsize - 1] = '\0';
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
AUD_API const char* AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data)
|
||||
AUD_API int AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data, char* error, size_t errorsize)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -328,11 +333,16 @@ AUD_API const char* AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start
|
||||
reader->seek(start);
|
||||
FileWriter::writeReader(reader, writers, length, buffersize, callback, data);
|
||||
|
||||
return nullptr;
|
||||
return true;
|
||||
}
|
||||
catch(Exception& e)
|
||||
{
|
||||
return e.getMessage().c_str();
|
||||
if(error && errorsize)
|
||||
{
|
||||
std::strncpy(error, e.getMessage().c_str(), errorsize);
|
||||
error[errorsize - 1] = '\0';
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
16
extern/audaspace/bindings/C/AUD_Special.h
vendored
16
extern/audaspace/bindings/C/AUD_Special.h
vendored
@@ -70,13 +70,15 @@ extern AUD_API int AUD_readSound(AUD_Sound* sound, float* buffer, int length, in
|
||||
* \param bitrate The bitrate for encoding.
|
||||
* \param callback A callback function that is called periodically during mixdown, reporting progress if length > 0. Can be NULL.
|
||||
* \param data Pass through parameter that is passed to the callback.
|
||||
* \return An error message or NULL in case of success.
|
||||
* \param error String buffer to copy the error message to in case of failure.
|
||||
* \param errorsize The size of the error buffer.
|
||||
* \return Whether or not the operation succeeded.
|
||||
*/
|
||||
extern AUD_API const char* AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length,
|
||||
extern AUD_API int AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length,
|
||||
unsigned int buffersize, const char* filename,
|
||||
AUD_DeviceSpecs specs, AUD_Container format,
|
||||
AUD_Codec codec, unsigned int bitrate,
|
||||
void(*callback)(float, void*), void* data);
|
||||
void(*callback)(float, void*), void* data, char* error, size_t errorsize);
|
||||
|
||||
/**
|
||||
* Mixes a sound down into multiple files.
|
||||
@@ -91,13 +93,15 @@ extern AUD_API const char* AUD_mixdown(AUD_Sound* sound, unsigned int start, uns
|
||||
* \param bitrate The bitrate for encoding.
|
||||
* \param callback A callback function that is called periodically during mixdown, reporting progress if length > 0. Can be NULL.
|
||||
* \param data Pass through parameter that is passed to the callback.
|
||||
* \return An error message or NULL in case of success.
|
||||
* \param error String buffer to copy the error message to in case of failure.
|
||||
* \param errorsize The size of the error buffer.
|
||||
* \return Whether or not the operation succeeded.
|
||||
*/
|
||||
extern AUD_API const char* AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length,
|
||||
extern AUD_API int AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length,
|
||||
unsigned int buffersize, const char* filename,
|
||||
AUD_DeviceSpecs specs, AUD_Container format,
|
||||
AUD_Codec codec, unsigned int bitrate,
|
||||
void(*callback)(float, void*), void* data);
|
||||
void(*callback)(float, void*), void* data, char* error, size_t errorsize);
|
||||
|
||||
/**
|
||||
* Opens a read device and prepares it for mixdown of the sound scene.
|
||||
|
@@ -128,10 +128,6 @@ if(WITH_OPENIMAGEDENOISE)
|
||||
)
|
||||
endif()
|
||||
|
||||
if(WITH_EXPERIMENTAL_FEATURES)
|
||||
add_definitions(-DWITH_NEW_CURVES_TYPE)
|
||||
endif()
|
||||
|
||||
blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
|
||||
|
||||
add_dependencies(bf_intern_cycles bf_rna)
|
||||
|
@@ -84,10 +84,36 @@ class AddPresetViewportSampling(AddPresetBase, Operator):
|
||||
preset_subdir = "cycles/viewport_sampling"
|
||||
|
||||
|
||||
class AddPresetPerformance(AddPresetBase, Operator):
|
||||
'''Add an Performance Preset'''
|
||||
bl_idname = "render.cycles_performance_preset_add"
|
||||
bl_label = "Add Performance Preset"
|
||||
preset_menu = "CYCLES_PT_performance_presets"
|
||||
|
||||
preset_defines = [
|
||||
"render = bpy.context.scene.render"
|
||||
"cycles = bpy.context.scene.cycles"
|
||||
]
|
||||
|
||||
preset_values = [
|
||||
"render.threads_mode",
|
||||
"render.use_persistent_data",
|
||||
"cycles.debug_use_spatial_splits",
|
||||
"cycles.debug_use_compact_bvh",
|
||||
"cycles.debug_use_hair_bvh",
|
||||
"cycles.debug_bvh_time_steps",
|
||||
"cycles.use_auto_tile",
|
||||
"cycles.tile_size",
|
||||
]
|
||||
|
||||
preset_subdir = "cycles/performance"
|
||||
|
||||
|
||||
classes = (
|
||||
AddPresetIntegrator,
|
||||
AddPresetSampling,
|
||||
AddPresetViewportSampling,
|
||||
AddPresetPerformance,
|
||||
)
|
||||
|
||||
|
||||
|
@@ -693,7 +693,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
debug_use_compact_bvh: BoolProperty(
|
||||
name="Use Compact BVH",
|
||||
description="Use compact BVH structure (uses less ram but renders slower)",
|
||||
default=True,
|
||||
default=False,
|
||||
)
|
||||
debug_bvh_time_steps: IntProperty(
|
||||
name="BVH Time Steps",
|
||||
@@ -1560,7 +1560,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
if sys.platform.startswith("win"):
|
||||
col.label(text="and Windows driver version 101.1660 or newer", icon='BLANK1')
|
||||
elif sys.platform.startswith("linux"):
|
||||
col.label(text="and Linux driver version xx.xx.20066 or newer", icon='BLANK1')
|
||||
col.label(text="and Linux driver version xx.xx.23570 or newer", icon='BLANK1')
|
||||
elif device_type == 'METAL':
|
||||
col.label(text="Requires Apple Silicon with macOS 12.2 or newer", icon='BLANK1')
|
||||
col.label(text="or AMD with macOS 12.3 or newer", icon='BLANK1')
|
||||
|
@@ -43,6 +43,12 @@ class CYCLES_PT_integrator_presets(CyclesPresetPanel):
|
||||
preset_add_operator = "render.cycles_integrator_preset_add"
|
||||
|
||||
|
||||
class CYCLES_PT_performance_presets(CyclesPresetPanel):
|
||||
bl_label = "Performance Presets"
|
||||
preset_subdir = "cycles/performance"
|
||||
preset_add_operator = "render.cycles_performance_preset_add"
|
||||
|
||||
|
||||
class CyclesButtonsPanel:
|
||||
bl_space_type = "PROPERTIES"
|
||||
bl_region_type = "WINDOW"
|
||||
@@ -624,6 +630,9 @@ class CYCLES_RENDER_PT_performance(CyclesButtonsPanel, Panel):
|
||||
bl_label = "Performance"
|
||||
bl_options = {'DEFAULT_CLOSED'}
|
||||
|
||||
def draw_header_preset(self, context):
|
||||
CYCLES_PT_performance_presets.draw_panel_header(self.layout)
|
||||
|
||||
def draw(self, context):
|
||||
pass
|
||||
|
||||
@@ -943,6 +952,8 @@ class CYCLES_CAMERA_PT_dof(CyclesButtonsPanel, Panel):
|
||||
|
||||
col = split.column()
|
||||
col.prop(dof, "focus_object", text="Focus Object")
|
||||
if dof.focus_object and dof.focus_object.type == 'ARMATURE':
|
||||
col.prop_search(dof, "focus_subtarget", dof.focus_object.data, "bones", text="Focus Bone")
|
||||
|
||||
sub = col.row()
|
||||
sub.active = dof.focus_object is None
|
||||
@@ -1202,7 +1213,7 @@ class CYCLES_OBJECT_PT_lightgroup(CyclesButtonsPanel, Panel):
|
||||
sub.prop_search(ob, "lightgroup", view_layer, "lightgroups", text="Light Group", results_are_suggestions=True)
|
||||
|
||||
sub = row.column(align=True)
|
||||
sub.active = bool(ob.lightgroup) and not any(lg.name == ob.lightgroup for lg in view_layer.lightgroups)
|
||||
sub.enabled = bool(ob.lightgroup) and not any(lg.name == ob.lightgroup for lg in view_layer.lightgroups)
|
||||
sub.operator("scene.view_layer_add_lightgroup", icon='ADD', text="").name = ob.lightgroup
|
||||
|
||||
|
||||
@@ -1640,7 +1651,7 @@ class CYCLES_WORLD_PT_settings_light_group(CyclesButtonsPanel, Panel):
|
||||
)
|
||||
|
||||
sub = row.column(align=True)
|
||||
sub.active = bool(world.lightgroup) and not any(lg.name == world.lightgroup for lg in view_layer.lightgroups)
|
||||
sub.enabled = bool(world.lightgroup) and not any(lg.name == world.lightgroup for lg in view_layer.lightgroups)
|
||||
sub.operator("scene.view_layer_add_lightgroup", icon='ADD', text="").name = world.lightgroup
|
||||
|
||||
|
||||
@@ -2269,6 +2280,7 @@ classes = (
|
||||
CYCLES_PT_sampling_presets,
|
||||
CYCLES_PT_viewport_sampling_presets,
|
||||
CYCLES_PT_integrator_presets,
|
||||
CYCLES_PT_performance_presets,
|
||||
CYCLES_RENDER_PT_sampling,
|
||||
CYCLES_RENDER_PT_sampling_viewport,
|
||||
CYCLES_RENDER_PT_sampling_viewport_denoise,
|
||||
|
@@ -143,11 +143,20 @@ static float blender_camera_focal_distance(BL::RenderEngine &b_engine,
|
||||
if (!b_dof_object)
|
||||
return b_camera.dof().focus_distance();
|
||||
|
||||
Transform dofmat = get_transform(b_dof_object.matrix_world());
|
||||
|
||||
string focus_subtarget = b_camera.dof().focus_subtarget();
|
||||
if (b_dof_object.pose() && !focus_subtarget.empty()) {
|
||||
BL::PoseBone b_bone = b_dof_object.pose().bones[focus_subtarget];
|
||||
if (b_bone) {
|
||||
dofmat = dofmat * get_transform(b_bone.matrix());
|
||||
}
|
||||
}
|
||||
|
||||
/* for dof object, return distance along camera Z direction */
|
||||
BL::Array<float, 16> b_ob_matrix;
|
||||
b_engine.camera_model_matrix(b_ob, bcam->use_spherical_stereo, b_ob_matrix);
|
||||
Transform obmat = transform_clear_scale(get_transform(b_ob_matrix));
|
||||
Transform dofmat = get_transform(b_dof_object.matrix_world());
|
||||
float3 view_dir = normalize(transform_get_column(&obmat, 2));
|
||||
float3 dof_dir = transform_get_column(&obmat, 3) - transform_get_column(&dofmat, 3);
|
||||
return fabsf(dot(view_dir, dof_dir));
|
||||
|
@@ -613,8 +613,6 @@ void BlenderSync::sync_particle_hair(
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef WITH_NEW_CURVES_TYPE
|
||||
|
||||
static std::optional<BL::FloatAttribute> find_curves_radius_attribute(BL::Curves b_curves)
|
||||
{
|
||||
for (BL::Attribute &b_attribute : b_curves.attributes) {
|
||||
@@ -632,6 +630,25 @@ static std::optional<BL::FloatAttribute> find_curves_radius_attribute(BL::Curves
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
static BL::FloatVectorAttribute find_curves_position_attribute(BL::Curves b_curves)
|
||||
{
|
||||
for (BL::Attribute &b_attribute : b_curves.attributes) {
|
||||
if (b_attribute.name() != "position") {
|
||||
continue;
|
||||
}
|
||||
if (b_attribute.domain() != BL::Attribute::domain_POINT) {
|
||||
continue;
|
||||
}
|
||||
if (b_attribute.data_type() != BL::Attribute::data_type_FLOAT_VECTOR) {
|
||||
continue;
|
||||
}
|
||||
return BL::FloatVectorAttribute{b_attribute};
|
||||
}
|
||||
/* The position attribute must exist. */
|
||||
assert(false);
|
||||
return BL::FloatVectorAttribute{b_curves.attributes[0]};
|
||||
}
|
||||
|
||||
template<typename TypeInCycles, typename GetValueAtIndex>
|
||||
static void fill_generic_attribute(BL::Curves &b_curves,
|
||||
TypeInCycles *data,
|
||||
@@ -795,16 +812,16 @@ static void attr_create_generic(Scene *scene,
|
||||
}
|
||||
}
|
||||
|
||||
static float4 hair_point_as_float4(BL::Curves b_curves,
|
||||
static float4 hair_point_as_float4(BL::FloatVectorAttribute b_attr_position,
|
||||
std::optional<BL::FloatAttribute> b_attr_radius,
|
||||
const int index)
|
||||
{
|
||||
float4 mP = float3_to_float4(get_float3(b_curves.position_data[index].vector()));
|
||||
float4 mP = float3_to_float4(get_float3(b_attr_position.data[index].vector()));
|
||||
mP.w = b_attr_radius ? b_attr_radius->data[index].value() : 0.0f;
|
||||
return mP;
|
||||
}
|
||||
|
||||
static float4 interpolate_hair_points(BL::Curves b_curves,
|
||||
static float4 interpolate_hair_points(BL::FloatVectorAttribute b_attr_position,
|
||||
std::optional<BL::FloatAttribute> b_attr_radius,
|
||||
const int first_point_index,
|
||||
const int num_points,
|
||||
@@ -814,8 +831,8 @@ static float4 interpolate_hair_points(BL::Curves b_curves,
|
||||
const int point_a = clamp((int)curve_t, 0, num_points - 1);
|
||||
const int point_b = min(point_a + 1, num_points - 1);
|
||||
const float t = curve_t - (float)point_a;
|
||||
return lerp(hair_point_as_float4(b_curves, b_attr_radius, first_point_index + point_a),
|
||||
hair_point_as_float4(b_curves, b_attr_radius, first_point_index + point_b),
|
||||
return lerp(hair_point_as_float4(b_attr_position, b_attr_radius, first_point_index + point_a),
|
||||
hair_point_as_float4(b_attr_position, b_attr_radius, first_point_index + point_b),
|
||||
t);
|
||||
}
|
||||
|
||||
@@ -848,6 +865,7 @@ static void export_hair_curves(Scene *scene,
|
||||
|
||||
hair->reserve_curves(num_curves, num_keys);
|
||||
|
||||
BL::FloatVectorAttribute b_attr_position = find_curves_position_attribute(b_curves);
|
||||
std::optional<BL::FloatAttribute> b_attr_radius = find_curves_radius_attribute(b_curves);
|
||||
|
||||
/* Export curves and points. */
|
||||
@@ -866,9 +884,9 @@ static void export_hair_curves(Scene *scene,
|
||||
|
||||
/* Position and radius. */
|
||||
for (int i = 0; i < num_points; i++) {
|
||||
const float3 co = get_float3(b_curves.position_data[first_point_index + i].vector());
|
||||
const float3 co = get_float3(b_attr_position.data[first_point_index + i].vector());
|
||||
const float radius = b_attr_radius ? b_attr_radius->data[first_point_index + i].value() :
|
||||
0.0f;
|
||||
0.005f;
|
||||
hair->add_curve_key(co, radius);
|
||||
|
||||
if (attr_intercept) {
|
||||
@@ -923,6 +941,7 @@ static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motio
|
||||
int num_motion_keys = 0;
|
||||
int curve_index = 0;
|
||||
|
||||
BL::FloatVectorAttribute b_attr_position = find_curves_position_attribute(b_curves);
|
||||
std::optional<BL::FloatAttribute> b_attr_radius = find_curves_radius_attribute(b_curves);
|
||||
|
||||
for (int i = 0; i < num_curves; i++) {
|
||||
@@ -938,7 +957,7 @@ static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motio
|
||||
int point_index = first_point_index + i;
|
||||
|
||||
if (point_index < num_keys) {
|
||||
mP[num_motion_keys] = hair_point_as_float4(b_curves, b_attr_radius, point_index);
|
||||
mP[num_motion_keys] = hair_point_as_float4(b_attr_position, b_attr_radius, point_index);
|
||||
num_motion_keys++;
|
||||
|
||||
if (!have_motion) {
|
||||
@@ -958,7 +977,7 @@ static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motio
|
||||
for (int i = 0; i < curve.num_keys; i++) {
|
||||
const float step = i * step_size;
|
||||
mP[num_motion_keys] = interpolate_hair_points(
|
||||
b_curves, b_attr_radius, first_point_index, num_points, step);
|
||||
b_attr_position, b_attr_radius, first_point_index, num_points, step);
|
||||
num_motion_keys++;
|
||||
}
|
||||
have_motion = true;
|
||||
@@ -990,15 +1009,6 @@ void BlenderSync::sync_hair(Hair *hair, BObjectInfo &b_ob_info, bool motion, int
|
||||
export_hair_curves(scene, hair, b_curves, need_motion, motion_scale);
|
||||
}
|
||||
}
|
||||
#else
|
||||
void BlenderSync::sync_hair(Hair *hair, BObjectInfo &b_ob_info, bool motion, int motion_step)
|
||||
{
|
||||
(void)hair;
|
||||
(void)b_ob_info;
|
||||
(void)motion;
|
||||
(void)motion_step;
|
||||
}
|
||||
#endif
|
||||
|
||||
void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, Hair *hair)
|
||||
{
|
||||
@@ -1010,14 +1020,11 @@ void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, H
|
||||
new_hair.set_used_shaders(used_shaders);
|
||||
|
||||
if (view_layer.use_hair) {
|
||||
#ifdef WITH_NEW_CURVES_TYPE
|
||||
if (b_ob_info.object_data.is_a(&RNA_Curves)) {
|
||||
/* Hair object. */
|
||||
sync_hair(&new_hair, b_ob_info, false);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
else {
|
||||
/* Particle hair. */
|
||||
bool need_undeformed = new_hair.need_attribute(scene, ATTR_STD_GENERATED);
|
||||
BL::Mesh b_mesh = object_to_mesh(
|
||||
@@ -1064,15 +1071,12 @@ void BlenderSync::sync_hair_motion(BL::Depsgraph b_depsgraph,
|
||||
|
||||
/* Export deformed coordinates. */
|
||||
if (ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview)) {
|
||||
#ifdef WITH_NEW_CURVES_TYPE
|
||||
if (b_ob_info.object_data.is_a(&RNA_Curves)) {
|
||||
/* Hair object. */
|
||||
sync_hair(hair, b_ob_info, true, motion_step);
|
||||
return;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
else {
|
||||
/* Particle hair. */
|
||||
BL::Mesh b_mesh = object_to_mesh(
|
||||
b_data, b_ob_info, b_depsgraph, false, Mesh::SUBDIVISION_NONE);
|
||||
|
@@ -18,11 +18,7 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
static Geometry::Type determine_geom_type(BObjectInfo &b_ob_info, bool use_particle_hair)
|
||||
{
|
||||
#ifdef WITH_NEW_CURVES_TYPE
|
||||
if (b_ob_info.object_data.is_a(&RNA_Curves) || use_particle_hair) {
|
||||
#else
|
||||
if (use_particle_hair) {
|
||||
#endif
|
||||
return Geometry::HAIR;
|
||||
}
|
||||
|
||||
@@ -217,11 +213,7 @@ void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph,
|
||||
if (progress.get_cancel())
|
||||
return;
|
||||
|
||||
#ifdef WITH_NEW_CURVES_TYPE
|
||||
if (b_ob_info.object_data.is_a(&RNA_Curves) || use_particle_hair) {
|
||||
#else
|
||||
if (use_particle_hair) {
|
||||
#endif
|
||||
Hair *hair = static_cast<Hair *>(geom);
|
||||
sync_hair_motion(b_depsgraph, b_ob_info, hair, motion_step);
|
||||
}
|
||||
|
@@ -1,8 +1,10 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#include "scene/pointcloud.h"
|
||||
#include <optional>
|
||||
|
||||
#include "scene/attribute.h"
|
||||
#include "scene/pointcloud.h"
|
||||
#include "scene/scene.h"
|
||||
|
||||
#include "blender/sync.h"
|
||||
@@ -138,6 +140,36 @@ static void copy_attributes(PointCloud *pointcloud,
|
||||
}
|
||||
}
|
||||
|
||||
static std::optional<BL::FloatAttribute> find_radius_attribute(BL::PointCloud b_pointcloud)
|
||||
{
|
||||
for (BL::Attribute &b_attribute : b_pointcloud.attributes) {
|
||||
if (b_attribute.name() != "radius") {
|
||||
continue;
|
||||
}
|
||||
if (b_attribute.data_type() != BL::Attribute::data_type_FLOAT) {
|
||||
continue;
|
||||
}
|
||||
return BL::FloatAttribute{b_attribute};
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
static BL::FloatVectorAttribute find_position_attribute(BL::PointCloud b_pointcloud)
|
||||
{
|
||||
for (BL::Attribute &b_attribute : b_pointcloud.attributes) {
|
||||
if (b_attribute.name() != "position") {
|
||||
continue;
|
||||
}
|
||||
if (b_attribute.data_type() != BL::Attribute::data_type_FLOAT_VECTOR) {
|
||||
continue;
|
||||
}
|
||||
return BL::FloatVectorAttribute{b_attribute};
|
||||
}
|
||||
/* The position attribute must exist. */
|
||||
assert(false);
|
||||
return BL::FloatVectorAttribute{b_pointcloud.attributes[0]};
|
||||
}
|
||||
|
||||
static void export_pointcloud(Scene *scene,
|
||||
PointCloud *pointcloud,
|
||||
BL::PointCloud b_pointcloud,
|
||||
@@ -156,18 +188,18 @@ static void export_pointcloud(Scene *scene,
|
||||
const int num_points = b_pointcloud.points.length();
|
||||
pointcloud->reserve(num_points);
|
||||
|
||||
BL::FloatVectorAttribute b_attr_position = find_position_attribute(b_pointcloud);
|
||||
std::optional<BL::FloatAttribute> b_attr_radius = find_radius_attribute(b_pointcloud);
|
||||
|
||||
/* Export points. */
|
||||
BL::PointCloud::points_iterator b_point_iter;
|
||||
for (b_pointcloud.points.begin(b_point_iter); b_point_iter != b_pointcloud.points.end();
|
||||
++b_point_iter) {
|
||||
BL::Point b_point = *b_point_iter;
|
||||
const float3 co = get_float3(b_point.co());
|
||||
const float radius = b_point.radius();
|
||||
for (int i = 0; i < num_points; i++) {
|
||||
const float3 co = get_float3(b_attr_position.data[i].vector());
|
||||
const float radius = b_attr_radius ? b_attr_radius->data[i].value() : 0.0f;
|
||||
pointcloud->add_point(co, radius);
|
||||
|
||||
/* Random number per point. */
|
||||
if (attr_random != NULL) {
|
||||
attr_random->add(hash_uint2_to_float(b_point.index(), 0));
|
||||
attr_random->add(hash_uint2_to_float(i, 0));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -195,14 +227,15 @@ static void export_pointcloud_motion(PointCloud *pointcloud,
|
||||
int num_motion_points = 0;
|
||||
const array<float3> &pointcloud_points = pointcloud->get_points();
|
||||
|
||||
BL::PointCloud::points_iterator b_point_iter;
|
||||
for (b_pointcloud.points.begin(b_point_iter); b_point_iter != b_pointcloud.points.end();
|
||||
++b_point_iter) {
|
||||
BL::Point b_point = *b_point_iter;
|
||||
BL::FloatVectorAttribute b_attr_position = find_position_attribute(b_pointcloud);
|
||||
std::optional<BL::FloatAttribute> b_attr_radius = find_radius_attribute(b_pointcloud);
|
||||
|
||||
for (int i = 0; i < num_points; i++) {
|
||||
if (num_motion_points < num_points) {
|
||||
float3 P = get_float3(b_point.co());
|
||||
P.w = b_point.radius();
|
||||
const float3 co = get_float3(b_attr_position.data[i].vector());
|
||||
const float radius = b_attr_radius ? b_attr_radius->data[i].value() : 0.0f;
|
||||
float3 P = co;
|
||||
P.w = radius;
|
||||
mP[num_motion_points] = P;
|
||||
have_motion = have_motion || (P != pointcloud_points[num_motion_points]);
|
||||
num_motion_points++;
|
||||
|
@@ -129,7 +129,7 @@ class BVHParams {
|
||||
|
||||
top_level = false;
|
||||
bvh_layout = BVH_LAYOUT_BVH2;
|
||||
use_compact_structure = true;
|
||||
use_compact_structure = false;
|
||||
use_unaligned_nodes = false;
|
||||
|
||||
num_motion_curve_steps = 0;
|
||||
|
@@ -197,7 +197,7 @@ void CPUDevice::const_copy_to(const char *name, void *host, size_t size)
|
||||
|
||||
// Update scene handle (since it is different for each device on multi devices)
|
||||
KernelData *const data = (KernelData *)host;
|
||||
data->bvh.scene = embree_scene;
|
||||
data->device_bvh = embree_scene;
|
||||
}
|
||||
#endif
|
||||
kernel_const_copy(&kernel_globals, name, host, size);
|
||||
|
@@ -29,6 +29,7 @@ class DeviceQueue;
|
||||
class Progress;
|
||||
class CPUKernels;
|
||||
class CPUKernelThreadGlobals;
|
||||
class Scene;
|
||||
|
||||
/* Device Types */
|
||||
|
||||
@@ -186,6 +187,11 @@ class Device {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Called after kernel texture setup, and prior to integrator state setup. */
|
||||
virtual void optimize_for_scene(Scene * /*scene*/)
|
||||
{
|
||||
}
|
||||
|
||||
virtual bool is_resident(device_ptr /*key*/, Device *sub_device)
|
||||
{
|
||||
/* Memory is always resident if this is not a multi device, regardless of whether the pointer
|
||||
|
@@ -75,7 +75,8 @@ class MetalDevice : public Device {
|
||||
std::vector<id<MTLTexture>> texture_slot_map;
|
||||
|
||||
bool use_metalrt = false;
|
||||
bool use_function_specialisation = false;
|
||||
MetalPipelineType kernel_specialization_level = PSO_GENERIC;
|
||||
std::atomic_bool async_compile_and_load = false;
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
|
||||
@@ -91,9 +92,7 @@ class MetalDevice : public Device {
|
||||
|
||||
bool use_adaptive_compilation();
|
||||
|
||||
string get_source(const uint kernel_features);
|
||||
|
||||
string compile_kernel(const uint kernel_features, const char *name);
|
||||
void make_source(MetalPipelineType pso_type, const uint kernel_features);
|
||||
|
||||
virtual bool load_kernels(const uint kernel_features) override;
|
||||
|
||||
@@ -111,7 +110,9 @@ class MetalDevice : public Device {
|
||||
|
||||
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
|
||||
|
||||
id<MTLLibrary> compile(string const &source);
|
||||
virtual void optimize_for_scene(Scene *scene) override;
|
||||
|
||||
bool compile_and_load(MetalPipelineType pso_type);
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* low-level memory management */
|
||||
|
@@ -6,6 +6,8 @@
|
||||
# include "device/metal/device_impl.h"
|
||||
# include "device/metal/device.h"
|
||||
|
||||
# include "scene/scene.h"
|
||||
|
||||
# include "util/debug.h"
|
||||
# include "util/md5.h"
|
||||
# include "util/path.h"
|
||||
@@ -78,6 +80,10 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
||||
case METAL_GPU_APPLE: {
|
||||
max_threads_per_threadgroup = 512;
|
||||
use_metalrt = info.use_metalrt;
|
||||
|
||||
/* Specialize the intersection kernels on Apple GPUs by default as these can be built very
|
||||
* quickly. */
|
||||
kernel_specialization_level = PSO_SPECIALIZED_INTERSECT;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -90,6 +96,13 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
||||
capture_enabled = true;
|
||||
}
|
||||
|
||||
if (auto envstr = getenv("CYCLES_METAL_SPECIALIZATION_LEVEL")) {
|
||||
kernel_specialization_level = (MetalPipelineType)atoi(envstr);
|
||||
}
|
||||
metal_printf("kernel_specialization_level = %s\n",
|
||||
kernel_type_as_string(
|
||||
(MetalPipelineType)min((int)kernel_specialization_level, (int)PSO_NUM - 1)));
|
||||
|
||||
MTLArgumentDescriptor *arg_desc_params = [[MTLArgumentDescriptor alloc] init];
|
||||
arg_desc_params.dataType = MTLDataTypePointer;
|
||||
arg_desc_params.access = MTLArgumentAccessReadOnly;
|
||||
@@ -209,61 +222,86 @@ bool MetalDevice::use_adaptive_compilation()
|
||||
return DebugFlags().metal.adaptive_compile;
|
||||
}
|
||||
|
||||
string MetalDevice::get_source(const uint kernel_features)
|
||||
void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_features)
|
||||
{
|
||||
string build_options;
|
||||
|
||||
string global_defines;
|
||||
if (use_adaptive_compilation()) {
|
||||
build_options += " -D__KERNEL_FEATURES__=" + to_string(kernel_features);
|
||||
global_defines += "#define __KERNEL_FEATURES__ " + to_string(kernel_features) + "\n";
|
||||
}
|
||||
|
||||
if (use_metalrt) {
|
||||
build_options += "-D__METALRT__ ";
|
||||
global_defines += "#define __METALRT__\n";
|
||||
if (motion_blur) {
|
||||
build_options += "-D__METALRT_MOTION__ ";
|
||||
global_defines += "#define __METALRT_MOTION__\n";
|
||||
}
|
||||
}
|
||||
|
||||
# ifdef WITH_CYCLES_DEBUG
|
||||
build_options += "-D__KERNEL_DEBUG__ ";
|
||||
global_defines += "#define __KERNEL_DEBUG__\n";
|
||||
# endif
|
||||
|
||||
switch (device_vendor) {
|
||||
default:
|
||||
break;
|
||||
case METAL_GPU_INTEL:
|
||||
build_options += "-D__KERNEL_METAL_INTEL__ ";
|
||||
global_defines += "#define __KERNEL_METAL_INTEL__\n";
|
||||
break;
|
||||
case METAL_GPU_AMD:
|
||||
build_options += "-D__KERNEL_METAL_AMD__ ";
|
||||
global_defines += "#define __KERNEL_METAL_AMD__\n";
|
||||
break;
|
||||
case METAL_GPU_APPLE:
|
||||
build_options += "-D__KERNEL_METAL_APPLE__ ";
|
||||
global_defines += "#define __KERNEL_METAL_APPLE__\n";
|
||||
break;
|
||||
}
|
||||
|
||||
/* reformat -D defines list into compilable form */
|
||||
vector<string> components;
|
||||
string_replace(build_options, "-D", "");
|
||||
string_split(components, build_options, " ");
|
||||
|
||||
string globalDefines;
|
||||
for (const string &component : components) {
|
||||
vector<string> assignments;
|
||||
string_split(assignments, component, "=");
|
||||
if (assignments.size() == 2)
|
||||
globalDefines += string_printf(
|
||||
"#define %s %s\n", assignments[0].c_str(), assignments[1].c_str());
|
||||
else
|
||||
globalDefines += string_printf("#define %s\n", assignments[0].c_str());
|
||||
}
|
||||
|
||||
string source = globalDefines + "\n#include \"kernel/device/metal/kernel.metal\"\n";
|
||||
string &source = this->source[pso_type];
|
||||
source = "\n#include \"kernel/device/metal/kernel.metal\"\n";
|
||||
source = path_source_replace_includes(source, path_get("source"));
|
||||
|
||||
metal_printf("Global defines:\n%s\n", globalDefines.c_str());
|
||||
/* Perform any required specialization on the source.
|
||||
* With Metal function constants we can generate a single variant of the kernel source which can
|
||||
* be repeatedly respecialized.
|
||||
*/
|
||||
string baked_constants;
|
||||
|
||||
return source;
|
||||
/* Replace specific KernelData "dot" dereferences with a Metal function_constant identifier of
|
||||
* the same character length. Build a string of all active constant values which is then hashed
|
||||
* in order to identify the PSO.
|
||||
*/
|
||||
if (pso_type != PSO_GENERIC) {
|
||||
const double starttime = time_dt();
|
||||
|
||||
# define KERNEL_STRUCT_BEGIN(name, parent) \
|
||||
string_replace_same_length(source, "kernel_data." #parent ".", "kernel_data_" #parent "_");
|
||||
|
||||
/* Add constants to md5 so that 'get_best_pipeline' is able to return a suitable match. */
|
||||
# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
|
||||
baked_constants += string(#parent "." #name "=") + \
|
||||
to_string(_type(launch_params.data.parent.name)) + "\n";
|
||||
|
||||
# include "kernel/data_template.h"
|
||||
|
||||
/* Opt in to all of available specializations. This can be made more granular for the
|
||||
* PSO_SPECIALIZED_INTERSECT case in order to minimize the number of specialization requests,
|
||||
* but the overhead should be negligible as these are very quick to (re)build and aren't
|
||||
* serialized to disk via MTLBinaryArchives.
|
||||
*/
|
||||
global_defines += "#define __KERNEL_USE_DATA_CONSTANTS__\n";
|
||||
|
||||
metal_printf("KernelData patching took %.1f ms\n", (time_dt() - starttime) * 1000.0);
|
||||
}
|
||||
|
||||
source = global_defines + source;
|
||||
metal_printf("================\n%s================\n\%s================\n",
|
||||
global_defines.c_str(),
|
||||
baked_constants.c_str());
|
||||
|
||||
/* Generate an MD5 from the source and include any baked constants. This is used when caching
|
||||
* PSOs. */
|
||||
MD5Hash md5;
|
||||
md5.append(baked_constants);
|
||||
md5.append(source);
|
||||
source_md5[pso_type] = md5.get_hex();
|
||||
}
|
||||
|
||||
bool MetalDevice::load_kernels(const uint _kernel_features)
|
||||
@@ -279,28 +317,22 @@ bool MetalDevice::load_kernels(const uint _kernel_features)
|
||||
* active, but may still need to be rendered without motion blur if that isn't active as well. */
|
||||
motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
|
||||
|
||||
source[PSO_GENERIC] = get_source(kernel_features);
|
||||
|
||||
const double starttime = time_dt();
|
||||
|
||||
mtlLibrary[PSO_GENERIC] = compile(source[PSO_GENERIC]);
|
||||
|
||||
metal_printf("Front-end compilation finished in %.1f seconds (generic)\n",
|
||||
time_dt() - starttime);
|
||||
|
||||
MD5Hash md5;
|
||||
md5.append(source[PSO_GENERIC]);
|
||||
source_md5[PSO_GENERIC] = md5.get_hex();
|
||||
|
||||
bool result = MetalDeviceKernels::load(this, false);
|
||||
bool result = compile_and_load(PSO_GENERIC);
|
||||
|
||||
reserve_local_memory(kernel_features);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
id<MTLLibrary> MetalDevice::compile(string const &source)
|
||||
bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
|
||||
{
|
||||
make_source(pso_type, kernel_features);
|
||||
|
||||
if (!MetalDeviceKernels::should_load_kernels(this, pso_type)) {
|
||||
/* We already have a full set of matching pipelines which are cached or queued. */
|
||||
metal_printf("%s kernels already requested\n", kernel_type_as_string(pso_type));
|
||||
return true;
|
||||
}
|
||||
|
||||
MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
|
||||
|
||||
options.fastMathEnabled = YES;
|
||||
@@ -308,19 +340,30 @@ id<MTLLibrary> MetalDevice::compile(string const &source)
|
||||
options.languageVersion = MTLLanguageVersion2_4;
|
||||
}
|
||||
|
||||
NSError *error = NULL;
|
||||
id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str())
|
||||
options:options
|
||||
error:&error];
|
||||
if (getenv("CYCLES_METAL_PROFILING") || getenv("CYCLES_METAL_DEBUG")) {
|
||||
path_write_text(path_cache_get(string_printf("%s.metal", kernel_type_as_string(pso_type))),
|
||||
source[pso_type]);
|
||||
}
|
||||
|
||||
if (!mtlLibrary) {
|
||||
const double starttime = time_dt();
|
||||
|
||||
NSError *error = NULL;
|
||||
mtlLibrary[pso_type] = [mtlDevice newLibraryWithSource:@(source[pso_type].c_str())
|
||||
options:options
|
||||
error:&error];
|
||||
|
||||
if (!mtlLibrary[pso_type]) {
|
||||
NSString *err = [error localizedDescription];
|
||||
set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
|
||||
}
|
||||
|
||||
metal_printf("Front-end compilation finished in %.1f seconds (%s)\n",
|
||||
time_dt() - starttime,
|
||||
kernel_type_as_string(pso_type));
|
||||
|
||||
[options release];
|
||||
|
||||
return mtlLibrary;
|
||||
return MetalDeviceKernels::load(this, pso_type);
|
||||
}
|
||||
|
||||
void MetalDevice::reserve_local_memory(const uint kernel_features)
|
||||
@@ -627,6 +670,58 @@ device_ptr MetalDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, siz
|
||||
return 0;
|
||||
}
|
||||
|
||||
void MetalDevice::optimize_for_scene(Scene *scene)
|
||||
{
|
||||
MetalPipelineType specialization_level = kernel_specialization_level;
|
||||
|
||||
if (specialization_level < PSO_SPECIALIZED_INTERSECT) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* PSO_SPECIALIZED_INTERSECT kernels are fast to specialize, so we always load them
|
||||
* synchronously. */
|
||||
compile_and_load(PSO_SPECIALIZED_INTERSECT);
|
||||
|
||||
if (specialization_level < PSO_SPECIALIZED_SHADE) {
|
||||
return;
|
||||
}
|
||||
if (!scene->params.background) {
|
||||
/* Don't load PSO_SPECIALIZED_SHADE kernels during viewport rendering as they are slower to
|
||||
* build. */
|
||||
return;
|
||||
}
|
||||
|
||||
/* PSO_SPECIALIZED_SHADE kernels are slower to specialize, so we load them asynchronously, and
|
||||
* only if there isn't an existing load in flight.
|
||||
*/
|
||||
auto specialize_shade_fn = ^() {
|
||||
compile_and_load(PSO_SPECIALIZED_SHADE);
|
||||
async_compile_and_load = false;
|
||||
};
|
||||
|
||||
bool async_specialize_shade = true;
|
||||
|
||||
/* Block if a per-kernel profiling is enabled (ensure steady rendering rate). */
|
||||
if (getenv("CYCLES_METAL_PROFILING") != nullptr) {
|
||||
async_specialize_shade = false;
|
||||
}
|
||||
|
||||
if (async_specialize_shade) {
|
||||
if (!async_compile_and_load) {
|
||||
async_compile_and_load = true;
|
||||
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
|
||||
specialize_shade_fn);
|
||||
}
|
||||
else {
|
||||
metal_printf(
|
||||
"Async PSO_SPECIALIZED_SHADE load request already in progress - dropping request\n");
|
||||
}
|
||||
}
|
||||
else {
|
||||
specialize_shade_fn();
|
||||
}
|
||||
}
|
||||
|
||||
void MetalDevice::const_copy_to(const char *name, void *host, size_t size)
|
||||
{
|
||||
if (strcmp(name, "data") == 0) {
|
||||
@@ -652,7 +747,7 @@ void MetalDevice::const_copy_to(const char *name, void *host, size_t size)
|
||||
/* Update data storage pointers in launch parameters. */
|
||||
if (strcmp(name, "integrator_state") == 0) {
|
||||
/* IntegratorStateGPU is contiguous pointers */
|
||||
const size_t pointer_block_size = sizeof(IntegratorStateGPU);
|
||||
const size_t pointer_block_size = offsetof(IntegratorStateGPU, sort_partition_divisor);
|
||||
update_launch_pointers(
|
||||
offsetof(KernelParamsMetal, integrator_state), host, size, pointer_block_size);
|
||||
}
|
||||
|
@@ -31,7 +31,7 @@ enum {
|
||||
enum { METALRT_TABLE_DEFAULT, METALRT_TABLE_SHADOW, METALRT_TABLE_LOCAL, METALRT_TABLE_NUM };
|
||||
|
||||
/* Pipeline State Object types */
|
||||
enum {
|
||||
enum MetalPipelineType {
|
||||
/* A kernel that can be used with all scenes, supporting all features.
|
||||
* It is slow to compile, but only needs to be compiled once and is then
|
||||
* cached for future render sessions. This allows a render to get underway
|
||||
@@ -39,28 +39,33 @@ enum {
|
||||
*/
|
||||
PSO_GENERIC,
|
||||
|
||||
/* A kernel that is relatively quick to compile, but is specialized for the
|
||||
* scene being rendered. It only contains the functionality and even baked in
|
||||
* constants for values that means it needs to be recompiled whenever a
|
||||
* dependent setting is changed. The render performance of this kernel is
|
||||
* significantly faster though, and justifies the extra compile time.
|
||||
/* A intersection kernel that is very quick to specialize and results in faster intersection
|
||||
* kernel performance. It uses Metal function constants to replace several KernelData variables
|
||||
* with fixed constants.
|
||||
*/
|
||||
/* METAL_WIP: This isn't used and will require more changes to enable. */
|
||||
PSO_SPECIALISED,
|
||||
PSO_SPECIALIZED_INTERSECT,
|
||||
|
||||
/* A shading kernel that is slow to specialize, but results in faster shading kernel performance
|
||||
* rendered. It uses Metal function constants to replace several KernelData variables with fixed
|
||||
* constants and short-circuit all unused SVM node case handlers.
|
||||
*/
|
||||
PSO_SPECIALIZED_SHADE,
|
||||
|
||||
PSO_NUM
|
||||
};
|
||||
|
||||
const char *kernel_type_as_string(int kernel_type);
|
||||
const char *kernel_type_as_string(MetalPipelineType pso_type);
|
||||
|
||||
struct MetalKernelPipeline {
|
||||
|
||||
void compile();
|
||||
|
||||
id<MTLLibrary> mtlLibrary = nil;
|
||||
bool scene_specialized;
|
||||
MetalPipelineType pso_type;
|
||||
string source_md5;
|
||||
size_t usage_count = 0;
|
||||
|
||||
KernelData kernel_data_;
|
||||
bool use_metalrt;
|
||||
bool metalrt_hair;
|
||||
bool metalrt_hair_thick;
|
||||
@@ -75,6 +80,8 @@ struct MetalKernelPipeline {
|
||||
id<MTLComputePipelineState> pipeline = nil;
|
||||
int num_threads_per_block = 0;
|
||||
|
||||
bool should_use_binary_archive() const;
|
||||
|
||||
string error_str;
|
||||
|
||||
API_AVAILABLE(macos(11.0))
|
||||
@@ -85,7 +92,8 @@ struct MetalKernelPipeline {
|
||||
/* Cache of Metal kernels for each DeviceKernel. */
|
||||
namespace MetalDeviceKernels {
|
||||
|
||||
bool load(MetalDevice *device, bool scene_specialized);
|
||||
bool should_load_kernels(MetalDevice *device, MetalPipelineType pso_type);
|
||||
bool load(MetalDevice *device, MetalPipelineType pso_type);
|
||||
const MetalKernelPipeline *get_best_pipeline(const MetalDevice *device, DeviceKernel kernel);
|
||||
|
||||
} /* namespace MetalDeviceKernels */
|
||||
|
@@ -5,6 +5,7 @@
|
||||
|
||||
# include "device/metal/kernel.h"
|
||||
# include "device/metal/device_impl.h"
|
||||
# include "kernel/device/metal/function_constants.h"
|
||||
# include "util/md5.h"
|
||||
# include "util/path.h"
|
||||
# include "util/tbb.h"
|
||||
@@ -16,13 +17,15 @@ CCL_NAMESPACE_BEGIN
|
||||
/* limit to 2 MTLCompiler instances */
|
||||
int max_mtlcompiler_threads = 2;
|
||||
|
||||
const char *kernel_type_as_string(int kernel_type)
|
||||
const char *kernel_type_as_string(MetalPipelineType pso_type)
|
||||
{
|
||||
switch (kernel_type) {
|
||||
switch (pso_type) {
|
||||
case PSO_GENERIC:
|
||||
return "PSO_GENERIC";
|
||||
case PSO_SPECIALISED:
|
||||
return "PSO_SPECIALISED";
|
||||
case PSO_SPECIALIZED_INTERSECT:
|
||||
return "PSO_SPECIALIZED_INTERSECT";
|
||||
case PSO_SPECIALIZED_SHADE:
|
||||
return "PSO_SPECIALIZED_SHADE";
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
@@ -50,7 +53,11 @@ struct ShaderCache {
|
||||
|
||||
/* Non-blocking request for a kernel, optionally specialized to the scene being rendered by
|
||||
* device. */
|
||||
void load_kernel(DeviceKernel kernel, MetalDevice *device, bool scene_specialized);
|
||||
void load_kernel(DeviceKernel kernel, MetalDevice *device, MetalPipelineType pso_type);
|
||||
|
||||
bool should_load_kernel(DeviceKernel device_kernel,
|
||||
MetalDevice *device,
|
||||
MetalPipelineType pso_type);
|
||||
|
||||
void wait_for_all();
|
||||
|
||||
@@ -139,9 +146,53 @@ void ShaderCache::compile_thread_func(int thread_index)
|
||||
}
|
||||
}
|
||||
|
||||
bool ShaderCache::should_load_kernel(DeviceKernel device_kernel,
|
||||
MetalDevice *device,
|
||||
MetalPipelineType pso_type)
|
||||
{
|
||||
if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
|
||||
/* Skip megakernel. */
|
||||
return false;
|
||||
}
|
||||
|
||||
if (device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
|
||||
if ((device->kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0) {
|
||||
/* Skip shade_surface_raytrace kernel if the scene doesn't require it. */
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (pso_type != PSO_GENERIC) {
|
||||
/* Only specialize kernels where it can make an impact. */
|
||||
if (device_kernel < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
|
||||
device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Only specialize shading / intersection kernels as requested. */
|
||||
bool is_shade_kernel = (device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
bool is_shade_pso = (pso_type == PSO_SPECIALIZED_SHADE);
|
||||
if (is_shade_pso != is_shade_kernel) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
/* check whether the kernel has already been requested / cached */
|
||||
thread_scoped_lock lock(cache_mutex);
|
||||
for (auto &pipeline : pipelines[device_kernel]) {
|
||||
if (pipeline->source_md5 == device->source_md5[pso_type]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void ShaderCache::load_kernel(DeviceKernel device_kernel,
|
||||
MetalDevice *device,
|
||||
bool scene_specialized)
|
||||
MetalPipelineType pso_type)
|
||||
{
|
||||
{
|
||||
/* create compiler threads on first run */
|
||||
@@ -154,52 +205,21 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
|
||||
}
|
||||
}
|
||||
|
||||
if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
|
||||
/* skip megakernel */
|
||||
if (!should_load_kernel(device_kernel, device, pso_type)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (scene_specialized) {
|
||||
/* Only specialize kernels where it can make an impact. */
|
||||
if (device_kernel < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
|
||||
device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
/* check whether the kernel has already been requested / cached */
|
||||
thread_scoped_lock lock(cache_mutex);
|
||||
for (auto &pipeline : pipelines[device_kernel]) {
|
||||
if (scene_specialized) {
|
||||
if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) {
|
||||
/* we already requested a pipeline that is specialized for this kernel data */
|
||||
metal_printf("Specialized kernel already requested (%s)\n",
|
||||
device_kernel_as_string(device_kernel));
|
||||
return;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (pipeline->source_md5 == device->source_md5[PSO_GENERIC]) {
|
||||
/* we already requested a generic pipeline for this kernel */
|
||||
metal_printf("Generic kernel already requested (%s)\n",
|
||||
device_kernel_as_string(device_kernel));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
incomplete_requests++;
|
||||
|
||||
PipelineRequest request;
|
||||
request.pipeline = new MetalKernelPipeline;
|
||||
request.pipeline->scene_specialized = scene_specialized;
|
||||
memcpy(&request.pipeline->kernel_data_,
|
||||
&device->launch_params.data,
|
||||
sizeof(request.pipeline->kernel_data_));
|
||||
request.pipeline->pso_type = pso_type;
|
||||
request.pipeline->mtlDevice = mtlDevice;
|
||||
request.pipeline->source_md5 =
|
||||
device->source_md5[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC];
|
||||
request.pipeline->mtlLibrary =
|
||||
device->mtlLibrary[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC];
|
||||
request.pipeline->source_md5 = device->source_md5[pso_type];
|
||||
request.pipeline->mtlLibrary = device->mtlLibrary[pso_type];
|
||||
request.pipeline->device_kernel = device_kernel;
|
||||
request.pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup;
|
||||
|
||||
@@ -214,7 +234,24 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
|
||||
|
||||
{
|
||||
thread_scoped_lock lock(cache_mutex);
|
||||
pipelines[device_kernel].push_back(unique_ptr<MetalKernelPipeline>(request.pipeline));
|
||||
auto &collection = pipelines[device_kernel];
|
||||
|
||||
/* Cache up to 3 kernel variants with the same pso_type, purging oldest first. */
|
||||
int max_entries_of_same_pso_type = 3;
|
||||
for (int i = (int)collection.size() - 1; i >= 0; i--) {
|
||||
if (collection[i]->pso_type == pso_type) {
|
||||
max_entries_of_same_pso_type -= 1;
|
||||
if (max_entries_of_same_pso_type == 0) {
|
||||
metal_printf("Purging oldest %s:%s kernel from ShaderCache\n",
|
||||
kernel_type_as_string(pso_type),
|
||||
device_kernel_as_string(device_kernel));
|
||||
collection.erase(collection.begin() + i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
collection.push_back(unique_ptr<MetalKernelPipeline>(request.pipeline));
|
||||
request_queue.push_back(request);
|
||||
}
|
||||
cond_var.notify_one();
|
||||
@@ -248,8 +285,9 @@ MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const M
|
||||
continue;
|
||||
}
|
||||
|
||||
if (pipeline->scene_specialized) {
|
||||
if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) {
|
||||
if (pipeline->pso_type != PSO_GENERIC) {
|
||||
if (pipeline->source_md5 == device->source_md5[PSO_SPECIALIZED_INTERSECT] ||
|
||||
pipeline->source_md5 == device->source_md5[PSO_SPECIALIZED_SHADE]) {
|
||||
best_pipeline = pipeline.get();
|
||||
}
|
||||
}
|
||||
@@ -258,13 +296,65 @@ MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const M
|
||||
}
|
||||
}
|
||||
|
||||
if (best_pipeline->usage_count == 0 && best_pipeline->pso_type != PSO_GENERIC) {
|
||||
metal_printf("Swapping in %s version of %s\n",
|
||||
kernel_type_as_string(best_pipeline->pso_type),
|
||||
device_kernel_as_string(kernel));
|
||||
}
|
||||
best_pipeline->usage_count += 1;
|
||||
|
||||
return best_pipeline;
|
||||
}
|
||||
|
||||
bool MetalKernelPipeline::should_use_binary_archive() const
|
||||
{
|
||||
if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
|
||||
if (atoi(str) != 0) {
|
||||
/* Don't archive if we have opted out by env var. */
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (pso_type == PSO_GENERIC) {
|
||||
/* Archive the generic kernels. */
|
||||
return true;
|
||||
}
|
||||
|
||||
if (device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND &&
|
||||
device_kernel <= DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW) {
|
||||
/* Archive all shade kernels - they take a long time to compile. */
|
||||
return true;
|
||||
}
|
||||
|
||||
/* The remaining kernels are all fast to compile. They may get cached by the system shader cache,
|
||||
* but will be quick to regenerate if not. */
|
||||
return false;
|
||||
}
|
||||
|
||||
static MTLFunctionConstantValues *GetConstantValues(KernelData const *data = nullptr)
|
||||
{
|
||||
MTLFunctionConstantValues *constant_values = [MTLFunctionConstantValues new];
|
||||
|
||||
MTLDataType MTLDataType_int = MTLDataTypeInt;
|
||||
MTLDataType MTLDataType_float = MTLDataTypeFloat;
|
||||
MTLDataType MTLDataType_float4 = MTLDataTypeFloat4;
|
||||
KernelData zero_data = {0};
|
||||
if (!data) {
|
||||
data = &zero_data;
|
||||
}
|
||||
|
||||
# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
|
||||
[constant_values setConstantValue:&data->parent.name \
|
||||
type:MTLDataType_##_type \
|
||||
atIndex:KernelData_##parent##_##name];
|
||||
|
||||
# include "kernel/data_template.h"
|
||||
|
||||
return constant_values;
|
||||
}
|
||||
|
||||
void MetalKernelPipeline::compile()
|
||||
{
|
||||
int pso_type = scene_specialized ? PSO_SPECIALISED : PSO_GENERIC;
|
||||
|
||||
const std::string function_name = std::string("cycles_metal_") +
|
||||
device_kernel_as_string(device_kernel);
|
||||
|
||||
@@ -281,6 +371,17 @@ void MetalKernelPipeline::compile()
|
||||
if (@available(macOS 11.0, *)) {
|
||||
MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
|
||||
func_desc.name = entryPoint;
|
||||
|
||||
if (pso_type == PSO_SPECIALIZED_SHADE) {
|
||||
func_desc.constantValues = GetConstantValues(&kernel_data_);
|
||||
}
|
||||
else if (pso_type == PSO_SPECIALIZED_INTERSECT) {
|
||||
func_desc.constantValues = GetConstantValues(&kernel_data_);
|
||||
}
|
||||
else {
|
||||
func_desc.constantValues = GetConstantValues();
|
||||
}
|
||||
|
||||
function = [mtlLibrary newFunctionWithDescriptor:func_desc error:&error];
|
||||
}
|
||||
|
||||
@@ -427,10 +528,7 @@ void MetalKernelPipeline::compile()
|
||||
|
||||
MTLPipelineOption pipelineOptions = MTLPipelineOptionNone;
|
||||
|
||||
bool use_binary_archive = true;
|
||||
if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
|
||||
use_binary_archive = (atoi(str) == 0);
|
||||
}
|
||||
bool use_binary_archive = should_use_binary_archive();
|
||||
|
||||
id<MTLBinaryArchive> archive = nil;
|
||||
string metalbin_path;
|
||||
@@ -608,17 +706,30 @@ void MetalKernelPipeline::compile()
|
||||
}
|
||||
}
|
||||
|
||||
bool MetalDeviceKernels::load(MetalDevice *device, bool scene_specialized)
|
||||
bool MetalDeviceKernels::load(MetalDevice *device, MetalPipelineType pso_type)
|
||||
{
|
||||
const double starttime = time_dt();
|
||||
auto shader_cache = get_shader_cache(device->mtlDevice);
|
||||
for (int i = 0; i < DEVICE_KERNEL_NUM; i++) {
|
||||
shader_cache->load_kernel((DeviceKernel)i, device, pso_type);
|
||||
}
|
||||
|
||||
shader_cache->wait_for_all();
|
||||
metal_printf("Back-end compilation finished in %.1f seconds (%s)\n",
|
||||
time_dt() - starttime,
|
||||
kernel_type_as_string(pso_type));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MetalDeviceKernels::should_load_kernels(MetalDevice *device, MetalPipelineType pso_type)
|
||||
{
|
||||
auto shader_cache = get_shader_cache(device->mtlDevice);
|
||||
for (int i = 0; i < DEVICE_KERNEL_NUM; i++) {
|
||||
shader_cache->load_kernel((DeviceKernel)i, device, scene_specialized);
|
||||
if (shader_cache->should_load_kernel((DeviceKernel)i, device, pso_type)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!scene_specialized || getenv("CYCLES_METAL_PROFILING")) {
|
||||
shader_cache->wait_for_all();
|
||||
}
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
const MetalKernelPipeline *MetalDeviceKernels::get_best_pipeline(const MetalDevice *device,
|
||||
|
@@ -24,6 +24,7 @@ class MetalDeviceQueue : public DeviceQueue {
|
||||
|
||||
virtual int num_concurrent_states(const size_t) const override;
|
||||
virtual int num_concurrent_busy_states() const override;
|
||||
virtual int num_sort_partition_elements() const override;
|
||||
|
||||
virtual void init_execution() override;
|
||||
|
||||
|
@@ -293,6 +293,11 @@ int MetalDeviceQueue::num_concurrent_busy_states() const
|
||||
return result;
|
||||
}
|
||||
|
||||
int MetalDeviceQueue::num_sort_partition_elements() const
|
||||
{
|
||||
return MetalInfo::optimal_sort_partition_elements(metal_device_->mtlDevice);
|
||||
}
|
||||
|
||||
void MetalDeviceQueue::init_execution()
|
||||
{
|
||||
/* Synchronize all textures and memory copies before executing task. */
|
||||
@@ -359,7 +364,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
/* Prepare any non-pointer (i.e. plain-old-data) KernelParamsMetal data */
|
||||
/* The plain-old-data is contiguous, continuing to the end of KernelParamsMetal */
|
||||
size_t plain_old_launch_data_offset = offsetof(KernelParamsMetal, integrator_state) +
|
||||
sizeof(IntegratorStateGPU);
|
||||
offsetof(IntegratorStateGPU, sort_partition_divisor);
|
||||
size_t plain_old_launch_data_size = sizeof(KernelParamsMetal) - plain_old_launch_data_offset;
|
||||
memcpy(init_arg_buffer + globals_offsets + plain_old_launch_data_offset,
|
||||
(uint8_t *)&metal_device_->launch_params + plain_old_launch_data_offset,
|
||||
@@ -416,7 +421,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
|
||||
/* this relies on IntegratorStateGPU layout being contiguous device_ptrs */
|
||||
const size_t pointer_block_end = offsetof(KernelParamsMetal, integrator_state) +
|
||||
sizeof(IntegratorStateGPU);
|
||||
offsetof(IntegratorStateGPU, sort_partition_divisor);
|
||||
for (size_t offset = 0; offset < pointer_block_end; offset += sizeof(device_ptr)) {
|
||||
int pointer_index = int(offset / sizeof(device_ptr));
|
||||
MetalDevice::MetalMem *mmem = *(
|
||||
|
@@ -37,6 +37,7 @@ struct MetalInfo {
|
||||
static int get_apple_gpu_core_count(id<MTLDevice> device);
|
||||
static MetalGPUVendor get_device_vendor(id<MTLDevice> device);
|
||||
static AppleGPUArchitecture get_apple_gpu_architecture(id<MTLDevice> device);
|
||||
static int optimal_sort_partition_elements(id<MTLDevice> device);
|
||||
static string get_device_name(id<MTLDevice> device);
|
||||
};
|
||||
|
||||
|
@@ -72,6 +72,21 @@ MetalGPUVendor MetalInfo::get_device_vendor(id<MTLDevice> device)
|
||||
return METAL_GPU_UNKNOWN;
|
||||
}
|
||||
|
||||
int MetalInfo::optimal_sort_partition_elements(id<MTLDevice> device)
|
||||
{
|
||||
if (auto str = getenv("CYCLES_METAL_SORT_PARTITION_ELEMENTS")) {
|
||||
return atoi(str);
|
||||
}
|
||||
|
||||
/* On M1 and M2 GPUs, we see better cache utilization if we partition the active indices before
|
||||
* sorting each partition by material. Partitioning into chunks of 65536 elements results in an
|
||||
* overall render time speedup of up to 15%. */
|
||||
if (get_device_vendor(device) == METAL_GPU_APPLE) {
|
||||
return 65536;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
|
||||
{
|
||||
static vector<id<MTLDevice>> usable_devices;
|
||||
|
@@ -2047,7 +2047,7 @@ void OptiXDevice::const_copy_to(const char *name, void *host, size_t size)
|
||||
|
||||
/* Update traversable handle (since it is different for each device on multi devices). */
|
||||
KernelData *const data = (KernelData *)host;
|
||||
*(OptixTraversableHandle *)&data->bvh.scene = tlas_handle;
|
||||
*(OptixTraversableHandle *)&data->device_bvh = tlas_handle;
|
||||
|
||||
update_launch_params(offsetof(KernelParamsOptiX, data), host, size);
|
||||
return;
|
||||
|
@@ -105,6 +105,13 @@ class DeviceQueue {
|
||||
* value. */
|
||||
virtual int num_concurrent_busy_states() const = 0;
|
||||
|
||||
/* Number of elements in a partition of sorted shaders, that improves memory locality of
|
||||
* integrator state fetch at the cost of decreased coherence for shader kernel execution. */
|
||||
virtual int num_sort_partition_elements() const
|
||||
{
|
||||
return 65536;
|
||||
}
|
||||
|
||||
/* Initialize execution of kernels on this queue.
|
||||
*
|
||||
* Will, for example, load all data required by the kernels from Device to global or path state.
|
||||
|
@@ -373,7 +373,7 @@ void PathTrace::path_trace(RenderWork &render_work)
|
||||
work_balance_infos_[i].time_spent += work_time;
|
||||
work_balance_infos_[i].occupancy = statistics.occupancy;
|
||||
|
||||
VLOG_WORK << "Rendered " << num_samples << " samples in " << work_time << " seconds ("
|
||||
VLOG_INFO << "Rendered " << num_samples << " samples in " << work_time << " seconds ("
|
||||
<< work_time / num_samples
|
||||
<< " seconds per sample), occupancy: " << statistics.occupancy;
|
||||
});
|
||||
|
@@ -181,27 +181,45 @@ void PathTraceWorkGPU::alloc_integrator_queue()
|
||||
|
||||
void PathTraceWorkGPU::alloc_integrator_sorting()
|
||||
{
|
||||
/* Compute sort partitions, to balance between memory locality and coherence.
|
||||
* Sort partitioning becomes less effective when more shaders are in the wavefront. In lieu of a
|
||||
* more sophisticated heuristic we simply disable sort partitioning if the shader count is high.
|
||||
*/
|
||||
num_sort_partitions_ = 1;
|
||||
if (device_scene_->data.max_shaders < 300) {
|
||||
const int num_elements = queue_->num_sort_partition_elements();
|
||||
if (num_elements) {
|
||||
num_sort_partitions_ = max(max_num_paths_ / num_elements, 1);
|
||||
}
|
||||
}
|
||||
|
||||
integrator_state_gpu_.sort_partition_divisor = (int)divide_up(max_num_paths_,
|
||||
num_sort_partitions_);
|
||||
|
||||
/* Allocate arrays for shader sorting. */
|
||||
const int max_shaders = device_scene_->data.max_shaders;
|
||||
if (integrator_shader_sort_counter_.size() < max_shaders) {
|
||||
integrator_shader_sort_counter_.alloc(max_shaders);
|
||||
const int sort_buckets = device_scene_->data.max_shaders * num_sort_partitions_;
|
||||
if (integrator_shader_sort_counter_.size() < sort_buckets) {
|
||||
integrator_shader_sort_counter_.alloc(sort_buckets);
|
||||
integrator_shader_sort_counter_.zero_to_device();
|
||||
|
||||
integrator_shader_raytrace_sort_counter_.alloc(max_shaders);
|
||||
integrator_shader_raytrace_sort_counter_.zero_to_device();
|
||||
|
||||
integrator_shader_mnee_sort_counter_.alloc(max_shaders);
|
||||
integrator_shader_mnee_sort_counter_.zero_to_device();
|
||||
|
||||
integrator_shader_sort_prefix_sum_.alloc(max_shaders);
|
||||
integrator_shader_sort_prefix_sum_.zero_to_device();
|
||||
|
||||
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] =
|
||||
(int *)integrator_shader_sort_counter_.device_pointer;
|
||||
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
|
||||
(int *)integrator_shader_raytrace_sort_counter_.device_pointer;
|
||||
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
|
||||
(int *)integrator_shader_mnee_sort_counter_.device_pointer;
|
||||
|
||||
if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
|
||||
integrator_shader_raytrace_sort_counter_.alloc(sort_buckets);
|
||||
integrator_shader_raytrace_sort_counter_.zero_to_device();
|
||||
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
|
||||
(int *)integrator_shader_raytrace_sort_counter_.device_pointer;
|
||||
}
|
||||
|
||||
if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
|
||||
integrator_shader_mnee_sort_counter_.alloc(sort_buckets);
|
||||
integrator_shader_mnee_sort_counter_.zero_to_device();
|
||||
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
|
||||
(int *)integrator_shader_mnee_sort_counter_.device_pointer;
|
||||
}
|
||||
|
||||
integrator_shader_sort_prefix_sum_.alloc(sort_buckets);
|
||||
integrator_shader_sort_prefix_sum_.zero_to_device();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -333,8 +351,12 @@ void PathTraceWorkGPU::enqueue_reset()
|
||||
queue_->enqueue(DEVICE_KERNEL_INTEGRATOR_RESET, max_num_paths_, args);
|
||||
queue_->zero_to_device(integrator_queue_counter_);
|
||||
queue_->zero_to_device(integrator_shader_sort_counter_);
|
||||
queue_->zero_to_device(integrator_shader_raytrace_sort_counter_);
|
||||
queue_->zero_to_device(integrator_shader_mnee_sort_counter_);
|
||||
if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
|
||||
queue_->zero_to_device(integrator_shader_raytrace_sort_counter_);
|
||||
}
|
||||
if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
|
||||
queue_->zero_to_device(integrator_shader_mnee_sort_counter_);
|
||||
}
|
||||
|
||||
/* Tiles enqueue need to know number of active paths, which is based on this counter. Zero the
|
||||
* counter on the host side because `zero_to_device()` is not doing it. */
|
||||
@@ -486,9 +508,9 @@ void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel kernel,
|
||||
/* Compute prefix sum of number of active paths with each shader. */
|
||||
{
|
||||
const int work_size = 1;
|
||||
int max_shaders = device_scene_->data.max_shaders;
|
||||
int sort_buckets = device_scene_->data.max_shaders * num_sort_partitions_;
|
||||
|
||||
DeviceKernelArguments args(&d_counter, &d_prefix_sum, &max_shaders);
|
||||
DeviceKernelArguments args(&d_counter, &d_prefix_sum, &sort_buckets);
|
||||
|
||||
queue_->enqueue(DEVICE_KERNEL_PREFIX_SUM, work_size, args);
|
||||
}
|
||||
|
@@ -156,6 +156,9 @@ class PathTraceWorkGPU : public PathTraceWork {
|
||||
bool interop_use_checked_ = false;
|
||||
bool interop_use_ = false;
|
||||
|
||||
/* Number of partitions to sort state indices into prior to material sort. */
|
||||
int num_sort_partitions_;
|
||||
|
||||
/* Maximum number of concurrent integrator states. */
|
||||
int max_num_paths_;
|
||||
|
||||
|
@@ -79,6 +79,7 @@ set(SRC_KERNEL_DEVICE_METAL_HEADERS
|
||||
device/metal/compat.h
|
||||
device/metal/context_begin.h
|
||||
device/metal/context_end.h
|
||||
device/metal/function_constants.h
|
||||
device/metal/globals.h
|
||||
)
|
||||
|
||||
@@ -154,6 +155,7 @@ set(SRC_KERNEL_SVM_HEADERS
|
||||
svm/math_util.h
|
||||
svm/mix.h
|
||||
svm/musgrave.h
|
||||
svm/node_types_template.h
|
||||
svm/noise.h
|
||||
svm/noisetex.h
|
||||
svm/normal.h
|
||||
@@ -282,6 +284,7 @@ set(SRC_KERNEL_UTIL_HEADERS
|
||||
|
||||
set(SRC_KERNEL_TYPES_HEADERS
|
||||
data_arrays.h
|
||||
data_template.h
|
||||
tables.h
|
||||
types.h
|
||||
)
|
||||
@@ -844,10 +847,9 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
|
||||
else()
|
||||
list(APPEND sycl_compiler_flags -fPIC)
|
||||
|
||||
# avoid getting __FAST_MATH__ to be defined for the graphics compiler on CentOS 7 until the compile-time issue it triggers gets fixed.
|
||||
if(WITH_CYCLES_ONEAPI_BINARIES)
|
||||
list(APPEND sycl_compiler_flags -fhonor-nans)
|
||||
endif()
|
||||
# We avoid getting __FAST_MATH__ to be defined when building on CentOS 7 until the compilation crash
|
||||
# it triggers at either AoT or JIT stages gets fixed.
|
||||
list(APPEND sycl_compiler_flags -fhonor-nans)
|
||||
|
||||
# add $ORIGIN to cycles_kernel_oneapi.so rpath so libsycl.so and
|
||||
# libpi_level_zero.so can be placed next to it and get found.
|
||||
|
@@ -172,11 +172,11 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
|
||||
ray_flags |= OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT;
|
||||
}
|
||||
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
|
||||
ray->P,
|
||||
ray->D,
|
||||
0.0f,
|
||||
ray->t,
|
||||
ray->tmin,
|
||||
ray->tmax,
|
||||
ray->time,
|
||||
ray_mask,
|
||||
ray_flags,
|
||||
@@ -203,28 +203,28 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
|
||||
#elif defined(__METALRT__)
|
||||
|
||||
if (!scene_intersect_valid(ray)) {
|
||||
isect->t = ray->t;
|
||||
isect->t = ray->tmax;
|
||||
isect->type = PRIMITIVE_NONE;
|
||||
return false;
|
||||
}
|
||||
|
||||
# if defined(__KERNEL_DEBUG__)
|
||||
if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
|
||||
isect->t = ray->t;
|
||||
isect->t = ray->tmax;
|
||||
isect->type = PRIMITIVE_NONE;
|
||||
kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
|
||||
isect->t = ray->t;
|
||||
isect->t = ray->tmax;
|
||||
isect->type = PRIMITIVE_NONE;
|
||||
kernel_assert(!"Invalid ift_default");
|
||||
return false;
|
||||
}
|
||||
# endif
|
||||
|
||||
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
|
||||
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
|
||||
metalrt_intersector_type metalrt_intersect;
|
||||
|
||||
if (!kernel_data.bvh.have_curves) {
|
||||
@@ -263,7 +263,7 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
|
||||
# endif
|
||||
|
||||
if (intersection.type == intersection_type::none) {
|
||||
isect->t = ray->t;
|
||||
isect->t = ray->tmax;
|
||||
isect->type = PRIMITIVE_NONE;
|
||||
|
||||
return false;
|
||||
@@ -295,14 +295,14 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.bvh.scene) {
|
||||
isect->t = ray->t;
|
||||
if (kernel_data.device_bvh) {
|
||||
isect->t = ray->tmax;
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
|
||||
IntersectContext rtc_ctx(&ctx);
|
||||
RTCRayHit ray_hit;
|
||||
ctx.ray = ray;
|
||||
kernel_embree_setup_rayhit(*ray, ray_hit, visibility);
|
||||
rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit);
|
||||
rtcIntersect1(kernel_data.device_bvh, &rtc_ctx.context, &ray_hit);
|
||||
if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID &&
|
||||
ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
|
||||
kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
|
||||
@@ -357,11 +357,11 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
if (local_isect) {
|
||||
local_isect->num_hits = 0; /* Initialize hit count to zero. */
|
||||
}
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
|
||||
ray->P,
|
||||
ray->D,
|
||||
0.0f,
|
||||
ray->t,
|
||||
ray->tmin,
|
||||
ray->tmax,
|
||||
ray->time,
|
||||
0xFF,
|
||||
/* Need to always call into __anyhit__kernel_optix_local_hit. */
|
||||
@@ -405,7 +405,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
}
|
||||
# endif
|
||||
|
||||
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
|
||||
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
|
||||
metalrt_intersector_type metalrt_intersect;
|
||||
|
||||
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
||||
@@ -451,7 +451,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.bvh.scene) {
|
||||
if (kernel_data.device_bvh) {
|
||||
const bool has_bvh = !(kernel_data_fetch(object_flag, local_object) &
|
||||
SD_OBJECT_TRANSFORM_APPLIED);
|
||||
CCLIntersectContext ctx(
|
||||
@@ -470,13 +470,13 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
|
||||
/* If this object has its own BVH, use it. */
|
||||
if (has_bvh) {
|
||||
RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2);
|
||||
RTCGeometry geom = rtcGetGeometry(kernel_data.device_bvh, local_object * 2);
|
||||
if (geom) {
|
||||
float3 P = ray->P;
|
||||
float3 dir = ray->D;
|
||||
float3 idir = ray->D;
|
||||
Transform ob_itfm;
|
||||
rtc_ray.tfar = ray->t *
|
||||
rtc_ray.tfar = ray->tmax *
|
||||
bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
/* bvh_instance_motion_push() returns the inverse transform but
|
||||
* it's not needed here. */
|
||||
@@ -496,7 +496,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
}
|
||||
}
|
||||
else {
|
||||
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
|
||||
rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
|
||||
}
|
||||
|
||||
/* rtcOccluded1 sets tfar to -inf if a hit was found. */
|
||||
@@ -539,11 +539,11 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
||||
ray_mask = 0xFF;
|
||||
}
|
||||
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
|
||||
ray->P,
|
||||
ray->D,
|
||||
0.0f,
|
||||
ray->t,
|
||||
ray->tmin,
|
||||
ray->tmax,
|
||||
ray->time,
|
||||
ray_mask,
|
||||
/* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */
|
||||
@@ -582,7 +582,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
||||
}
|
||||
# endif
|
||||
|
||||
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
|
||||
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
|
||||
metalrt_intersector_type metalrt_intersect;
|
||||
|
||||
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
||||
@@ -633,7 +633,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.bvh.scene) {
|
||||
if (kernel_data.device_bvh) {
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
|
||||
Intersection *isect_array = (Intersection *)state->shadow_isect;
|
||||
ctx.isect_s = isect_array;
|
||||
@@ -642,7 +642,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
||||
IntersectContext rtc_ctx(&ctx);
|
||||
RTCRay rtc_ray;
|
||||
kernel_embree_setup_ray(*ray, rtc_ray, visibility);
|
||||
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
|
||||
rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
|
||||
|
||||
*num_recorded_hits = ctx.num_recorded_hits;
|
||||
*throughput = ctx.throughput;
|
||||
@@ -698,11 +698,11 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
|
||||
ray_mask = 0xFF;
|
||||
}
|
||||
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
|
||||
ray->P,
|
||||
ray->D,
|
||||
0.0f,
|
||||
ray->t,
|
||||
ray->tmin,
|
||||
ray->tmax,
|
||||
ray->time,
|
||||
ray_mask,
|
||||
/* Need to always call into __anyhit__kernel_optix_volume_test. */
|
||||
@@ -744,7 +744,7 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
|
||||
}
|
||||
# endif
|
||||
|
||||
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
|
||||
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
|
||||
metalrt_intersector_type metalrt_intersect;
|
||||
|
||||
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
||||
@@ -825,7 +825,7 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals kg,
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.bvh.scene) {
|
||||
if (kernel_data.device_bvh) {
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
|
||||
ctx.isect_s = isect;
|
||||
ctx.max_hits = max_hits;
|
||||
@@ -834,7 +834,7 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals kg,
|
||||
IntersectContext rtc_ctx(&ctx);
|
||||
RTCRay rtc_ray;
|
||||
kernel_embree_setup_ray(*ray, rtc_ray, visibility);
|
||||
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
|
||||
rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
|
||||
return ctx.num_hits;
|
||||
}
|
||||
# endif /* __EMBREE__ */
|
||||
|
@@ -83,8 +83,8 @@ ccl_device_inline void kernel_embree_setup_ray(const Ray &ray,
|
||||
rtc_ray.dir_x = ray.D.x;
|
||||
rtc_ray.dir_y = ray.D.y;
|
||||
rtc_ray.dir_z = ray.D.z;
|
||||
rtc_ray.tnear = 0.0f;
|
||||
rtc_ray.tfar = ray.t;
|
||||
rtc_ray.tnear = ray.tmin;
|
||||
rtc_ray.tfar = ray.tmax;
|
||||
rtc_ray.time = ray.time;
|
||||
rtc_ray.mask = visibility;
|
||||
}
|
||||
@@ -107,7 +107,7 @@ ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg
|
||||
const int oID = hit->instID[0] / 2;
|
||||
if ((ray->self.object == oID) || (ray->self.light_object == oID)) {
|
||||
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
|
||||
rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
|
||||
rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
|
||||
const int pID = hit->primID +
|
||||
(intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
|
||||
status = intersection_skip_self_shadow(ray->self, oID, pID);
|
||||
@@ -117,7 +117,7 @@ ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg
|
||||
const int oID = hit->geomID / 2;
|
||||
if ((ray->self.object == oID) || (ray->self.light_object == oID)) {
|
||||
const int pID = hit->primID + (intptr_t)rtcGetGeometryUserData(
|
||||
rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
|
||||
rtcGetGeometry(kernel_data.device_bvh, hit->geomID));
|
||||
status = intersection_skip_self_shadow(ray->self, oID, pID);
|
||||
}
|
||||
}
|
||||
@@ -133,14 +133,14 @@ ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
|
||||
isect->t = ray->tfar;
|
||||
if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
|
||||
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
|
||||
rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
|
||||
rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
|
||||
isect->prim = hit->primID +
|
||||
(intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
|
||||
isect->object = hit->instID[0] / 2;
|
||||
}
|
||||
else {
|
||||
isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(
|
||||
rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
|
||||
rtcGetGeometry(kernel_data.device_bvh, hit->geomID));
|
||||
isect->object = hit->geomID / 2;
|
||||
}
|
||||
|
||||
@@ -166,7 +166,7 @@ ccl_device_inline void kernel_embree_convert_sss_hit(
|
||||
isect->v = hit->u;
|
||||
isect->t = ray->tfar;
|
||||
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
|
||||
rtcGetGeometry(kernel_data.bvh.scene, object * 2));
|
||||
rtcGetGeometry(kernel_data.device_bvh, object * 2));
|
||||
isect->prim = hit->primID +
|
||||
(intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
|
||||
isect->object = object;
|
||||
|
@@ -47,8 +47,9 @@ ccl_device_inline
|
||||
float3 P = ray->P;
|
||||
float3 dir = bvh_clamp_direction(ray->D);
|
||||
float3 idir = bvh_inverse_direction(dir);
|
||||
float tmin = ray->tmin;
|
||||
int object = OBJECT_NONE;
|
||||
float isect_t = ray->t;
|
||||
float isect_t = ray->tmax;
|
||||
|
||||
if (local_isect != NULL) {
|
||||
local_isect->num_hits = 0;
|
||||
@@ -59,10 +60,13 @@ ccl_device_inline
|
||||
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
Transform ob_itfm;
|
||||
isect_t *= bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
const float t_world_to_instance = bvh_instance_motion_push(
|
||||
kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
#else
|
||||
isect_t *= bvh_instance_push(kg, local_object, ray, &P, &dir, &idir);
|
||||
const float t_world_to_instance = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir);
|
||||
#endif
|
||||
isect_t *= t_world_to_instance;
|
||||
tmin *= t_world_to_instance;
|
||||
object = local_object;
|
||||
}
|
||||
|
||||
@@ -81,6 +85,7 @@ ccl_device_inline
|
||||
dir,
|
||||
#endif
|
||||
idir,
|
||||
tmin,
|
||||
isect_t,
|
||||
node_addr,
|
||||
PATH_RAY_ALL_VISIBILITY,
|
||||
@@ -155,6 +160,7 @@ ccl_device_inline
|
||||
local_object,
|
||||
prim,
|
||||
prim_addr,
|
||||
tmin,
|
||||
isect_t,
|
||||
lcg_state,
|
||||
max_hits)) {
|
||||
@@ -191,6 +197,7 @@ ccl_device_inline
|
||||
local_object,
|
||||
prim,
|
||||
prim_addr,
|
||||
tmin,
|
||||
isect_t,
|
||||
lcg_state,
|
||||
max_hits)) {
|
||||
|
@@ -18,7 +18,8 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals kg
|
||||
ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
|
||||
const float3 P,
|
||||
const float3 idir,
|
||||
const float t,
|
||||
const float tmin,
|
||||
const float tmax,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
@@ -39,8 +40,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
|
||||
float c0hiy = (node1.z - P.y) * idir.y;
|
||||
float c0loz = (node2.x - P.z) * idir.z;
|
||||
float c0hiz = (node2.z - P.z) * idir.z;
|
||||
float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
|
||||
float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
|
||||
float c0min = max4(tmin, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
|
||||
float c0max = min4(tmax, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
|
||||
|
||||
float c1lox = (node0.y - P.x) * idir.x;
|
||||
float c1hix = (node0.w - P.x) * idir.x;
|
||||
@@ -48,8 +49,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
|
||||
float c1hiy = (node1.w - P.y) * idir.y;
|
||||
float c1loz = (node2.y - P.z) * idir.z;
|
||||
float c1hiz = (node2.w - P.z) * idir.z;
|
||||
float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
|
||||
float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
|
||||
float c1min = max4(tmin, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
|
||||
float c1max = min4(tmax, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
|
||||
|
||||
dist[0] = c0min;
|
||||
dist[1] = c1min;
|
||||
@@ -66,7 +67,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
|
||||
ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float t,
|
||||
const float tmin,
|
||||
const float tmax,
|
||||
int node_addr,
|
||||
int child,
|
||||
float dist[2])
|
||||
@@ -83,8 +85,8 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg,
|
||||
const float far_x = max(lower_xyz.x, upper_xyz.x);
|
||||
const float far_y = max(lower_xyz.y, upper_xyz.y);
|
||||
const float far_z = max(lower_xyz.z, upper_xyz.z);
|
||||
const float tnear = max4(0.0f, near_x, near_y, near_z);
|
||||
const float tfar = min4(t, far_x, far_y, far_z);
|
||||
const float tnear = max4(tmin, near_x, near_y, near_z);
|
||||
const float tfar = min4(tmax, far_x, far_y, far_z);
|
||||
*dist = tnear;
|
||||
return tnear <= tfar;
|
||||
}
|
||||
@@ -93,7 +95,8 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float3 idir,
|
||||
const float t,
|
||||
const float tmin,
|
||||
const float tmax,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
@@ -102,7 +105,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg,
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0);
|
||||
#endif
|
||||
if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
|
||||
if (bvh_unaligned_node_intersect_child(kg, P, dir, tmin, tmax, node_addr, 0, &dist[0])) {
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
if ((__float_as_uint(cnodes.x) & visibility))
|
||||
#endif
|
||||
@@ -110,7 +113,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg,
|
||||
mask |= 1;
|
||||
}
|
||||
}
|
||||
if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
|
||||
if (bvh_unaligned_node_intersect_child(kg, P, dir, tmin, tmax, node_addr, 1, &dist[1])) {
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
if ((__float_as_uint(cnodes.y) & visibility))
|
||||
#endif
|
||||
@@ -125,16 +128,17 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float3 idir,
|
||||
const float t,
|
||||
const float tmin,
|
||||
const float tmax,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
{
|
||||
float4 node = kernel_data_fetch(bvh_nodes, node_addr);
|
||||
if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
|
||||
return bvh_unaligned_node_intersect(kg, P, dir, idir, t, node_addr, visibility, dist);
|
||||
return bvh_unaligned_node_intersect(kg, P, dir, idir, tmin, tmax, node_addr, visibility, dist);
|
||||
}
|
||||
else {
|
||||
return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist);
|
||||
return bvh_aligned_node_intersect(kg, P, idir, tmin, tmax, node_addr, visibility, dist);
|
||||
}
|
||||
}
|
||||
|
@@ -49,6 +49,7 @@ ccl_device_inline
|
||||
float3 P = ray->P;
|
||||
float3 dir = bvh_clamp_direction(ray->D);
|
||||
float3 idir = bvh_inverse_direction(dir);
|
||||
float tmin = ray->tmin;
|
||||
int object = OBJECT_NONE;
|
||||
uint num_hits = 0;
|
||||
|
||||
@@ -59,12 +60,12 @@ ccl_device_inline
|
||||
/* Max distance in world space. May be dynamically reduced when max number of
|
||||
* recorded hits is exceeded and we no longer need to find hits beyond the max
|
||||
* distance found. */
|
||||
float t_max_world = ray->t;
|
||||
float t_max_world = ray->tmax;
|
||||
|
||||
/* Current maximum distance to the intersection.
|
||||
* Is calculated as a ray length, transformed to an object space when entering
|
||||
* instance node. */
|
||||
float t_max_current = ray->t;
|
||||
float t_max_current = ray->tmax;
|
||||
|
||||
/* Conversion from world to local space for the current instance if any, 1.0
|
||||
* otherwise. */
|
||||
@@ -88,6 +89,7 @@ ccl_device_inline
|
||||
dir,
|
||||
#endif
|
||||
idir,
|
||||
tmin,
|
||||
t_max_current,
|
||||
node_addr,
|
||||
visibility,
|
||||
@@ -156,8 +158,16 @@ ccl_device_inline
|
||||
|
||||
switch (type & PRIMITIVE_ALL) {
|
||||
case PRIMITIVE_TRIANGLE: {
|
||||
hit = triangle_intersect(
|
||||
kg, &isect, P, dir, t_max_current, visibility, prim_object, prim, prim_addr);
|
||||
hit = triangle_intersect(kg,
|
||||
&isect,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
t_max_current,
|
||||
visibility,
|
||||
prim_object,
|
||||
prim,
|
||||
prim_addr);
|
||||
break;
|
||||
}
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
@@ -166,6 +176,7 @@ ccl_device_inline
|
||||
&isect,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
t_max_current,
|
||||
ray->time,
|
||||
visibility,
|
||||
@@ -189,8 +200,16 @@ ccl_device_inline
|
||||
}
|
||||
|
||||
const int curve_type = kernel_data_fetch(prim_type, prim_addr);
|
||||
hit = curve_intersect(
|
||||
kg, &isect, P, dir, t_max_current, prim_object, prim, ray->time, curve_type);
|
||||
hit = curve_intersect(kg,
|
||||
&isect,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
t_max_current,
|
||||
prim_object,
|
||||
prim,
|
||||
ray->time,
|
||||
curve_type);
|
||||
|
||||
break;
|
||||
}
|
||||
@@ -207,8 +226,16 @@ ccl_device_inline
|
||||
}
|
||||
|
||||
const int point_type = kernel_data_fetch(prim_type, prim_addr);
|
||||
hit = point_intersect(
|
||||
kg, &isect, P, dir, t_max_current, prim_object, prim, ray->time, point_type);
|
||||
hit = point_intersect(kg,
|
||||
&isect,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
t_max_current,
|
||||
prim_object,
|
||||
prim,
|
||||
ray->time,
|
||||
point_type);
|
||||
break;
|
||||
}
|
||||
#endif /* BVH_FEATURE(BVH_POINTCLOUD) */
|
||||
@@ -302,6 +329,7 @@ ccl_device_inline
|
||||
|
||||
/* Convert intersection to object space. */
|
||||
t_max_current *= t_world_to_instance;
|
||||
tmin *= t_world_to_instance;
|
||||
|
||||
++stack_ptr;
|
||||
kernel_assert(stack_ptr < BVH_STACK_SIZE);
|
||||
@@ -323,7 +351,8 @@ ccl_device_inline
|
||||
#endif
|
||||
|
||||
/* Restore world space ray length. */
|
||||
t_max_current = ray->t;
|
||||
tmin = ray->tmin;
|
||||
t_max_current = ray->tmax;
|
||||
|
||||
object = OBJECT_NONE;
|
||||
t_world_to_instance = 1.0f;
|
||||
|
@@ -43,13 +43,14 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
float3 P = ray->P;
|
||||
float3 dir = bvh_clamp_direction(ray->D);
|
||||
float3 idir = bvh_inverse_direction(dir);
|
||||
float tmin = ray->tmin;
|
||||
int object = OBJECT_NONE;
|
||||
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
Transform ob_itfm;
|
||||
#endif
|
||||
|
||||
isect->t = ray->t;
|
||||
isect->t = ray->tmax;
|
||||
isect->u = 0.0f;
|
||||
isect->v = 0.0f;
|
||||
isect->prim = PRIM_NONE;
|
||||
@@ -71,6 +72,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
dir,
|
||||
#endif
|
||||
idir,
|
||||
tmin,
|
||||
isect->t,
|
||||
node_addr,
|
||||
visibility,
|
||||
@@ -133,8 +135,16 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
|
||||
switch (type & PRIMITIVE_ALL) {
|
||||
case PRIMITIVE_TRIANGLE: {
|
||||
if (triangle_intersect(
|
||||
kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr)) {
|
||||
if (triangle_intersect(kg,
|
||||
isect,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
isect->t,
|
||||
visibility,
|
||||
prim_object,
|
||||
prim,
|
||||
prim_addr)) {
|
||||
/* shadow ray early termination */
|
||||
if (visibility & PATH_RAY_SHADOW_OPAQUE)
|
||||
return true;
|
||||
@@ -147,6 +157,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
isect,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
isect->t,
|
||||
ray->time,
|
||||
visibility,
|
||||
@@ -174,7 +185,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
|
||||
const int curve_type = kernel_data_fetch(prim_type, prim_addr);
|
||||
const bool hit = curve_intersect(
|
||||
kg, isect, P, dir, isect->t, prim_object, prim, ray->time, curve_type);
|
||||
kg, isect, P, dir, tmin, isect->t, prim_object, prim, ray->time, curve_type);
|
||||
if (hit) {
|
||||
/* shadow ray early termination */
|
||||
if (visibility & PATH_RAY_SHADOW_OPAQUE)
|
||||
@@ -195,7 +206,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
|
||||
const int point_type = kernel_data_fetch(prim_type, prim_addr);
|
||||
const bool hit = point_intersect(
|
||||
kg, isect, P, dir, isect->t, prim_object, prim, ray->time, point_type);
|
||||
kg, isect, P, dir, tmin, isect->t, prim_object, prim, ray->time, point_type);
|
||||
if (hit) {
|
||||
/* shadow ray early termination */
|
||||
if (visibility & PATH_RAY_SHADOW_OPAQUE)
|
||||
@@ -212,11 +223,15 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
object = kernel_data_fetch(prim_object, -prim_addr - 1);
|
||||
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
const float t_world_to_instance = bvh_instance_motion_push(
|
||||
kg, object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
#else
|
||||
isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
const float t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
#endif
|
||||
|
||||
isect->t *= t_world_to_instance;
|
||||
tmin *= t_world_to_instance;
|
||||
|
||||
++stack_ptr;
|
||||
kernel_assert(stack_ptr < BVH_STACK_SIZE);
|
||||
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
|
||||
@@ -235,6 +250,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
#else
|
||||
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
#endif
|
||||
tmin = ray->tmin;
|
||||
|
||||
object = OBJECT_NONE;
|
||||
node_addr = traversal_stack[stack_ptr];
|
||||
|
@@ -5,6 +5,19 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Offset intersection distance by the smallest possible amount, to skip
|
||||
* intersections at this distance. This works in cases where the ray start
|
||||
* position is unchanged and only tmin is updated, since for self
|
||||
* intersection we'll be comparing against the exact same distances. */
|
||||
ccl_device_forceinline float intersection_t_offset(const float t)
|
||||
{
|
||||
/* This is a simplified version of nextafterf(t, FLT_MAX), only dealing with
|
||||
* non-negative and finite t. */
|
||||
kernel_assert(t >= 0.0f && isfinite_safe(t));
|
||||
const uint32_t bits = (t == 0.0f) ? 1 : __float_as_uint(t) + 1;
|
||||
return __uint_as_float(bits);
|
||||
}
|
||||
|
||||
#if defined(__KERNEL_CPU__)
|
||||
ccl_device int intersections_compare(const void *a, const void *b)
|
||||
{
|
||||
|
@@ -46,13 +46,14 @@ ccl_device_inline
|
||||
float3 P = ray->P;
|
||||
float3 dir = bvh_clamp_direction(ray->D);
|
||||
float3 idir = bvh_inverse_direction(dir);
|
||||
float tmin = ray->tmin;
|
||||
int object = OBJECT_NONE;
|
||||
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
Transform ob_itfm;
|
||||
#endif
|
||||
|
||||
isect->t = ray->t;
|
||||
isect->t = ray->tmax;
|
||||
isect->u = 0.0f;
|
||||
isect->v = 0.0f;
|
||||
isect->prim = PRIM_NONE;
|
||||
@@ -73,6 +74,7 @@ ccl_device_inline
|
||||
dir,
|
||||
#endif
|
||||
idir,
|
||||
tmin,
|
||||
isect->t,
|
||||
node_addr,
|
||||
visibility,
|
||||
@@ -140,7 +142,7 @@ ccl_device_inline
|
||||
continue;
|
||||
}
|
||||
triangle_intersect(
|
||||
kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr);
|
||||
kg, isect, P, dir, tmin, isect->t, visibility, prim_object, prim, prim_addr);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -165,6 +167,7 @@ ccl_device_inline
|
||||
isect,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
isect->t,
|
||||
ray->time,
|
||||
visibility,
|
||||
@@ -186,11 +189,15 @@ ccl_device_inline
|
||||
int object_flag = kernel_data_fetch(object_flag, object);
|
||||
if (object_flag & SD_OBJECT_HAS_VOLUME) {
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
const float t_world_to_instance = bvh_instance_motion_push(
|
||||
kg, object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
#else
|
||||
isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
const float t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
#endif
|
||||
|
||||
isect->t *= t_world_to_instance;
|
||||
tmin *= t_world_to_instance;
|
||||
|
||||
++stack_ptr;
|
||||
kernel_assert(stack_ptr < BVH_STACK_SIZE);
|
||||
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
|
||||
@@ -217,6 +224,8 @@ ccl_device_inline
|
||||
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
#endif
|
||||
|
||||
tmin = ray->tmin;
|
||||
|
||||
object = OBJECT_NONE;
|
||||
node_addr = traversal_stack[stack_ptr];
|
||||
--stack_ptr;
|
||||
|
@@ -44,12 +44,12 @@ ccl_device_inline
|
||||
int node_addr = kernel_data.bvh.root;
|
||||
|
||||
/* ray parameters in registers */
|
||||
const float tmax = ray->t;
|
||||
float3 P = ray->P;
|
||||
float3 dir = bvh_clamp_direction(ray->D);
|
||||
float3 idir = bvh_inverse_direction(dir);
|
||||
float tmin = ray->tmin;
|
||||
int object = OBJECT_NONE;
|
||||
float isect_t = tmax;
|
||||
float isect_t = ray->tmax;
|
||||
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
Transform ob_itfm;
|
||||
@@ -58,7 +58,7 @@ ccl_device_inline
|
||||
int num_hits_in_instance = 0;
|
||||
|
||||
uint num_hits = 0;
|
||||
isect_array->t = tmax;
|
||||
isect_array->t = ray->tmax;
|
||||
|
||||
/* traversal loop */
|
||||
do {
|
||||
@@ -75,6 +75,7 @@ ccl_device_inline
|
||||
dir,
|
||||
#endif
|
||||
idir,
|
||||
tmin,
|
||||
isect_t,
|
||||
node_addr,
|
||||
visibility,
|
||||
@@ -141,8 +142,16 @@ ccl_device_inline
|
||||
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
|
||||
continue;
|
||||
}
|
||||
hit = triangle_intersect(
|
||||
kg, isect_array, P, dir, isect_t, visibility, prim_object, prim, prim_addr);
|
||||
hit = triangle_intersect(kg,
|
||||
isect_array,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
isect_t,
|
||||
visibility,
|
||||
prim_object,
|
||||
prim,
|
||||
prim_addr);
|
||||
if (hit) {
|
||||
/* Move on to next entry in intersections array. */
|
||||
isect_array++;
|
||||
@@ -189,6 +198,7 @@ ccl_device_inline
|
||||
isect_array,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
isect_t,
|
||||
ray->time,
|
||||
visibility,
|
||||
@@ -232,11 +242,15 @@ ccl_device_inline
|
||||
int object_flag = kernel_data_fetch(object_flag, object);
|
||||
if (object_flag & SD_OBJECT_HAS_VOLUME) {
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
isect_t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
const float t_world_to_instance = bvh_instance_motion_push(
|
||||
kg, object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
#else
|
||||
isect_t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
const float t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
#endif
|
||||
|
||||
isect_t *= t_world_to_instance;
|
||||
tmin *= t_world_to_instance;
|
||||
|
||||
num_hits_in_instance = 0;
|
||||
isect_array->t = isect_t;
|
||||
|
||||
@@ -280,7 +294,8 @@ ccl_device_inline
|
||||
#endif
|
||||
}
|
||||
|
||||
isect_t = tmax;
|
||||
tmin = ray->tmin;
|
||||
isect_t = ray->tmax;
|
||||
isect_array->t = isect_t;
|
||||
|
||||
object = OBJECT_NONE;
|
||||
|
@@ -165,9 +165,11 @@ ccl_device void camera_sample_perspective(KernelGlobals kg,
|
||||
float nearclip = kernel_data.cam.nearclip * z_inv;
|
||||
ray->P += nearclip * ray->D;
|
||||
ray->dP += nearclip * ray->dD;
|
||||
ray->t = kernel_data.cam.cliplength * z_inv;
|
||||
ray->tmin = 0.0f;
|
||||
ray->tmax = kernel_data.cam.cliplength * z_inv;
|
||||
#else
|
||||
ray->t = FLT_MAX;
|
||||
ray->tmin = 0.0f;
|
||||
ray->tmax = FLT_MAX;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -231,9 +233,11 @@ ccl_device void camera_sample_orthographic(KernelGlobals kg,
|
||||
|
||||
#ifdef __CAMERA_CLIPPING__
|
||||
/* clipping */
|
||||
ray->t = kernel_data.cam.cliplength;
|
||||
ray->tmin = 0.0f;
|
||||
ray->tmax = kernel_data.cam.cliplength;
|
||||
#else
|
||||
ray->t = FLT_MAX;
|
||||
ray->tmin = 0.0f;
|
||||
ray->tmax = FLT_MAX;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -258,7 +262,7 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
|
||||
|
||||
/* indicates ray should not receive any light, outside of the lens */
|
||||
if (is_zero(D)) {
|
||||
ray->t = 0.0f;
|
||||
ray->tmax = 0.0f;
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -349,9 +353,11 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
|
||||
float nearclip = cam->nearclip;
|
||||
ray->P += nearclip * ray->D;
|
||||
ray->dP += nearclip * ray->dD;
|
||||
ray->t = cam->cliplength;
|
||||
ray->tmin = 0.0f;
|
||||
ray->tmax = cam->cliplength;
|
||||
#else
|
||||
ray->t = FLT_MAX;
|
||||
ray->tmin = 0.0f;
|
||||
ray->tmax = FLT_MAX;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -368,7 +374,7 @@ ccl_device_inline void camera_sample(KernelGlobals kg,
|
||||
ccl_private Ray *ray)
|
||||
{
|
||||
/* pixel filter */
|
||||
int filter_table_offset = kernel_data.film.filter_table_offset;
|
||||
int filter_table_offset = kernel_data.tables.filter_table_offset;
|
||||
float raster_x = x + lookup_table_read(kg, filter_u, filter_table_offset, FILTER_TABLE_SIZE);
|
||||
float raster_y = y + lookup_table_read(kg, filter_v, filter_table_offset, FILTER_TABLE_SIZE);
|
||||
|
||||
|
206
intern/cycles/kernel/data_template.h
Normal file
206
intern/cycles/kernel/data_template.h
Normal file
@@ -0,0 +1,206 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#ifndef KERNEL_STRUCT_BEGIN
|
||||
# define KERNEL_STRUCT_BEGIN(name, parent)
|
||||
#endif
|
||||
#ifndef KERNEL_STRUCT_END
|
||||
# define KERNEL_STRUCT_END(name)
|
||||
#endif
|
||||
#ifndef KERNEL_STRUCT_MEMBER
|
||||
# define KERNEL_STRUCT_MEMBER(parent, type, name)
|
||||
#endif
|
||||
|
||||
/* Background. */
|
||||
|
||||
KERNEL_STRUCT_BEGIN(KernelBackground, background)
|
||||
/* xyz store direction, w the angle. float4 instead of float3 is used
|
||||
* to ensure consistent padding/alignment across devices. */
|
||||
KERNEL_STRUCT_MEMBER(background, float4, sun)
|
||||
/* Only shader index. */
|
||||
KERNEL_STRUCT_MEMBER(background, int, surface_shader)
|
||||
KERNEL_STRUCT_MEMBER(background, int, volume_shader)
|
||||
KERNEL_STRUCT_MEMBER(background, float, volume_step_size)
|
||||
KERNEL_STRUCT_MEMBER(background, int, transparent)
|
||||
KERNEL_STRUCT_MEMBER(background, float, transparent_roughness_squared_threshold)
|
||||
/* Portal sampling. */
|
||||
KERNEL_STRUCT_MEMBER(background, float, portal_weight)
|
||||
KERNEL_STRUCT_MEMBER(background, int, num_portals)
|
||||
KERNEL_STRUCT_MEMBER(background, int, portal_offset)
|
||||
/* Sun sampling. */
|
||||
KERNEL_STRUCT_MEMBER(background, float, sun_weight)
|
||||
/* Importance map sampling. */
|
||||
KERNEL_STRUCT_MEMBER(background, float, map_weight)
|
||||
KERNEL_STRUCT_MEMBER(background, int, map_res_x)
|
||||
KERNEL_STRUCT_MEMBER(background, int, map_res_y)
|
||||
/* Multiple importance sampling. */
|
||||
KERNEL_STRUCT_MEMBER(background, int, use_mis)
|
||||
/* Lightgroup. */
|
||||
KERNEL_STRUCT_MEMBER(background, int, lightgroup)
|
||||
/* Padding. */
|
||||
KERNEL_STRUCT_MEMBER(background, int, pad1)
|
||||
KERNEL_STRUCT_MEMBER(background, int, pad2)
|
||||
KERNEL_STRUCT_MEMBER(background, int, pad3)
|
||||
KERNEL_STRUCT_END(KernelBackground)
|
||||
|
||||
/* BVH: own BVH2 if no native device acceleration struct used. */
|
||||
|
||||
KERNEL_STRUCT_BEGIN(KernelBVH, bvh)
|
||||
KERNEL_STRUCT_MEMBER(bvh, int, root)
|
||||
KERNEL_STRUCT_MEMBER(bvh, int, have_motion)
|
||||
KERNEL_STRUCT_MEMBER(bvh, int, have_curves)
|
||||
KERNEL_STRUCT_MEMBER(bvh, int, bvh_layout)
|
||||
KERNEL_STRUCT_MEMBER(bvh, int, use_bvh_steps)
|
||||
KERNEL_STRUCT_MEMBER(bvh, int, curve_subdivisions)
|
||||
KERNEL_STRUCT_MEMBER(bvh, int, pad1)
|
||||
KERNEL_STRUCT_MEMBER(bvh, int, pad2)
|
||||
KERNEL_STRUCT_END(KernelBVH)
|
||||
|
||||
/* Film. */
|
||||
|
||||
KERNEL_STRUCT_BEGIN(KernelFilm, film)
|
||||
/* XYZ to rendering color space transform. float4 instead of float3 to
|
||||
* ensure consistent padding/alignment across devices. */
|
||||
KERNEL_STRUCT_MEMBER(film, float4, xyz_to_r)
|
||||
KERNEL_STRUCT_MEMBER(film, float4, xyz_to_g)
|
||||
KERNEL_STRUCT_MEMBER(film, float4, xyz_to_b)
|
||||
KERNEL_STRUCT_MEMBER(film, float4, rgb_to_y)
|
||||
/* Rec709 to rendering color space. */
|
||||
KERNEL_STRUCT_MEMBER(film, float4, rec709_to_r)
|
||||
KERNEL_STRUCT_MEMBER(film, float4, rec709_to_g)
|
||||
KERNEL_STRUCT_MEMBER(film, float4, rec709_to_b)
|
||||
KERNEL_STRUCT_MEMBER(film, int, is_rec709)
|
||||
/* Exposuse. */
|
||||
KERNEL_STRUCT_MEMBER(film, float, exposure)
|
||||
/* Passed used. */
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_flag)
|
||||
KERNEL_STRUCT_MEMBER(film, int, light_pass_flag)
|
||||
/* Pass offsets. */
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_stride)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_combined)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_depth)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_position)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_normal)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_roughness)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_motion)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_motion_weight)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_uv)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_object_id)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_material_id)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_diffuse_color)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_glossy_color)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_transmission_color)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_diffuse_indirect)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_glossy_indirect)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_transmission_indirect)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_volume_indirect)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_diffuse_direct)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_glossy_direct)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_transmission_direct)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_volume_direct)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_emission)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_background)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_ao)
|
||||
KERNEL_STRUCT_MEMBER(film, float, pass_alpha_threshold)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_shadow)
|
||||
KERNEL_STRUCT_MEMBER(film, float, pass_shadow_scale)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher_sample_count)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher_matte)
|
||||
/* Cryptomatte. */
|
||||
KERNEL_STRUCT_MEMBER(film, int, cryptomatte_passes)
|
||||
KERNEL_STRUCT_MEMBER(film, int, cryptomatte_depth)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_cryptomatte)
|
||||
/* Adaptive sampling. */
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_adaptive_aux_buffer)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_sample_count)
|
||||
/* Mist. */
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_mist)
|
||||
KERNEL_STRUCT_MEMBER(film, float, mist_start)
|
||||
KERNEL_STRUCT_MEMBER(film, float, mist_inv_depth)
|
||||
KERNEL_STRUCT_MEMBER(film, float, mist_falloff)
|
||||
/* Denoising. */
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_denoising_normal)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_denoising_albedo)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_denoising_depth)
|
||||
/* AOVs. */
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_aov_color)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_aov_value)
|
||||
/* Light groups. */
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_lightgroup)
|
||||
/* Baking. */
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_bake_primitive)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_bake_differential)
|
||||
/* Shadow catcher. */
|
||||
KERNEL_STRUCT_MEMBER(film, int, use_approximate_shadow_catcher)
|
||||
/* Padding. */
|
||||
KERNEL_STRUCT_MEMBER(film, int, pad1)
|
||||
KERNEL_STRUCT_MEMBER(film, int, pad2)
|
||||
KERNEL_STRUCT_END(KernelFilm)
|
||||
|
||||
/* Integrator. */
|
||||
|
||||
KERNEL_STRUCT_BEGIN(KernelIntegrator, integrator)
|
||||
/* Emission. */
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, use_direct_light)
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, num_distribution)
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, num_all_lights)
|
||||
KERNEL_STRUCT_MEMBER(integrator, float, pdf_triangles)
|
||||
KERNEL_STRUCT_MEMBER(integrator, float, pdf_lights)
|
||||
KERNEL_STRUCT_MEMBER(integrator, float, light_inv_rr_threshold)
|
||||
/* Bounces. */
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, min_bounce)
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, max_bounce)
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, max_diffuse_bounce)
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, max_glossy_bounce)
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, max_transmission_bounce)
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, max_volume_bounce)
|
||||
/* AO bounces. */
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, ao_bounces)
|
||||
KERNEL_STRUCT_MEMBER(integrator, float, ao_bounces_distance)
|
||||
KERNEL_STRUCT_MEMBER(integrator, float, ao_bounces_factor)
|
||||
KERNEL_STRUCT_MEMBER(integrator, float, ao_additive_factor)
|
||||
/* Transparency. */
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, transparent_min_bounce)
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, transparent_max_bounce)
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, transparent_shadows)
|
||||
/* Caustics. */
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, caustics_reflective)
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, caustics_refractive)
|
||||
KERNEL_STRUCT_MEMBER(integrator, float, filter_glossy)
|
||||
/* Seed. */
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, seed)
|
||||
/* Clamp. */
|
||||
KERNEL_STRUCT_MEMBER(integrator, float, sample_clamp_direct)
|
||||
KERNEL_STRUCT_MEMBER(integrator, float, sample_clamp_indirect)
|
||||
/* MIS. */
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, use_lamp_mis)
|
||||
/* Caustics. */
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, use_caustics)
|
||||
/* Sampling pattern. */
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, sampling_pattern)
|
||||
KERNEL_STRUCT_MEMBER(integrator, float, scrambling_distance)
|
||||
/* Volume render. */
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, use_volumes)
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, volume_max_steps)
|
||||
KERNEL_STRUCT_MEMBER(integrator, float, volume_step_rate)
|
||||
/* Shadow catcher. */
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, has_shadow_catcher)
|
||||
/* Closure filter. */
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, filter_closures)
|
||||
/* MIS debugging. */
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, direct_light_sampling_type)
|
||||
/* Padding */
|
||||
KERNEL_STRUCT_MEMBER(integrator, int, pad1)
|
||||
KERNEL_STRUCT_END(KernelIntegrator)
|
||||
|
||||
/* SVM. For shader specialization. */
|
||||
|
||||
KERNEL_STRUCT_BEGIN(KernelSVMUsage, svm_usage)
|
||||
#define SHADER_NODE_TYPE(type) KERNEL_STRUCT_MEMBER(svm_usage, int, type)
|
||||
#include "kernel/svm/node_types_template.h"
|
||||
KERNEL_STRUCT_END(KernelSVMUsage)
|
||||
|
||||
#undef KERNEL_STRUCT_BEGIN
|
||||
#undef KERNEL_STRUCT_MEMBER
|
||||
#undef KERNEL_STRUCT_END
|
@@ -246,7 +246,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
ccl_gpu_kernel_postfix
|
||||
|
||||
#if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__)
|
||||
constant int __dummy_constant [[function_constant(0)]];
|
||||
constant int __dummy_constant [[function_constant(Kernel_DummyConstant)]];
|
||||
#endif
|
||||
|
||||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
|
15
intern/cycles/kernel/device/metal/function_constants.h
Normal file
15
intern/cycles/kernel/device/metal/function_constants.h
Normal file
@@ -0,0 +1,15 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2021-2022 Blender Foundation */
|
||||
|
||||
enum {
|
||||
Kernel_DummyConstant,
|
||||
#define KERNEL_STRUCT_MEMBER(parent, type, name) KernelData_##parent##_##name,
|
||||
#include "kernel/data_template.h"
|
||||
};
|
||||
|
||||
#ifdef __KERNEL_METAL__
|
||||
# define KERNEL_STRUCT_MEMBER(parent, type, name) \
|
||||
constant type kernel_data_##parent##_##name \
|
||||
[[function_constant(KernelData_##parent##_##name)]];
|
||||
# include "kernel/data_template.h"
|
||||
#endif
|
@@ -5,6 +5,7 @@
|
||||
|
||||
#include "kernel/device/metal/compat.h"
|
||||
#include "kernel/device/metal/globals.h"
|
||||
#include "kernel/device/metal/function_constants.h"
|
||||
#include "kernel/device/gpu/kernel.h"
|
||||
|
||||
/* MetalRT intersection handlers */
|
||||
@@ -409,6 +410,7 @@ void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal,
|
||||
const float3 ray_origin,
|
||||
const float3 ray_direction,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
{
|
||||
@@ -433,7 +435,7 @@ void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal,
|
||||
isect.t *= len;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
if (context.curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
|
||||
if (context.curve_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
|
||||
launch_params_metal, payload, object, prim, isect.u);
|
||||
if (result.accept) {
|
||||
@@ -455,6 +457,7 @@ void metalrt_intersection_curve_shadow(constant KernelParamsMetal &launch_params
|
||||
const float3 ray_origin,
|
||||
const float3 ray_direction,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
{
|
||||
@@ -474,7 +477,7 @@ void metalrt_intersection_curve_shadow(constant KernelParamsMetal &launch_params
|
||||
isect.t *= len;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
if (context.curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
|
||||
if (context.curve_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
|
||||
launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
|
||||
result.accept = !result.continue_search;
|
||||
@@ -493,6 +496,7 @@ __intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[b
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_origin [[origin]],
|
||||
const float3 ray_direction [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
@@ -510,7 +514,7 @@ __intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[b
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmax, result);
|
||||
ray_tmin, ray_tmax, result);
|
||||
}
|
||||
|
||||
return result;
|
||||
@@ -524,6 +528,7 @@ __intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_me
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_origin [[origin]],
|
||||
const float3 ray_direction [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
@@ -541,7 +546,7 @@ __intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_me
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmax, result);
|
||||
ray_tmin, ray_tmax, result);
|
||||
}
|
||||
|
||||
return result;
|
||||
@@ -555,6 +560,7 @@ __intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buff
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_origin [[origin]],
|
||||
const float3 ray_direction [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
@@ -570,7 +576,7 @@ __intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buff
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmax, result);
|
||||
ray_tmin, ray_tmax, result);
|
||||
|
||||
return result;
|
||||
}
|
||||
@@ -583,6 +589,7 @@ __intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_origin [[origin]],
|
||||
const float3 ray_direction [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
@@ -599,7 +606,7 @@ __intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmax, result);
|
||||
ray_tmin, ray_tmax, result);
|
||||
|
||||
return result;
|
||||
}
|
||||
@@ -615,6 +622,7 @@ void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal,
|
||||
const float3 ray_origin,
|
||||
const float3 ray_direction,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
{
|
||||
@@ -639,7 +647,7 @@ void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal,
|
||||
isect.t *= len;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
if (context.point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
|
||||
if (context.point_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
|
||||
launch_params_metal, payload, object, prim, isect.u);
|
||||
if (result.accept) {
|
||||
@@ -661,6 +669,7 @@ void metalrt_intersection_point_shadow(constant KernelParamsMetal &launch_params
|
||||
const float3 ray_origin,
|
||||
const float3 ray_direction,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
{
|
||||
@@ -680,7 +689,7 @@ void metalrt_intersection_point_shadow(constant KernelParamsMetal &launch_params
|
||||
isect.t *= len;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
if (context.point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
|
||||
if (context.point_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
|
||||
launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
|
||||
result.accept = !result.continue_search;
|
||||
@@ -699,6 +708,7 @@ __intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_origin [[origin]],
|
||||
const float3 ray_direction [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
@@ -715,7 +725,7 @@ __intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmax, result);
|
||||
ray_tmin, ray_tmax, result);
|
||||
|
||||
return result;
|
||||
}
|
||||
@@ -728,6 +738,7 @@ __intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[b
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_origin [[origin]],
|
||||
const float3 ray_direction [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
@@ -744,7 +755,7 @@ __intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[b
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmax, result);
|
||||
ray_tmin, ray_tmax, result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@@ -670,7 +670,7 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
|
||||
}
|
||||
|
||||
static const int lowest_supported_driver_version_win = 1011660;
|
||||
static const int lowest_supported_driver_version_neo = 20066;
|
||||
static const int lowest_supported_driver_version_neo = 23570;
|
||||
|
||||
static int parse_driver_build_version(const sycl::device &device)
|
||||
{
|
||||
|
@@ -51,32 +51,36 @@ ccl_device_forceinline int get_object_id()
|
||||
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest()
|
||||
{
|
||||
const int global_index = optixGetLaunchIndex().x;
|
||||
const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
|
||||
global_index;
|
||||
const int path_index = (kernel_params.path_index_array) ?
|
||||
kernel_params.path_index_array[global_index] :
|
||||
global_index;
|
||||
integrator_intersect_closest(nullptr, path_index, kernel_params.render_buffer);
|
||||
}
|
||||
|
||||
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_shadow()
|
||||
{
|
||||
const int global_index = optixGetLaunchIndex().x;
|
||||
const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
|
||||
global_index;
|
||||
const int path_index = (kernel_params.path_index_array) ?
|
||||
kernel_params.path_index_array[global_index] :
|
||||
global_index;
|
||||
integrator_intersect_shadow(nullptr, path_index);
|
||||
}
|
||||
|
||||
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_subsurface()
|
||||
{
|
||||
const int global_index = optixGetLaunchIndex().x;
|
||||
const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
|
||||
global_index;
|
||||
const int path_index = (kernel_params.path_index_array) ?
|
||||
kernel_params.path_index_array[global_index] :
|
||||
global_index;
|
||||
integrator_intersect_subsurface(nullptr, path_index);
|
||||
}
|
||||
|
||||
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_volume_stack()
|
||||
{
|
||||
const int global_index = optixGetLaunchIndex().x;
|
||||
const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
|
||||
global_index;
|
||||
const int path_index = (kernel_params.path_index_array) ?
|
||||
kernel_params.path_index_array[global_index] :
|
||||
global_index;
|
||||
integrator_intersect_volume_stack(nullptr, path_index);
|
||||
}
|
||||
|
||||
@@ -408,6 +412,7 @@ ccl_device_inline void optix_intersection_curve(const int prim, const int type)
|
||||
|
||||
float3 P = optixGetObjectRayOrigin();
|
||||
float3 dir = optixGetObjectRayDirection();
|
||||
float tmin = optixGetRayTmin();
|
||||
|
||||
/* The direction is not normalized by default, but the curve intersection routine expects that */
|
||||
float len;
|
||||
@@ -425,7 +430,7 @@ ccl_device_inline void optix_intersection_curve(const int prim, const int type)
|
||||
if (isect.t != FLT_MAX)
|
||||
isect.t *= len;
|
||||
|
||||
if (curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
|
||||
if (curve_intersect(NULL, &isect, P, dir, tmin, isect.t, object, prim, time, type)) {
|
||||
static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
|
||||
optixReportIntersection(isect.t / len,
|
||||
type & PRIMITIVE_ALL,
|
||||
@@ -462,6 +467,7 @@ extern "C" __global__ void __intersection__point()
|
||||
|
||||
float3 P = optixGetObjectRayOrigin();
|
||||
float3 dir = optixGetObjectRayDirection();
|
||||
float tmin = optixGetRayTmin();
|
||||
|
||||
/* The direction is not normalized by default, the point intersection routine expects that. */
|
||||
float len;
|
||||
@@ -480,7 +486,7 @@ extern "C" __global__ void __intersection__point()
|
||||
isect.t *= len;
|
||||
}
|
||||
|
||||
if (point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
|
||||
if (point_intersect(NULL, &isect, P, dir, tmin, isect.t, object, prim, time, type)) {
|
||||
static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
|
||||
optixReportIntersection(isect.t / len, type & PRIMITIVE_ALL);
|
||||
}
|
||||
|
@@ -156,7 +156,8 @@ ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, co
|
||||
}
|
||||
|
||||
ccl_device bool curve_intersect_iterative(const float3 ray_dir,
|
||||
ccl_private float *ray_tfar,
|
||||
const float ray_tmin,
|
||||
ccl_private float *ray_tmax,
|
||||
const float dt,
|
||||
const float4 curve[4],
|
||||
float u,
|
||||
@@ -220,7 +221,7 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
|
||||
|
||||
if (fabsf(f) < f_err && fabsf(g) < g_err) {
|
||||
t += dt;
|
||||
if (!(0.0f <= t && t <= *ray_tfar)) {
|
||||
if (!(t >= ray_tmin && t <= *ray_tmax)) {
|
||||
return false; /* Rejects NaNs */
|
||||
}
|
||||
if (!(u >= 0.0f && u <= 1.0f)) {
|
||||
@@ -237,7 +238,7 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
|
||||
}
|
||||
|
||||
/* Record intersection. */
|
||||
*ray_tfar = t;
|
||||
*ray_tmax = t;
|
||||
isect->t = t;
|
||||
isect->u = u;
|
||||
isect->v = 0.0f;
|
||||
@@ -250,7 +251,8 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
|
||||
|
||||
ccl_device bool curve_intersect_recursive(const float3 ray_orig,
|
||||
const float3 ray_dir,
|
||||
float ray_tfar,
|
||||
const float ray_tmin,
|
||||
float ray_tmax,
|
||||
float4 curve[4],
|
||||
ccl_private Intersection *isect)
|
||||
{
|
||||
@@ -331,7 +333,7 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
|
||||
}
|
||||
|
||||
/* Intersect with cap-planes. */
|
||||
float2 tp = make_float2(-dt, ray_tfar - dt);
|
||||
float2 tp = make_float2(ray_tmin - dt, ray_tmax - dt);
|
||||
tp = make_float2(max(tp.x, tc_outer.x), min(tp.y, tc_outer.y));
|
||||
const float2 h0 = half_plane_intersect(
|
||||
float4_to_float3(P0), float4_to_float3(dP0du), ray_dir);
|
||||
@@ -394,19 +396,20 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
|
||||
CURVE_NUM_BEZIER_SUBDIVISIONS;
|
||||
if (depth >= termDepth) {
|
||||
found |= curve_intersect_iterative(
|
||||
ray_dir, &ray_tfar, dt, curve, u_outer0, tp0.x, use_backfacing, isect);
|
||||
ray_dir, ray_tmin, &ray_tmax, dt, curve, u_outer0, tp0.x, use_backfacing, isect);
|
||||
}
|
||||
else {
|
||||
recurse = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (valid1 && (tp1.x + dt <= ray_tfar)) {
|
||||
const float t1 = tp1.x + dt;
|
||||
if (valid1 && (t1 >= ray_tmin && t1 <= ray_tmax)) {
|
||||
const int termDepth = unstable1 ? CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE :
|
||||
CURVE_NUM_BEZIER_SUBDIVISIONS;
|
||||
if (depth >= termDepth) {
|
||||
found |= curve_intersect_iterative(
|
||||
ray_dir, &ray_tfar, dt, curve, u_outer1, tp1.y, use_backfacing, isect);
|
||||
ray_dir, ray_tmin, &ray_tmax, dt, curve, u_outer1, tp1.y, use_backfacing, isect);
|
||||
}
|
||||
else {
|
||||
recurse = true;
|
||||
@@ -456,7 +459,8 @@ ccl_device_inline bool cylinder_culling_test(const float2 p1, const float2 p2, c
|
||||
* v0,v1,v3 and v2,v3,v1. The edge v1,v2 decides which of the two
|
||||
* triangles gets intersected.
|
||||
*/
|
||||
ccl_device_inline bool ribbon_intersect_quad(const float ray_tfar,
|
||||
ccl_device_inline bool ribbon_intersect_quad(const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
const float3 quad_v0,
|
||||
const float3 quad_v1,
|
||||
const float3 quad_v2,
|
||||
@@ -497,7 +501,7 @@ ccl_device_inline bool ribbon_intersect_quad(const float ray_tfar,
|
||||
|
||||
/* Perform depth test? */
|
||||
const float t = rcpDen * dot(v0, Ng);
|
||||
if (!(0.0f <= t && t <= ray_tfar)) {
|
||||
if (!(t >= ray_tmin && t <= ray_tmax)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -534,7 +538,8 @@ ccl_device_inline float4 ribbon_to_ray_space(const float3 ray_space[3],
|
||||
|
||||
ccl_device_inline bool ribbon_intersect(const float3 ray_org,
|
||||
const float3 ray_dir,
|
||||
float ray_tfar,
|
||||
const float ray_tmin,
|
||||
float ray_tmax,
|
||||
const int N,
|
||||
float4 curve[4],
|
||||
ccl_private Intersection *isect)
|
||||
@@ -582,7 +587,7 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org,
|
||||
|
||||
/* Intersect quad. */
|
||||
float vu, vv, vt;
|
||||
bool valid0 = ribbon_intersect_quad(ray_tfar, lp0, lp1, up1, up0, &vu, &vv, &vt);
|
||||
bool valid0 = ribbon_intersect_quad(ray_tmin, ray_tmax, lp0, lp1, up1, up0, &vu, &vv, &vt);
|
||||
|
||||
if (valid0) {
|
||||
/* ignore self intersections */
|
||||
@@ -596,7 +601,7 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org,
|
||||
vv = 2.0f * vv - 1.0f;
|
||||
|
||||
/* Record intersection. */
|
||||
ray_tfar = vt;
|
||||
ray_tmax = vt;
|
||||
isect->t = vt;
|
||||
isect->u = u + vu * step_size;
|
||||
isect->v = vv;
|
||||
@@ -616,6 +621,7 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals kg,
|
||||
ccl_private Intersection *isect,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float tmin,
|
||||
const float tmax,
|
||||
int object,
|
||||
int prim,
|
||||
@@ -645,7 +651,7 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals kg,
|
||||
if (type & PRIMITIVE_CURVE_RIBBON) {
|
||||
/* todo: adaptive number of subdivisions could help performance here. */
|
||||
const int subdivisions = kernel_data.bvh.curve_subdivisions;
|
||||
if (ribbon_intersect(P, dir, tmax, subdivisions, curve, isect)) {
|
||||
if (ribbon_intersect(P, dir, tmin, tmax, subdivisions, curve, isect)) {
|
||||
isect->prim = prim;
|
||||
isect->object = object;
|
||||
isect->type = type;
|
||||
@@ -655,7 +661,7 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals kg,
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
if (curve_intersect_recursive(P, dir, tmax, curve, isect)) {
|
||||
if (curve_intersect_recursive(P, dir, tmin, tmax, curve, isect)) {
|
||||
isect->prim = prim;
|
||||
isect->object = object;
|
||||
isect->type = type;
|
||||
|
@@ -46,6 +46,7 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals kg,
|
||||
ccl_private Intersection *isect,
|
||||
float3 P,
|
||||
float3 dir,
|
||||
float tmin,
|
||||
float tmax,
|
||||
float time,
|
||||
uint visibility,
|
||||
@@ -58,7 +59,7 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals kg,
|
||||
motion_triangle_vertices(kg, object, prim, time, verts);
|
||||
/* Ray-triangle intersection, unoptimized. */
|
||||
float t, u, v;
|
||||
if (ray_triangle_intersect(P, dir, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) {
|
||||
if (ray_triangle_intersect(P, dir, tmin, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) {
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
/* Visibility flag test. we do it here under the assumption
|
||||
* that most triangles are culled by node flags.
|
||||
@@ -92,6 +93,7 @@ ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals kg,
|
||||
int object,
|
||||
int prim,
|
||||
int prim_addr,
|
||||
float tmin,
|
||||
float tmax,
|
||||
ccl_private uint *lcg_state,
|
||||
int max_hits)
|
||||
@@ -101,7 +103,7 @@ ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals kg,
|
||||
motion_triangle_vertices(kg, object, prim, time, verts);
|
||||
/* Ray-triangle intersection, unoptimized. */
|
||||
float t, u, v;
|
||||
if (!ray_triangle_intersect(P, dir, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) {
|
||||
if (!ray_triangle_intersect(P, dir, tmin, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@@ -9,8 +9,12 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef __POINTCLOUD__
|
||||
|
||||
ccl_device_forceinline bool point_intersect_test(
|
||||
const float4 point, const float3 P, const float3 dir, const float tmax, ccl_private float *t)
|
||||
ccl_device_forceinline bool point_intersect_test(const float4 point,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float tmin,
|
||||
const float tmax,
|
||||
ccl_private float *t)
|
||||
{
|
||||
const float3 center = float4_to_float3(point);
|
||||
const float radius = point.w;
|
||||
@@ -28,12 +32,12 @@ ccl_device_forceinline bool point_intersect_test(
|
||||
|
||||
const float td = sqrt((r2 - l2) * rd2);
|
||||
const float t_front = projC0 - td;
|
||||
const bool valid_front = (0.0f <= t_front) & (t_front <= tmax);
|
||||
const bool valid_front = (tmin <= t_front) & (t_front <= tmax);
|
||||
|
||||
/* Always back-face culling for now. */
|
||||
# if 0
|
||||
const float t_back = projC0 + td;
|
||||
const bool valid_back = (0.0f <= t_back) & (t_back <= tmax);
|
||||
const bool valid_back = (tmin <= t_back) & (t_back <= tmax);
|
||||
|
||||
/* check if there is a first hit */
|
||||
const bool valid_first = valid_front | valid_back;
|
||||
@@ -56,6 +60,7 @@ ccl_device_forceinline bool point_intersect(KernelGlobals kg,
|
||||
ccl_private Intersection *isect,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float tmin,
|
||||
const float tmax,
|
||||
const int object,
|
||||
const int prim,
|
||||
@@ -65,7 +70,7 @@ ccl_device_forceinline bool point_intersect(KernelGlobals kg,
|
||||
const float4 point = (type & PRIMITIVE_MOTION) ? motion_point(kg, object, prim, time) :
|
||||
kernel_data_fetch(points, prim);
|
||||
|
||||
if (!point_intersect_test(point, P, dir, tmax, &isect->t)) {
|
||||
if (!point_intersect_test(point, P, dir, tmin, tmax, &isect->t)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@@ -407,7 +407,7 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals kg,
|
||||
{
|
||||
|
||||
/* vectors */
|
||||
sd->P = ray->P;
|
||||
sd->P = ray->P + ray->D * ray->tmin;
|
||||
sd->N = -ray->D;
|
||||
sd->Ng = -ray->D;
|
||||
sd->I = -ray->D;
|
||||
@@ -441,7 +441,6 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals kg,
|
||||
|
||||
/* for NDC coordinates */
|
||||
sd->ray_P = ray->P;
|
||||
sd->ray_dP = ray->dP;
|
||||
}
|
||||
#endif /* __VOLUME__ */
|
||||
|
||||
|
@@ -17,6 +17,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals kg,
|
||||
ccl_private Intersection *isect,
|
||||
float3 P,
|
||||
float3 dir,
|
||||
float tmin,
|
||||
float tmax,
|
||||
uint visibility,
|
||||
int object,
|
||||
@@ -28,7 +29,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals kg,
|
||||
tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1),
|
||||
tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
|
||||
float t, u, v;
|
||||
if (ray_triangle_intersect(P, dir, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) {
|
||||
if (ray_triangle_intersect(P, dir, tmin, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) {
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
/* Visibility flag test. we do it here under the assumption
|
||||
* that most triangles are culled by node flags.
|
||||
@@ -62,6 +63,7 @@ ccl_device_inline bool triangle_intersect_local(KernelGlobals kg,
|
||||
int object,
|
||||
int prim,
|
||||
int prim_addr,
|
||||
float tmin,
|
||||
float tmax,
|
||||
ccl_private uint *lcg_state,
|
||||
int max_hits)
|
||||
@@ -71,7 +73,7 @@ ccl_device_inline bool triangle_intersect_local(KernelGlobals kg,
|
||||
tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1),
|
||||
tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
|
||||
float t, u, v;
|
||||
if (!ray_triangle_intersect(P, dir, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) {
|
||||
if (!ray_triangle_intersect(P, dir, tmin, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@@ -174,14 +174,15 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
|
||||
Ray ray ccl_optional_struct_init;
|
||||
ray.P = zero_float3();
|
||||
ray.D = normalize(P);
|
||||
ray.t = FLT_MAX;
|
||||
ray.tmin = 0.0f;
|
||||
ray.tmax = FLT_MAX;
|
||||
ray.time = 0.5f;
|
||||
ray.dP = differential_zero_compact();
|
||||
ray.dD = differential_zero_compact();
|
||||
integrator_state_write_ray(kg, state, &ray);
|
||||
|
||||
/* Setup next kernel to execute. */
|
||||
INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
}
|
||||
else {
|
||||
/* Surface baking. */
|
||||
@@ -210,7 +211,8 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
|
||||
Ray ray ccl_optional_struct_init;
|
||||
ray.P = P + N;
|
||||
ray.D = -N;
|
||||
ray.t = FLT_MAX;
|
||||
ray.tmin = 0.0f;
|
||||
ray.tmax = FLT_MAX;
|
||||
ray.time = 0.5f;
|
||||
|
||||
/* Setup differentials. */
|
||||
@@ -247,13 +249,15 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
|
||||
const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);
|
||||
|
||||
if (use_caustics) {
|
||||
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index);
|
||||
integrator_path_init_sorted(
|
||||
kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index);
|
||||
}
|
||||
else if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index);
|
||||
integrator_path_init_sorted(
|
||||
kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index);
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index);
|
||||
integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -86,7 +86,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg,
|
||||
/* Generate camera ray. */
|
||||
Ray ray;
|
||||
integrate_camera_sample(kg, sample, x, y, rng_hash, &ray);
|
||||
if (ray.t == 0.0f) {
|
||||
if (ray.tmax == 0.0f) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -100,10 +100,10 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg,
|
||||
/* Continue with intersect_closest kernel, optionally initializing volume
|
||||
* stack before that if the camera may be inside a volume. */
|
||||
if (kernel_data.cam.is_inside_volume) {
|
||||
INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
|
||||
integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
|
||||
integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@@ -109,14 +109,14 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
|
||||
/* If using background pass, schedule background shading kernel so that we have a background
|
||||
* to alpha-over on. The background kernel will then continue the path afterwards. */
|
||||
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND;
|
||||
INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!integrator_state_volume_stack_is_empty(kg, state)) {
|
||||
/* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher
|
||||
* objects from it, and then continue shading volume and shadow catcher surface after. */
|
||||
INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
|
||||
integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -128,18 +128,19 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
|
||||
|
||||
if (use_caustics) {
|
||||
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
|
||||
integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
|
||||
}
|
||||
else if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
integrator_path_init_sorted(
|
||||
kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
|
||||
integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
|
||||
}
|
||||
}
|
||||
|
||||
/* Schedule next kernel to be executed after updating volume stack for shadow catcher. */
|
||||
template<uint32_t current_kernel>
|
||||
template<DeviceKernel current_kernel>
|
||||
ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_volume(
|
||||
KernelGlobals kg, IntegratorState state)
|
||||
{
|
||||
@@ -156,20 +157,21 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
|
||||
|
||||
if (use_caustics) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
|
||||
integrator_path_next_sorted(
|
||||
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
|
||||
}
|
||||
else if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
integrator_path_next_sorted(
|
||||
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
|
||||
integrator_path_next_sorted(
|
||||
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
|
||||
}
|
||||
}
|
||||
|
||||
/* Schedule next kernel to be executed after executing background shader for shadow catcher. */
|
||||
template<uint32_t current_kernel>
|
||||
template<DeviceKernel current_kernel>
|
||||
ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_background(
|
||||
KernelGlobals kg, IntegratorState state)
|
||||
{
|
||||
@@ -177,7 +179,8 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
|
||||
if (!integrator_state_volume_stack_is_empty(kg, state)) {
|
||||
/* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher
|
||||
* objects from it, and then continue shading volume and shadow catcher surface after. */
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
|
||||
integrator_path_next(
|
||||
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -190,7 +193,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
|
||||
*
|
||||
* Note that current_kernel is a template value since making this a variable
|
||||
* leads to poor performance with CUDA atomics. */
|
||||
template<uint32_t current_kernel>
|
||||
template<DeviceKernel current_kernel>
|
||||
ccl_device_forceinline void integrator_intersect_next_kernel(
|
||||
KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
@@ -206,10 +209,10 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
|
||||
const int flags = (hit_surface) ? kernel_data_fetch(shaders, shader).flags : 0;
|
||||
|
||||
if (!integrator_intersect_terminate(kg, state, flags)) {
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
|
||||
integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_TERMINATE(current_kernel);
|
||||
integrator_path_terminate(kg, state, current_kernel);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -218,7 +221,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
|
||||
if (hit) {
|
||||
/* Hit a surface, continue with light or surface kernel. */
|
||||
if (isect->type & PRIMITIVE_LAMP) {
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
|
||||
integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
|
||||
}
|
||||
else {
|
||||
/* Hit a surface, continue with surface kernel unless terminated. */
|
||||
@@ -231,16 +234,16 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
|
||||
(object_flags & SD_OBJECT_CAUSTICS);
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
|
||||
if (use_caustics) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
|
||||
integrator_path_next_sorted(
|
||||
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
|
||||
}
|
||||
else if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
integrator_path_next_sorted(
|
||||
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
|
||||
integrator_path_next_sorted(
|
||||
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
|
||||
}
|
||||
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
@@ -249,13 +252,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_TERMINATE(current_kernel);
|
||||
integrator_path_terminate(kg, state, current_kernel);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Nothing hit, continue with background kernel. */
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -263,7 +266,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
|
||||
*
|
||||
* The logic here matches integrator_intersect_next_kernel, except that
|
||||
* volume shading and termination testing have already been done. */
|
||||
template<uint32_t current_kernel>
|
||||
template<DeviceKernel current_kernel>
|
||||
ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
|
||||
KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
@@ -273,7 +276,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
|
||||
if (isect->prim != PRIM_NONE) {
|
||||
/* Hit a surface, continue with light or surface kernel. */
|
||||
if (isect->type & PRIMITIVE_LAMP) {
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
|
||||
integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
|
||||
return;
|
||||
}
|
||||
else {
|
||||
@@ -286,16 +289,16 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
|
||||
|
||||
if (use_caustics) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
|
||||
integrator_path_next_sorted(
|
||||
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
|
||||
}
|
||||
else if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
integrator_path_next_sorted(
|
||||
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
|
||||
integrator_path_next_sorted(
|
||||
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
|
||||
}
|
||||
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
@@ -307,7 +310,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
|
||||
}
|
||||
else {
|
||||
/* Nothing hit, continue with background kernel. */
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -321,7 +324,7 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg,
|
||||
/* Read ray from integrator state into local memory. */
|
||||
Ray ray ccl_optional_struct_init;
|
||||
integrator_state_read_ray(kg, state, &ray);
|
||||
kernel_assert(ray.t != 0.0f);
|
||||
kernel_assert(ray.tmax != 0.0f);
|
||||
|
||||
const uint visibility = path_state_ray_visibility(state);
|
||||
const int last_isect_prim = INTEGRATOR_STATE(state, isect, prim);
|
||||
@@ -329,12 +332,12 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg,
|
||||
|
||||
/* Trick to use short AO rays to approximate indirect light at the end of the path. */
|
||||
if (path_state_ao_bounce(kg, state)) {
|
||||
ray.t = kernel_data.integrator.ao_bounces_distance;
|
||||
ray.tmax = kernel_data.integrator.ao_bounces_distance;
|
||||
|
||||
if (last_isect_object != OBJECT_NONE) {
|
||||
const float object_ao_distance = kernel_data_fetch(objects, last_isect_object).ao_distance;
|
||||
if (object_ao_distance != 0.0f) {
|
||||
ray.t = object_ao_distance;
|
||||
ray.tmax = object_ao_distance;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -162,7 +162,7 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt
|
||||
|
||||
if (opaque_hit) {
|
||||
/* Hit an opaque surface, shadow path ends here. */
|
||||
INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
|
||||
integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
|
||||
return;
|
||||
}
|
||||
else {
|
||||
@@ -171,7 +171,9 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt
|
||||
*
|
||||
* TODO: could also write to render buffer directly if no transparent shadows?
|
||||
* Could save a kernel execution for the common case. */
|
||||
INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW,
|
||||
integrator_shadow_path_next(kg,
|
||||
state,
|
||||
DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
|
||||
return;
|
||||
}
|
||||
|
@@ -17,7 +17,7 @@ ccl_device void integrator_intersect_subsurface(KernelGlobals kg, IntegratorStat
|
||||
}
|
||||
#endif
|
||||
|
||||
INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
|
||||
integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -24,7 +24,8 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
|
||||
|
||||
Ray volume_ray ccl_optional_struct_init;
|
||||
volume_ray.P = from_P;
|
||||
volume_ray.D = normalize_len(to_P - from_P, &volume_ray.t);
|
||||
volume_ray.D = normalize_len(to_P - from_P, &volume_ray.tmax);
|
||||
volume_ray.tmin = 0.0f;
|
||||
volume_ray.self.object = INTEGRATOR_STATE(state, isect, object);
|
||||
volume_ray.self.prim = INTEGRATOR_STATE(state, isect, prim);
|
||||
volume_ray.self.light_object = OBJECT_NONE;
|
||||
@@ -58,12 +59,9 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
|
||||
volume_stack_enter_exit(kg, state, stack_sd);
|
||||
|
||||
/* Move ray forward. */
|
||||
volume_ray.P = stack_sd->P;
|
||||
volume_ray.tmin = intersection_t_offset(isect.t);
|
||||
volume_ray.self.object = isect.object;
|
||||
volume_ray.self.prim = isect.prim;
|
||||
if (volume_ray.t != FLT_MAX) {
|
||||
volume_ray.D = normalize_len(to_P - volume_ray.P, &volume_ray.t);
|
||||
}
|
||||
++step;
|
||||
}
|
||||
#endif
|
||||
@@ -82,7 +80,8 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s
|
||||
/* Trace ray in random direction. Any direction works, Z up is a guess to get the
|
||||
* fewest hits. */
|
||||
volume_ray.D = make_float3(0.0f, 0.0f, 1.0f);
|
||||
volume_ray.t = FLT_MAX;
|
||||
volume_ray.tmin = 0.0f;
|
||||
volume_ray.tmax = FLT_MAX;
|
||||
volume_ray.self.object = OBJECT_NONE;
|
||||
volume_ray.self.prim = PRIM_NONE;
|
||||
volume_ray.self.light_object = OBJECT_NONE;
|
||||
@@ -199,7 +198,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s
|
||||
}
|
||||
|
||||
/* Move ray forward. */
|
||||
volume_ray.P = stack_sd->P;
|
||||
volume_ray.tmin = intersection_t_offset(isect.t);
|
||||
volume_ray.self.object = isect.object;
|
||||
volume_ray.self.prim = isect.prim;
|
||||
++step;
|
||||
@@ -222,7 +221,9 @@ ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorSt
|
||||
}
|
||||
else {
|
||||
/* Volume stack init for camera rays, continue with intersection of camera ray. */
|
||||
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK,
|
||||
integrator_path_next(kg,
|
||||
state,
|
||||
DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK,
|
||||
DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
|
||||
}
|
||||
}
|
||||
|
@@ -137,8 +137,14 @@ ccl_device_forceinline void mnee_update_light_sample(KernelGlobals kg,
|
||||
}
|
||||
}
|
||||
else if (ls->type == LIGHT_AREA) {
|
||||
float invarea = fabsf(klight->area.invarea);
|
||||
ls->D = normalize_len(ls->P - P, &ls->t);
|
||||
ls->pdf = fabsf(klight->area.invarea);
|
||||
ls->pdf = invarea;
|
||||
if (klight->area.tan_spread > 0.f) {
|
||||
ls->eval_fac = 0.25f * invarea;
|
||||
ls->eval_fac *= light_spread_attenuation(
|
||||
ls->D, ls->Ng, klight->area.tan_spread, klight->area.normalize_spread);
|
||||
}
|
||||
}
|
||||
|
||||
ls->pdf *= kernel_data.integrator.pdf_lights;
|
||||
@@ -436,6 +442,7 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg,
|
||||
projection_ray.self.light_prim = PRIM_NONE;
|
||||
projection_ray.dP = differential_make_compact(sd->dP);
|
||||
projection_ray.dD = differential_zero_compact();
|
||||
projection_ray.tmin = 0.0f;
|
||||
projection_ray.time = sd->time;
|
||||
Intersection projection_isect;
|
||||
|
||||
@@ -499,8 +506,8 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg,
|
||||
projection_ray.self.prim = pv.prim;
|
||||
projection_ray.P = pv.p;
|
||||
}
|
||||
projection_ray.D = normalize_len(tentative_p - projection_ray.P, &projection_ray.t);
|
||||
projection_ray.t *= MNEE_PROJECTION_DISTANCE_MULTIPLIER;
|
||||
projection_ray.D = normalize_len(tentative_p - projection_ray.P, &projection_ray.tmax);
|
||||
projection_ray.tmax *= MNEE_PROJECTION_DISTANCE_MULTIPLIER;
|
||||
|
||||
bool projection_success = false;
|
||||
for (int isect_count = 0; isect_count < MNEE_MAX_INTERSECTION_COUNT; isect_count++) {
|
||||
@@ -519,8 +526,7 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg,
|
||||
|
||||
projection_ray.self.object = projection_isect.object;
|
||||
projection_ray.self.prim = projection_isect.prim;
|
||||
projection_ray.P += projection_isect.t * projection_ray.D;
|
||||
projection_ray.t -= projection_isect.t;
|
||||
projection_ray.tmin = intersection_t_offset(projection_isect.t);
|
||||
}
|
||||
if (!projection_success) {
|
||||
reduce_stepsize = true;
|
||||
@@ -852,6 +858,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
|
||||
Ray probe_ray;
|
||||
probe_ray.self.light_object = ls->object;
|
||||
probe_ray.self.light_prim = ls->prim;
|
||||
probe_ray.tmin = 0.0f;
|
||||
probe_ray.dP = differential_make_compact(sd->dP);
|
||||
probe_ray.dD = differential_zero_compact();
|
||||
probe_ray.time = sd->time;
|
||||
@@ -867,13 +874,13 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
|
||||
ccl_private const ManifoldVertex &v = vertices[vi];
|
||||
|
||||
/* Check visibility. */
|
||||
probe_ray.D = normalize_len(v.p - probe_ray.P, &probe_ray.t);
|
||||
probe_ray.D = normalize_len(v.p - probe_ray.P, &probe_ray.tmax);
|
||||
if (scene_intersect(kg, &probe_ray, PATH_RAY_TRANSMIT, &probe_isect)) {
|
||||
int hit_object = (probe_isect.object == OBJECT_NONE) ?
|
||||
kernel_data_fetch(prim_object, probe_isect.prim) :
|
||||
probe_isect.object;
|
||||
/* Test whether the ray hit the appropriate object at its intended location. */
|
||||
if (hit_object != v.object || fabsf(probe_ray.t - probe_isect.t) > MNEE_MIN_DISTANCE)
|
||||
if (hit_object != v.object || fabsf(probe_ray.tmax - probe_isect.t) > MNEE_MIN_DISTANCE)
|
||||
return false;
|
||||
}
|
||||
probe_ray.self.object = v.object;
|
||||
@@ -952,15 +959,16 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg,
|
||||
probe_ray.self.light_object = ls->object;
|
||||
probe_ray.self.light_prim = ls->prim;
|
||||
probe_ray.P = sd->P;
|
||||
probe_ray.tmin = 0.0f;
|
||||
if (ls->t == FLT_MAX) {
|
||||
/* Distant / env light. */
|
||||
probe_ray.D = ls->D;
|
||||
probe_ray.t = ls->t;
|
||||
probe_ray.tmax = ls->t;
|
||||
}
|
||||
else {
|
||||
/* Other lights, avoid self-intersection. */
|
||||
probe_ray.D = ls->P - probe_ray.P;
|
||||
probe_ray.D = normalize_len(probe_ray.D, &probe_ray.t);
|
||||
probe_ray.D = normalize_len(probe_ray.D, &probe_ray.tmax);
|
||||
}
|
||||
probe_ray.dP = differential_make_compact(sd->dP);
|
||||
probe_ray.dD = differential_zero_compact();
|
||||
@@ -1042,9 +1050,7 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg,
|
||||
|
||||
probe_ray.self.object = probe_isect.object;
|
||||
probe_ray.self.prim = probe_isect.prim;
|
||||
probe_ray.P += probe_isect.t * probe_ray.D;
|
||||
if (ls->t != FLT_MAX)
|
||||
probe_ray.t -= probe_isect.t;
|
||||
probe_ray.tmin = intersection_t_offset(probe_isect.t);
|
||||
};
|
||||
|
||||
/* Mark the manifold walk invalid to keep mollification on by default. */
|
||||
|
@@ -52,7 +52,6 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg,
|
||||
INTEGRATOR_STATE_WRITE(state, path, flag) = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP |
|
||||
PATH_RAY_TRANSPARENT_BACKGROUND;
|
||||
INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = 0.0f;
|
||||
INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
|
||||
INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = FLT_MAX;
|
||||
INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = 1.0f;
|
||||
INTEGRATOR_STATE_WRITE(state, path, throughput) = make_float3(1.0f, 1.0f, 1.0f);
|
||||
|
@@ -62,11 +62,10 @@ ccl_device float3 integrator_eval_background_shader(KernelGlobals kg,
|
||||
const float3 ray_P = INTEGRATOR_STATE(state, ray, P);
|
||||
const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
|
||||
const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
|
||||
const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
|
||||
|
||||
/* multiple importance sampling, get background light pdf for ray
|
||||
* direction, and compute weight with respect to BSDF pdf */
|
||||
const float pdf = background_light_pdf(kg, ray_P - ray_D * mis_ray_t, ray_D);
|
||||
const float pdf = background_light_pdf(kg, ray_P, ray_D);
|
||||
const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf);
|
||||
L *= mis_weight;
|
||||
}
|
||||
@@ -213,7 +212,7 @@ ccl_device void integrator_shade_background(KernelGlobals kg,
|
||||
}
|
||||
#endif
|
||||
|
||||
INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -22,19 +22,8 @@ ccl_device_inline void integrate_light(KernelGlobals kg,
|
||||
const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
|
||||
const float ray_time = INTEGRATOR_STATE(state, ray, time);
|
||||
|
||||
/* Advance ray beyond light. */
|
||||
/* TODO: can we make this more numerically robust to avoid reintersecting the
|
||||
* same light in some cases? Ray should not intersect surface anymore as the
|
||||
* object and prim ids will prevent self intersection. */
|
||||
const float3 new_ray_P = ray_P + ray_D * isect.t;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, P) = new_ray_P;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, t) -= isect.t;
|
||||
|
||||
/* Set position to where the BSDF was sampled, for correct MIS PDF. */
|
||||
const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
|
||||
ray_P -= ray_D * mis_ray_t;
|
||||
isect.t += mis_ray_t;
|
||||
INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = isect.t;
|
||||
/* Advance ray to new start distance. */
|
||||
INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(isect.t);
|
||||
|
||||
LightSample ls ccl_optional_struct_init;
|
||||
const bool use_light_sample = light_sample_from_intersection(kg, &isect, ray_P, ray_D, &ls);
|
||||
@@ -99,11 +88,13 @@ ccl_device void integrator_shade_light(KernelGlobals kg,
|
||||
INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce;
|
||||
|
||||
if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) {
|
||||
INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
|
||||
integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
|
||||
return;
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT,
|
||||
integrator_path_next(kg,
|
||||
state,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT,
|
||||
DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
|
||||
return;
|
||||
}
|
||||
|
@@ -75,13 +75,9 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg,
|
||||
ray.self.light_object = OBJECT_NONE;
|
||||
ray.self.light_prim = PRIM_NONE;
|
||||
/* Modify ray position and length to match current segment. */
|
||||
const float start_t = (hit == 0) ? 0.0f :
|
||||
INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t);
|
||||
const float end_t = (hit < num_recorded_hits) ?
|
||||
INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) :
|
||||
ray.t;
|
||||
ray.P += start_t * ray.D;
|
||||
ray.t = end_t - start_t;
|
||||
ray.tmin = (hit == 0) ? ray.tmin : INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t);
|
||||
ray.tmax = (hit < num_recorded_hits) ? INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) :
|
||||
ray.tmax;
|
||||
|
||||
shader_setup_from_volume(kg, shadow_sd, &ray);
|
||||
|
||||
@@ -137,10 +133,7 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg,
|
||||
/* There are more hits that we could not recorded due to memory usage,
|
||||
* adjust ray to intersect again from the last hit. */
|
||||
const float last_hit_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, num_recorded_hits - 1, t);
|
||||
const float3 ray_P = INTEGRATOR_STATE(state, shadow_ray, P);
|
||||
const float3 ray_D = INTEGRATOR_STATE(state, shadow_ray, D);
|
||||
INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray_P + last_hit_t * ray_D;
|
||||
INTEGRATOR_STATE_WRITE(state, shadow_ray, t) -= last_hit_t;
|
||||
INTEGRATOR_STATE_WRITE(state, shadow_ray, tmin) = intersection_t_offset(last_hit_t);
|
||||
}
|
||||
|
||||
return false;
|
||||
@@ -158,20 +151,22 @@ ccl_device void integrator_shade_shadow(KernelGlobals kg,
|
||||
/* Evaluate transparent shadows. */
|
||||
const bool opaque = integrate_transparent_shadow(kg, state, num_hits);
|
||||
if (opaque) {
|
||||
INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
|
||||
integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (shadow_intersections_has_remaining(num_hits)) {
|
||||
/* More intersections to find, continue shadow ray. */
|
||||
INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW,
|
||||
integrator_shadow_path_next(kg,
|
||||
state,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW,
|
||||
DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
|
||||
return;
|
||||
}
|
||||
else {
|
||||
kernel_accum_light(kg, state, render_buffer);
|
||||
INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
|
||||
integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@@ -77,7 +77,7 @@ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg,
|
||||
# endif
|
||||
{
|
||||
const float bsdf_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
|
||||
const float t = sd->ray_length + INTEGRATOR_STATE(state, path, mis_ray_t);
|
||||
const float t = sd->ray_length;
|
||||
|
||||
/* Multiple importance sampling, get triangle light pdf,
|
||||
* and compute weight with respect to BSDF pdf. */
|
||||
@@ -190,8 +190,8 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
|
||||
const bool is_light = light_sample_is_light(&ls);
|
||||
|
||||
/* Branch off shadow kernel. */
|
||||
INTEGRATOR_SHADOW_PATH_INIT(
|
||||
shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow);
|
||||
IntegratorShadowState shadow_state = integrator_shadow_path_init(
|
||||
kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false);
|
||||
|
||||
/* Copy volume stack and enter/exit volume. */
|
||||
integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state);
|
||||
@@ -323,16 +323,21 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
|
||||
return LABEL_NONE;
|
||||
}
|
||||
|
||||
/* Setup ray. Note that clipping works through transparent bounces. */
|
||||
INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in);
|
||||
INTEGRATOR_STATE_WRITE(state, ray, t) = (label & LABEL_TRANSPARENT) ?
|
||||
INTEGRATOR_STATE(state, ray, t) - sd->ray_length :
|
||||
FLT_MAX;
|
||||
if (label & LABEL_TRANSPARENT) {
|
||||
/* Only need to modify start distance for transparent. */
|
||||
INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(sd->ray_length);
|
||||
}
|
||||
else {
|
||||
/* Setup ray with changed origin and direction. */
|
||||
INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in);
|
||||
INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
|
||||
INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in);
|
||||
INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
|
||||
INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Update throughput. */
|
||||
float3 throughput = INTEGRATOR_STATE(state, path, throughput);
|
||||
@@ -349,12 +354,8 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
|
||||
}
|
||||
|
||||
/* Update path state */
|
||||
if (label & LABEL_TRANSPARENT) {
|
||||
INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length;
|
||||
}
|
||||
else {
|
||||
if (!(label & LABEL_TRANSPARENT)) {
|
||||
INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = bsdf_pdf;
|
||||
INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
|
||||
INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
|
||||
bsdf_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
|
||||
}
|
||||
@@ -371,17 +372,8 @@ ccl_device_forceinline int integrate_surface_volume_only_bounce(IntegratorState
|
||||
return LABEL_NONE;
|
||||
}
|
||||
|
||||
/* Setup ray position, direction stays unchanged. */
|
||||
INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
|
||||
|
||||
/* Clipping works through transparent. */
|
||||
INTEGRATOR_STATE_WRITE(state, ray, t) -= sd->ray_length;
|
||||
|
||||
# ifdef __RAY_DIFFERENTIALS__
|
||||
INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
|
||||
# endif
|
||||
|
||||
INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length;
|
||||
/* Only modify start distance. */
|
||||
INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(sd->ray_length);
|
||||
|
||||
return LABEL_TRANSMIT | LABEL_TRANSPARENT;
|
||||
}
|
||||
@@ -432,7 +424,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,
|
||||
Ray ray ccl_optional_struct_init;
|
||||
ray.P = shadow_ray_offset(kg, sd, ao_D, &skip_self);
|
||||
ray.D = ao_D;
|
||||
ray.t = kernel_data.integrator.ao_bounces_distance;
|
||||
ray.tmin = 0.0f;
|
||||
ray.tmax = kernel_data.integrator.ao_bounces_distance;
|
||||
ray.time = sd->time;
|
||||
ray.self.object = (skip_self) ? sd->object : OBJECT_NONE;
|
||||
ray.self.prim = (skip_self) ? sd->prim : PRIM_NONE;
|
||||
@@ -442,7 +435,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,
|
||||
ray.dD = differential_zero_compact();
|
||||
|
||||
/* Branch off shadow kernel. */
|
||||
INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, ao);
|
||||
IntegratorShadowState shadow_state = integrator_shadow_path_init(
|
||||
kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, true);
|
||||
|
||||
/* Copy volume stack and enter/exit volume. */
|
||||
integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state);
|
||||
@@ -604,22 +598,23 @@ ccl_device bool integrate_surface(KernelGlobals kg,
|
||||
}
|
||||
|
||||
template<uint node_feature_mask = KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE,
|
||||
int current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE>
|
||||
DeviceKernel current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE>
|
||||
ccl_device_forceinline void integrator_shade_surface(KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
ccl_global float *ccl_restrict render_buffer)
|
||||
{
|
||||
if (integrate_surface<node_feature_mask>(kg, state, render_buffer)) {
|
||||
if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE) {
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
|
||||
integrator_path_next(
|
||||
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
|
||||
}
|
||||
else {
|
||||
kernel_assert(INTEGRATOR_STATE(state, ray, t) != 0.0f);
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
|
||||
kernel_assert(INTEGRATOR_STATE(state, ray, tmax) != 0.0f);
|
||||
integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
|
||||
}
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_TERMINATE(current_kernel);
|
||||
integrator_path_terminate(kg, state, current_kernel);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -114,7 +114,8 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg,
|
||||
ccl_device_forceinline void volume_step_init(KernelGlobals kg,
|
||||
ccl_private const RNGState *rng_state,
|
||||
const float object_step_size,
|
||||
float t,
|
||||
const float tmin,
|
||||
const float tmax,
|
||||
ccl_private float *step_size,
|
||||
ccl_private float *step_shade_offset,
|
||||
ccl_private float *steps_offset,
|
||||
@@ -122,7 +123,7 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg,
|
||||
{
|
||||
if (object_step_size == FLT_MAX) {
|
||||
/* Homogeneous volume. */
|
||||
*step_size = t;
|
||||
*step_size = tmax - tmin;
|
||||
*step_shade_offset = 0.0f;
|
||||
*steps_offset = 1.0f;
|
||||
*max_steps = 1;
|
||||
@@ -130,6 +131,7 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg,
|
||||
else {
|
||||
/* Heterogeneous volume. */
|
||||
*max_steps = kernel_data.integrator.volume_max_steps;
|
||||
const float t = tmax - tmin;
|
||||
float step = min(object_step_size, t);
|
||||
|
||||
/* compute exact steps in advance for malloc */
|
||||
@@ -165,7 +167,7 @@ ccl_device void volume_shadow_homogeneous(KernelGlobals kg, IntegratorState stat
|
||||
float3 sigma_t = zero_float3();
|
||||
|
||||
if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) {
|
||||
*throughput *= volume_color_transmittance(sigma_t, ray->t);
|
||||
*throughput *= volume_color_transmittance(sigma_t, ray->tmax - ray->tmin);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
@@ -194,7 +196,8 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
|
||||
volume_step_init(kg,
|
||||
&rng_state,
|
||||
object_step_size,
|
||||
ray->t,
|
||||
ray->tmin,
|
||||
ray->tmax,
|
||||
&step_size,
|
||||
&step_shade_offset,
|
||||
&unused,
|
||||
@@ -202,13 +205,13 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
|
||||
const float steps_offset = 1.0f;
|
||||
|
||||
/* compute extinction at the start */
|
||||
float t = 0.0f;
|
||||
float t = ray->tmin;
|
||||
|
||||
float3 sum = zero_float3();
|
||||
|
||||
for (int i = 0; i < max_steps; i++) {
|
||||
/* advance to new position */
|
||||
float new_t = min(ray->t, (i + steps_offset) * step_size);
|
||||
float new_t = min(ray->tmax, ray->tmin + (i + steps_offset) * step_size);
|
||||
float dt = new_t - t;
|
||||
|
||||
float3 new_P = ray->P + ray->D * (t + dt * step_shade_offset);
|
||||
@@ -233,7 +236,7 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
|
||||
|
||||
/* stop if at the end of the volume */
|
||||
t = new_t;
|
||||
if (t == ray->t) {
|
||||
if (t == ray->tmax) {
|
||||
/* Update throughput in case we haven't done it above */
|
||||
tp = *throughput * exp(sum);
|
||||
break;
|
||||
@@ -257,15 +260,16 @@ ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict r
|
||||
const float xi,
|
||||
ccl_private float *pdf)
|
||||
{
|
||||
const float t = ray->t;
|
||||
const float tmin = ray->tmin;
|
||||
const float tmax = ray->tmax;
|
||||
const float delta = dot((light_P - ray->P), ray->D);
|
||||
const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
|
||||
if (UNLIKELY(D == 0.0f)) {
|
||||
*pdf = 0.0f;
|
||||
return 0.0f;
|
||||
}
|
||||
const float theta_a = -atan2f(delta, D);
|
||||
const float theta_b = atan2f(t - delta, D);
|
||||
const float theta_a = atan2f(tmin - delta, D);
|
||||
const float theta_b = atan2f(tmax - delta, D);
|
||||
const float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
|
||||
if (UNLIKELY(theta_b == theta_a)) {
|
||||
*pdf = 0.0f;
|
||||
@@ -273,7 +277,7 @@ ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict r
|
||||
}
|
||||
*pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
|
||||
|
||||
return min(t, delta + t_); /* min is only for float precision errors */
|
||||
return clamp(delta + t_, tmin, tmax); /* clamp is only for float precision errors */
|
||||
}
|
||||
|
||||
ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray,
|
||||
@@ -286,11 +290,12 @@ ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray,
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
const float t = ray->t;
|
||||
const float tmin = ray->tmin;
|
||||
const float tmax = ray->tmax;
|
||||
const float t_ = sample_t - delta;
|
||||
|
||||
const float theta_a = -atan2f(delta, D);
|
||||
const float theta_b = atan2f(t - delta, D);
|
||||
const float theta_a = atan2f(tmin - delta, D);
|
||||
const float theta_b = atan2f(tmax - delta, D);
|
||||
if (UNLIKELY(theta_b == theta_a)) {
|
||||
return 0.0f;
|
||||
}
|
||||
@@ -310,11 +315,12 @@ ccl_device float volume_equiangular_cdf(ccl_private const Ray *ccl_restrict ray,
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
const float t = ray->t;
|
||||
const float tmin = ray->tmin;
|
||||
const float tmax = ray->tmax;
|
||||
const float t_ = sample_t - delta;
|
||||
|
||||
const float theta_a = -atan2f(delta, D);
|
||||
const float theta_b = atan2f(t - delta, D);
|
||||
const float theta_a = atan2f(tmin - delta, D);
|
||||
const float theta_b = atan2f(tmax - delta, D);
|
||||
if (UNLIKELY(theta_b == theta_a)) {
|
||||
return 0.0f;
|
||||
}
|
||||
@@ -390,8 +396,8 @@ ccl_device float3 volume_emission_integrate(ccl_private VolumeShaderCoefficients
|
||||
|
||||
typedef struct VolumeIntegrateState {
|
||||
/* Volume segment extents. */
|
||||
float start_t;
|
||||
float end_t;
|
||||
float tmin;
|
||||
float tmax;
|
||||
|
||||
/* If volume is absorption-only up to this point, and no probabilistic
|
||||
* scattering or termination has been used yet. */
|
||||
@@ -426,9 +432,9 @@ ccl_device_forceinline void volume_integrate_step_scattering(
|
||||
|
||||
/* Equiangular sampling for direct lighting. */
|
||||
if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR && !result.direct_scatter) {
|
||||
if (result.direct_t >= vstate.start_t && result.direct_t <= vstate.end_t &&
|
||||
if (result.direct_t >= vstate.tmin && result.direct_t <= vstate.tmax &&
|
||||
vstate.equiangular_pdf > VOLUME_SAMPLE_PDF_CUTOFF) {
|
||||
const float new_dt = result.direct_t - vstate.start_t;
|
||||
const float new_dt = result.direct_t - vstate.tmin;
|
||||
const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
|
||||
|
||||
result.direct_scatter = true;
|
||||
@@ -458,7 +464,7 @@ ccl_device_forceinline void volume_integrate_step_scattering(
|
||||
/* compute sampling distance */
|
||||
const float sample_sigma_t = volume_channel_get(coeff.sigma_t, channel);
|
||||
const float new_dt = -logf(1.0f - vstate.rscatter) / sample_sigma_t;
|
||||
const float new_t = vstate.start_t + new_dt;
|
||||
const float new_t = vstate.tmin + new_dt;
|
||||
|
||||
/* transmittance and pdf */
|
||||
const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
|
||||
@@ -528,7 +534,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
|
||||
volume_step_init(kg,
|
||||
rng_state,
|
||||
object_step_size,
|
||||
ray->t,
|
||||
ray->tmin,
|
||||
ray->tmax,
|
||||
&step_size,
|
||||
&step_shade_offset,
|
||||
&steps_offset,
|
||||
@@ -536,8 +543,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
|
||||
|
||||
/* Initialize volume integration state. */
|
||||
VolumeIntegrateState vstate ccl_optional_struct_init;
|
||||
vstate.start_t = 0.0f;
|
||||
vstate.end_t = 0.0f;
|
||||
vstate.tmin = ray->tmin;
|
||||
vstate.tmax = ray->tmin;
|
||||
vstate.absorption_only = true;
|
||||
vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_SCATTER_DISTANCE);
|
||||
vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_PHASE_CHANNEL);
|
||||
@@ -578,8 +585,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
|
||||
|
||||
for (int i = 0; i < max_steps; i++) {
|
||||
/* Advance to new position */
|
||||
vstate.end_t = min(ray->t, (i + steps_offset) * step_size);
|
||||
const float shade_t = vstate.start_t + (vstate.end_t - vstate.start_t) * step_shade_offset;
|
||||
vstate.tmax = min(ray->tmax, ray->tmin + (i + steps_offset) * step_size);
|
||||
const float shade_t = vstate.tmin + (vstate.tmax - vstate.tmin) * step_shade_offset;
|
||||
sd->P = ray->P + ray->D * shade_t;
|
||||
|
||||
/* compute segment */
|
||||
@@ -588,7 +595,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
|
||||
const int closure_flag = sd->flag;
|
||||
|
||||
/* Evaluate transmittance over segment. */
|
||||
const float dt = (vstate.end_t - vstate.start_t);
|
||||
const float dt = (vstate.tmax - vstate.tmin);
|
||||
const float3 transmittance = (closure_flag & SD_EXTINCTION) ?
|
||||
volume_color_transmittance(coeff.sigma_t, dt) :
|
||||
one_float3();
|
||||
@@ -645,8 +652,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
|
||||
}
|
||||
|
||||
/* Stop if at the end of the volume. */
|
||||
vstate.start_t = vstate.end_t;
|
||||
if (vstate.start_t == ray->t) {
|
||||
vstate.tmin = vstate.tmax;
|
||||
if (vstate.tmin == ray->tmax) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -774,8 +781,8 @@ ccl_device_forceinline void integrate_volume_direct_light(
|
||||
const bool is_light = light_sample_is_light(ls);
|
||||
|
||||
/* Branch off shadow kernel. */
|
||||
INTEGRATOR_SHADOW_PATH_INIT(
|
||||
shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow);
|
||||
IntegratorShadowState shadow_state = integrator_shadow_path_init(
|
||||
kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false);
|
||||
|
||||
/* Write shadow ray and associated state to global memory. */
|
||||
integrator_state_write_shadow_ray(kg, shadow_state, &ray);
|
||||
@@ -880,7 +887,8 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
|
||||
/* Setup ray. */
|
||||
INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_omega_in);
|
||||
INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
|
||||
# ifdef __RAY_DIFFERENTIALS__
|
||||
INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
|
||||
INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(phase_domega_in);
|
||||
@@ -901,7 +909,6 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
|
||||
|
||||
/* Update path state */
|
||||
INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = phase_pdf;
|
||||
INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
|
||||
INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
|
||||
phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
|
||||
|
||||
@@ -1021,7 +1028,7 @@ ccl_device void integrator_shade_volume(KernelGlobals kg,
|
||||
integrator_state_read_isect(kg, state, &isect);
|
||||
|
||||
/* Set ray length to current segment. */
|
||||
ray.t = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX;
|
||||
ray.tmax = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX;
|
||||
|
||||
/* Clean volume stack for background rays. */
|
||||
if (isect.prim == PRIM_NONE) {
|
||||
@@ -1032,13 +1039,15 @@ ccl_device void integrator_shade_volume(KernelGlobals kg,
|
||||
|
||||
if (event == VOLUME_PATH_SCATTERED) {
|
||||
/* Queue intersect_closest kernel. */
|
||||
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
|
||||
integrator_path_next(kg,
|
||||
state,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
|
||||
DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
|
||||
return;
|
||||
}
|
||||
else if (event == VOLUME_PATH_MISSED) {
|
||||
/* End path. */
|
||||
INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
|
||||
integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
|
||||
return;
|
||||
}
|
||||
else {
|
||||
|
@@ -50,7 +50,7 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals
|
||||
ccl_device_inline bool kernel_shadow_catcher_path_can_split(KernelGlobals kg,
|
||||
ConstIntegratorState state)
|
||||
{
|
||||
if (INTEGRATOR_PATH_IS_TERMINATED) {
|
||||
if (integrator_path_is_terminated(state)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@@ -47,7 +47,8 @@ KERNEL_STRUCT_END(shadow_path)
|
||||
KERNEL_STRUCT_BEGIN(shadow_ray)
|
||||
KERNEL_STRUCT_MEMBER(shadow_ray, packed_float3, P, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_MEMBER(shadow_ray, packed_float3, D, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_MEMBER(shadow_ray, float, t, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_MEMBER(shadow_ray, float, tmin, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_MEMBER(shadow_ray, float, tmax, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_MEMBER(shadow_ray, float, time, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_MEMBER(shadow_ray, float, dP, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_MEMBER(shadow_ray, int, object, KERNEL_FEATURE_PATH_TRACING)
|
||||
|
@@ -127,6 +127,9 @@ typedef struct IntegratorStateGPU {
|
||||
|
||||
/* Index of main path which will be used by a next shadow catcher split. */
|
||||
ccl_global int *next_main_path_index;
|
||||
|
||||
/* Divisor used to partition active indices by locality when sorting by material. */
|
||||
uint sort_partition_divisor;
|
||||
} IntegratorStateGPU;
|
||||
|
||||
/* Abstraction
|
||||
|
@@ -10,125 +10,196 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Control Flow
|
||||
*
|
||||
* Utilities for control flow between kernels. The implementation may differ per device
|
||||
* or even be handled on the host side. To abstract such differences, experiment with
|
||||
* different implementations and for debugging, this is abstracted using macros.
|
||||
* Utilities for control flow between kernels. The implementation is different between CPU and
|
||||
* GPU devices. For the latter part of the logic is handled on the host side with wavefronts.
|
||||
*
|
||||
* There is a main path for regular path tracing camera for path tracing. Shadows for next
|
||||
* event estimation branch off from this into their own path, that may be computed in
|
||||
* parallel while the main path continues.
|
||||
* parallel while the main path continues. Additionally, shading kernels are sorted using
|
||||
* a key for coherence.
|
||||
*
|
||||
* Each kernel on the main path must call one of these functions. These may not be called
|
||||
* multiple times from the same kernel.
|
||||
*
|
||||
* INTEGRATOR_PATH_INIT(next_kernel)
|
||||
* INTEGRATOR_PATH_NEXT(current_kernel, next_kernel)
|
||||
* INTEGRATOR_PATH_TERMINATE(current_kernel)
|
||||
* integrator_path_init(kg, state, next_kernel)
|
||||
* integrator_path_next(kg, state, current_kernel, next_kernel)
|
||||
* integrator_path_terminate(kg, state, current_kernel)
|
||||
*
|
||||
* For the shadow path similar functions are used, and again each shadow kernel must call
|
||||
* one of them, and only once.
|
||||
*/
|
||||
|
||||
#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(state, path, queued_kernel) == 0)
|
||||
#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED \
|
||||
(INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0)
|
||||
ccl_device_forceinline bool integrator_path_is_terminated(ConstIntegratorState state)
|
||||
{
|
||||
return INTEGRATOR_STATE(state, path, queued_kernel) == 0;
|
||||
}
|
||||
|
||||
ccl_device_forceinline bool integrator_shadow_path_is_terminated(ConstIntegratorShadowState state)
|
||||
{
|
||||
return INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0;
|
||||
}
|
||||
|
||||
#ifdef __KERNEL_GPU__
|
||||
|
||||
# define INTEGRATOR_PATH_INIT(next_kernel) \
|
||||
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
|
||||
1); \
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
|
||||
# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \
|
||||
atomic_fetch_and_sub_uint32( \
|
||||
&kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
|
||||
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
|
||||
1); \
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
|
||||
# define INTEGRATOR_PATH_TERMINATE(current_kernel) \
|
||||
atomic_fetch_and_sub_uint32( \
|
||||
&kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
|
||||
ccl_device_forceinline void integrator_path_init(KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
const DeviceKernel next_kernel)
|
||||
{
|
||||
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
|
||||
}
|
||||
|
||||
# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \
|
||||
IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32( \
|
||||
&kernel_integrator_state.next_shadow_path_index[0], 1); \
|
||||
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
|
||||
1); \
|
||||
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
|
||||
# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \
|
||||
atomic_fetch_and_sub_uint32( \
|
||||
&kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
|
||||
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
|
||||
1); \
|
||||
INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
|
||||
# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \
|
||||
atomic_fetch_and_sub_uint32( \
|
||||
&kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
|
||||
INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
|
||||
ccl_device_forceinline void integrator_path_next(KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
const DeviceKernel current_kernel,
|
||||
const DeviceKernel next_kernel)
|
||||
{
|
||||
atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
|
||||
1);
|
||||
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
|
||||
}
|
||||
|
||||
# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \
|
||||
{ \
|
||||
const int key_ = key; \
|
||||
atomic_fetch_and_add_uint32( \
|
||||
&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
|
||||
INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \
|
||||
atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \
|
||||
1); \
|
||||
}
|
||||
# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \
|
||||
{ \
|
||||
const int key_ = key; \
|
||||
atomic_fetch_and_sub_uint32( \
|
||||
&kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
|
||||
atomic_fetch_and_add_uint32( \
|
||||
&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
|
||||
INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \
|
||||
atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \
|
||||
1); \
|
||||
}
|
||||
ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
const DeviceKernel current_kernel)
|
||||
{
|
||||
atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
|
||||
1);
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
|
||||
}
|
||||
|
||||
ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
|
||||
KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
|
||||
{
|
||||
IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32(
|
||||
&kernel_integrator_state.next_shadow_path_index[0], 1);
|
||||
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
|
||||
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
|
||||
return shadow_state;
|
||||
}
|
||||
|
||||
ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg,
|
||||
IntegratorShadowState state,
|
||||
const DeviceKernel current_kernel,
|
||||
const DeviceKernel next_kernel)
|
||||
{
|
||||
atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
|
||||
1);
|
||||
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
|
||||
INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
|
||||
}
|
||||
|
||||
ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg,
|
||||
IntegratorShadowState state,
|
||||
const DeviceKernel current_kernel)
|
||||
{
|
||||
atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
|
||||
1);
|
||||
INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
|
||||
}
|
||||
|
||||
/* Sort first by truncated state index (for good locality), then by key (for good coherence). */
|
||||
# define INTEGRATOR_SORT_KEY(key, state) \
|
||||
(key + kernel_data.max_shaders * (state / kernel_integrator_state.sort_partition_divisor))
|
||||
|
||||
ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
const DeviceKernel next_kernel,
|
||||
const uint32_t key)
|
||||
{
|
||||
const int key_ = INTEGRATOR_SORT_KEY(key, state);
|
||||
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
|
||||
INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
|
||||
atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
|
||||
}
|
||||
|
||||
ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
const DeviceKernel current_kernel,
|
||||
const DeviceKernel next_kernel,
|
||||
const uint32_t key)
|
||||
{
|
||||
const int key_ = INTEGRATOR_SORT_KEY(key, state);
|
||||
atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
|
||||
1);
|
||||
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
|
||||
INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
|
||||
atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
# define INTEGRATOR_PATH_INIT(next_kernel) \
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
|
||||
# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \
|
||||
{ \
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
|
||||
(void)key; \
|
||||
}
|
||||
# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \
|
||||
{ \
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
|
||||
(void)current_kernel; \
|
||||
}
|
||||
# define INTEGRATOR_PATH_TERMINATE(current_kernel) \
|
||||
{ \
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; \
|
||||
(void)current_kernel; \
|
||||
}
|
||||
# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \
|
||||
{ \
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
|
||||
(void)key; \
|
||||
(void)current_kernel; \
|
||||
}
|
||||
ccl_device_forceinline void integrator_path_init(KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
const DeviceKernel next_kernel)
|
||||
{
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
|
||||
}
|
||||
|
||||
# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \
|
||||
IntegratorShadowState shadow_state = &state->shadow_type; \
|
||||
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
|
||||
# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \
|
||||
{ \
|
||||
INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; \
|
||||
(void)current_kernel; \
|
||||
}
|
||||
# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \
|
||||
{ \
|
||||
INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; \
|
||||
(void)current_kernel; \
|
||||
}
|
||||
ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
const DeviceKernel next_kernel,
|
||||
const uint32_t key)
|
||||
{
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
|
||||
(void)key;
|
||||
}
|
||||
|
||||
ccl_device_forceinline void integrator_path_next(KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
const DeviceKernel current_kernel,
|
||||
const DeviceKernel next_kernel)
|
||||
{
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
|
||||
(void)current_kernel;
|
||||
}
|
||||
|
||||
ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
const DeviceKernel current_kernel)
|
||||
{
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
|
||||
(void)current_kernel;
|
||||
}
|
||||
|
||||
ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
const DeviceKernel current_kernel,
|
||||
const DeviceKernel next_kernel,
|
||||
const uint32_t key)
|
||||
{
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
|
||||
(void)key;
|
||||
(void)current_kernel;
|
||||
}
|
||||
|
||||
ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
|
||||
KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
|
||||
{
|
||||
IntegratorShadowState shadow_state = (is_ao) ? &state->ao : &state->shadow;
|
||||
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
|
||||
return shadow_state;
|
||||
}
|
||||
|
||||
ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg,
|
||||
IntegratorShadowState state,
|
||||
const DeviceKernel current_kernel,
|
||||
const DeviceKernel next_kernel)
|
||||
{
|
||||
INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
|
||||
(void)current_kernel;
|
||||
}
|
||||
|
||||
ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg,
|
||||
IntegratorShadowState state,
|
||||
const DeviceKernel current_kernel)
|
||||
{
|
||||
INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
|
||||
(void)current_kernel;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -37,11 +37,10 @@ KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING)
|
||||
/* enum PathRayMNEE */
|
||||
KERNEL_STRUCT_MEMBER(path, uint8_t, mnee, KERNEL_FEATURE_PATH_TRACING)
|
||||
/* Multiple importance sampling
|
||||
* The PDF of BSDF sampling at the last scatter point, and distance to the
|
||||
* last scatter point minus the last ray segment. This distance lets us
|
||||
* compute the complete distance through transparent surfaces and volumes. */
|
||||
* The PDF of BSDF sampling at the last scatter point, which is at ray distance
|
||||
* zero and distance. Note that transparency and volume attenuation increase
|
||||
* the ray tmin but keep P unmodified so that this works. */
|
||||
KERNEL_STRUCT_MEMBER(path, float, mis_ray_pdf, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_MEMBER(path, float, mis_ray_t, KERNEL_FEATURE_PATH_TRACING)
|
||||
/* Filter glossy. */
|
||||
KERNEL_STRUCT_MEMBER(path, float, min_ray_pdf, KERNEL_FEATURE_PATH_TRACING)
|
||||
/* Continuation probability for path termination. */
|
||||
@@ -63,7 +62,8 @@ KERNEL_STRUCT_END(path)
|
||||
KERNEL_STRUCT_BEGIN(ray)
|
||||
KERNEL_STRUCT_MEMBER(ray, packed_float3, P, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_MEMBER(ray, packed_float3, D, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_MEMBER(ray, float, t, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_MEMBER(ray, float, tmin, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_MEMBER(ray, float, tmax, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_MEMBER(ray, float, time, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_MEMBER(ray, float, dP, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_MEMBER(ray, float, dD, KERNEL_FEATURE_PATH_TRACING)
|
||||
|
@@ -17,7 +17,8 @@ ccl_device_forceinline void integrator_state_write_ray(KernelGlobals kg,
|
||||
{
|
||||
INTEGRATOR_STATE_WRITE(state, ray, P) = ray->P;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, D) = ray->D;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, t) = ray->t;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, tmin) = ray->tmin;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, tmax) = ray->tmax;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, time) = ray->time;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, dP) = ray->dP;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, dD) = ray->dD;
|
||||
@@ -29,7 +30,8 @@ ccl_device_forceinline void integrator_state_read_ray(KernelGlobals kg,
|
||||
{
|
||||
ray->P = INTEGRATOR_STATE(state, ray, P);
|
||||
ray->D = INTEGRATOR_STATE(state, ray, D);
|
||||
ray->t = INTEGRATOR_STATE(state, ray, t);
|
||||
ray->tmin = INTEGRATOR_STATE(state, ray, tmin);
|
||||
ray->tmax = INTEGRATOR_STATE(state, ray, tmax);
|
||||
ray->time = INTEGRATOR_STATE(state, ray, time);
|
||||
ray->dP = INTEGRATOR_STATE(state, ray, dP);
|
||||
ray->dD = INTEGRATOR_STATE(state, ray, dD);
|
||||
@@ -42,7 +44,8 @@ ccl_device_forceinline void integrator_state_write_shadow_ray(
|
||||
{
|
||||
INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray->P;
|
||||
INTEGRATOR_STATE_WRITE(state, shadow_ray, D) = ray->D;
|
||||
INTEGRATOR_STATE_WRITE(state, shadow_ray, t) = ray->t;
|
||||
INTEGRATOR_STATE_WRITE(state, shadow_ray, tmin) = ray->tmin;
|
||||
INTEGRATOR_STATE_WRITE(state, shadow_ray, tmax) = ray->tmax;
|
||||
INTEGRATOR_STATE_WRITE(state, shadow_ray, time) = ray->time;
|
||||
INTEGRATOR_STATE_WRITE(state, shadow_ray, dP) = ray->dP;
|
||||
}
|
||||
@@ -53,7 +56,8 @@ ccl_device_forceinline void integrator_state_read_shadow_ray(KernelGlobals kg,
|
||||
{
|
||||
ray->P = INTEGRATOR_STATE(state, shadow_ray, P);
|
||||
ray->D = INTEGRATOR_STATE(state, shadow_ray, D);
|
||||
ray->t = INTEGRATOR_STATE(state, shadow_ray, t);
|
||||
ray->tmin = INTEGRATOR_STATE(state, shadow_ray, tmin);
|
||||
ray->tmax = INTEGRATOR_STATE(state, shadow_ray, tmax);
|
||||
ray->time = INTEGRATOR_STATE(state, shadow_ray, time);
|
||||
ray->dP = INTEGRATOR_STATE(state, shadow_ray, dP);
|
||||
ray->dD = differential_zero_compact();
|
||||
|
@@ -38,7 +38,8 @@ ccl_device int subsurface_bounce(KernelGlobals kg,
|
||||
/* Setup ray into surface. */
|
||||
INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, D) = bssrdf->N;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
|
||||
INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
|
||||
INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_zero_compact();
|
||||
|
||||
@@ -160,7 +161,7 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
|
||||
/* Pretend ray is coming from the outside towards the exit point. This ensures
|
||||
* correct front/back facing normals.
|
||||
* TODO: find a more elegant solution? */
|
||||
ray.P += ray.D * ray.t * 2.0f;
|
||||
ray.P += ray.D * ray.tmax * 2.0f;
|
||||
ray.D = -ray.D;
|
||||
|
||||
integrator_state_write_isect(kg, state, &ss_isect.hits[0]);
|
||||
@@ -177,17 +178,23 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
|
||||
const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);
|
||||
|
||||
if (use_caustics) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
|
||||
integrator_path_next_sorted(kg,
|
||||
state,
|
||||
DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE,
|
||||
shader);
|
||||
}
|
||||
else if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
|
||||
integrator_path_next_sorted(kg,
|
||||
state,
|
||||
DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE,
|
||||
shader);
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
|
||||
integrator_path_next_sorted(kg,
|
||||
state,
|
||||
DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE,
|
||||
shader);
|
||||
}
|
||||
|
@@ -82,7 +82,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
|
||||
/* Create ray. */
|
||||
ray.P = P + disk_N * disk_height + disk_P;
|
||||
ray.D = -disk_N;
|
||||
ray.t = 2.0f * disk_height;
|
||||
ray.tmin = 0.0f;
|
||||
ray.tmax = 2.0f * disk_height;
|
||||
ray.dP = ray_dP;
|
||||
ray.dD = differential_zero_compact();
|
||||
ray.time = time;
|
||||
@@ -188,7 +189,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
|
||||
|
||||
ray.P = ray.P + ray.D * ss_isect.hits[hit].t;
|
||||
ray.D = ss_isect.Ng[hit];
|
||||
ray.t = 1.0f;
|
||||
ray.tmin = 0.0f;
|
||||
ray.tmax = 1.0f;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -195,7 +195,8 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
|
||||
/* Setup ray. */
|
||||
ray.P = P;
|
||||
ray.D = D;
|
||||
ray.t = FLT_MAX;
|
||||
ray.tmin = 0.0f;
|
||||
ray.tmax = FLT_MAX;
|
||||
ray.time = time;
|
||||
ray.dP = ray_dP;
|
||||
ray.dD = differential_zero_compact();
|
||||
@@ -370,10 +371,10 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
|
||||
* chance of connecting to it.
|
||||
* TODO: Maybe use less than 10 times the mean free path? */
|
||||
if (bounce == 0) {
|
||||
ray.t = max(t, 10.0f / (reduce_min(sigma_t)));
|
||||
ray.tmax = max(t, 10.0f / (reduce_min(sigma_t)));
|
||||
}
|
||||
else {
|
||||
ray.t = t;
|
||||
ray.tmax = t;
|
||||
/* After the first bounce the object can intersect the same surface again */
|
||||
ray.self.object = OBJECT_NONE;
|
||||
ray.self.prim = PRIM_NONE;
|
||||
@@ -384,12 +385,12 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
|
||||
if (hit) {
|
||||
#ifdef __KERNEL_GPU_RAYTRACING__
|
||||
/* t is always in world space with OptiX and MetalRT. */
|
||||
ray.t = ss_isect.hits[0].t;
|
||||
ray.tmax = ss_isect.hits[0].t;
|
||||
#else
|
||||
/* Compute world space distance to surface hit. */
|
||||
float3 D = transform_direction(&ob_itfm, ray.D);
|
||||
D = normalize(D) * ss_isect.hits[0].t;
|
||||
ray.t = len(transform_direction(&ob_tfm, D));
|
||||
ray.tmax = len(transform_direction(&ob_tfm, D));
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -397,16 +398,16 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
|
||||
/* Check if we hit the opposite side. */
|
||||
if (hit) {
|
||||
have_opposite_interface = true;
|
||||
opposite_distance = dot(ray.P + ray.t * ray.D - P, -N);
|
||||
opposite_distance = dot(ray.P + ray.tmax * ray.D - P, -N);
|
||||
}
|
||||
/* Apart from the opposite side check, we were supposed to only trace up to distance t,
|
||||
* so check if there would have been a hit in that case. */
|
||||
hit = ray.t < t;
|
||||
hit = ray.tmax < t;
|
||||
}
|
||||
|
||||
/* Use the distance to the exit point for the throughput update if we found one. */
|
||||
if (hit) {
|
||||
t = ray.t;
|
||||
t = ray.tmax;
|
||||
}
|
||||
|
||||
/* Advance to new scatter location. */
|
||||
|
@@ -270,31 +270,26 @@ ccl_device bool lights_intersect(KernelGlobals kg,
|
||||
|
||||
if (type == LIGHT_SPOT) {
|
||||
/* Spot/Disk light. */
|
||||
const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
|
||||
const float3 ray_P = ray->P - ray->D * mis_ray_t;
|
||||
|
||||
const float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]);
|
||||
const float radius = klight->spot.radius;
|
||||
if (radius == 0.0f) {
|
||||
continue;
|
||||
}
|
||||
/* disk oriented normal */
|
||||
const float3 lightN = normalize(ray_P - lightP);
|
||||
const float3 lightN = normalize(ray->P - lightP);
|
||||
/* One sided. */
|
||||
if (dot(ray->D, lightN) >= 0.0f) {
|
||||
continue;
|
||||
}
|
||||
|
||||
float3 P;
|
||||
if (!ray_disk_intersect(ray->P, ray->D, ray->t, lightP, lightN, radius, &P, &t)) {
|
||||
if (!ray_disk_intersect(
|
||||
ray->P, ray->D, ray->tmin, ray->tmax, lightP, lightN, radius, &P, &t)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (type == LIGHT_POINT) {
|
||||
/* Sphere light (aka, aligned disk light). */
|
||||
const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
|
||||
const float3 ray_P = ray->P - ray->D * mis_ray_t;
|
||||
|
||||
const float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]);
|
||||
const float radius = klight->spot.radius;
|
||||
if (radius == 0.0f) {
|
||||
@@ -302,9 +297,10 @@ ccl_device bool lights_intersect(KernelGlobals kg,
|
||||
}
|
||||
|
||||
/* disk oriented normal */
|
||||
const float3 lightN = normalize(ray_P - lightP);
|
||||
const float3 lightN = normalize(ray->P - lightP);
|
||||
float3 P;
|
||||
if (!ray_disk_intersect(ray->P, ray->D, ray->t, lightP, lightN, radius, &P, &t)) {
|
||||
if (!ray_disk_intersect(
|
||||
ray->P, ray->D, ray->tmin, ray->tmax, lightP, lightN, radius, &P, &t)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -330,8 +326,19 @@ ccl_device bool lights_intersect(KernelGlobals kg,
|
||||
const float3 light_P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
|
||||
|
||||
float3 P;
|
||||
if (!ray_quad_intersect(
|
||||
ray->P, ray->D, 0.0f, ray->t, light_P, axisu, axisv, Ng, &P, &t, &u, &v, is_round)) {
|
||||
if (!ray_quad_intersect(ray->P,
|
||||
ray->D,
|
||||
ray->tmin,
|
||||
ray->tmax,
|
||||
light_P,
|
||||
axisu,
|
||||
axisv,
|
||||
Ng,
|
||||
&P,
|
||||
&t,
|
||||
&u,
|
||||
&v,
|
||||
is_round)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -775,7 +782,8 @@ ccl_device_forceinline void triangle_light_sample(KernelGlobals kg,
|
||||
ls->D = z * B + safe_sqrtf(1.0f - z * z) * safe_normalize(C_ - dot(C_, B) * B);
|
||||
|
||||
/* calculate intersection with the planar triangle */
|
||||
if (!ray_triangle_intersect(P, ls->D, FLT_MAX, V[0], V[1], V[2], &ls->u, &ls->v, &ls->t)) {
|
||||
if (!ray_triangle_intersect(
|
||||
P, ls->D, 0.0f, FLT_MAX, V[0], V[1], V[2], &ls->u, &ls->v, &ls->t)) {
|
||||
ls->pdf = 0.0f;
|
||||
return;
|
||||
}
|
||||
|
@@ -227,23 +227,24 @@ ccl_device_inline void shadow_ray_setup(ccl_private const ShaderData *ccl_restri
|
||||
if (ls->shader & SHADER_CAST_SHADOW) {
|
||||
/* setup ray */
|
||||
ray->P = P;
|
||||
ray->tmin = 0.0f;
|
||||
|
||||
if (ls->t == FLT_MAX) {
|
||||
/* distant light */
|
||||
ray->D = ls->D;
|
||||
ray->t = ls->t;
|
||||
ray->tmax = ls->t;
|
||||
}
|
||||
else {
|
||||
/* other lights, avoid self-intersection */
|
||||
ray->D = ls->P - P;
|
||||
ray->D = normalize_len(ray->D, &ray->t);
|
||||
ray->D = normalize_len(ray->D, &ray->tmax);
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* signal to not cast shadow ray */
|
||||
ray->P = zero_float3();
|
||||
ray->D = zero_float3();
|
||||
ray->t = 0.0f;
|
||||
ray->tmax = 0.0f;
|
||||
}
|
||||
|
||||
ray->dP = differential_make_compact(sd->dP);
|
||||
|
@@ -1094,10 +1094,8 @@ bool OSLRenderServices::get_background_attribute(const KernelGlobalsCPU *kg,
|
||||
ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P);
|
||||
|
||||
if (derivatives) {
|
||||
ndc[1] = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(sd->ray_dP, 0.0f, 0.0f)) -
|
||||
ndc[0];
|
||||
ndc[2] = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(0.0f, sd->ray_dP, 0.0f)) -
|
||||
ndc[0];
|
||||
ndc[1] = zero_float3();
|
||||
ndc[2] = zero_float3();
|
||||
}
|
||||
}
|
||||
else {
|
||||
@@ -1671,7 +1669,8 @@ bool OSLRenderServices::trace(TraceOpt &options,
|
||||
|
||||
ray.P = TO_FLOAT3(P);
|
||||
ray.D = TO_FLOAT3(R);
|
||||
ray.t = (options.maxdist == 1.0e30f) ? FLT_MAX : options.maxdist - options.mindist;
|
||||
ray.tmin = 0.0f;
|
||||
ray.tmax = (options.maxdist == 1.0e30f) ? FLT_MAX : options.maxdist - options.mindist;
|
||||
ray.time = sd->time;
|
||||
ray.self.object = OBJECT_NONE;
|
||||
ray.self.prim = PRIM_NONE;
|
||||
|
@@ -59,7 +59,8 @@ ccl_device float svm_ao(
|
||||
Ray ray;
|
||||
ray.P = sd->P;
|
||||
ray.D = D.x * T + D.y * B + D.z * N;
|
||||
ray.t = max_dist;
|
||||
ray.tmin = 0.0f;
|
||||
ray.tmax = max_dist;
|
||||
ray.time = sd->time;
|
||||
ray.self.object = sd->object;
|
||||
ray.self.prim = sd->prim;
|
||||
|
@@ -179,7 +179,8 @@ ccl_device float3 svm_bevel(
|
||||
Ray ray ccl_optional_struct_init;
|
||||
ray.P = sd->P + disk_N * disk_height + disk_P;
|
||||
ray.D = -disk_N;
|
||||
ray.t = 2.0f * disk_height;
|
||||
ray.tmin = 0.0f;
|
||||
ray.tmax = 2.0f * disk_height;
|
||||
ray.dP = differential_zero_compact();
|
||||
ray.dD = differential_zero_compact();
|
||||
ray.time = sd->time;
|
||||
|
110
intern/cycles/kernel/svm/node_types_template.h
Normal file
110
intern/cycles/kernel/svm/node_types_template.h
Normal file
@@ -0,0 +1,110 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#ifndef SHADER_NODE_TYPE
|
||||
# define SHADER_NODE_TYPE(name)
|
||||
#endif
|
||||
|
||||
/* NOTE: for best OpenCL performance, item definition in the enum must
|
||||
* match the switch case order in `svm.h`. */
|
||||
|
||||
SHADER_NODE_TYPE(NODE_END)
|
||||
SHADER_NODE_TYPE(NODE_SHADER_JUMP)
|
||||
SHADER_NODE_TYPE(NODE_CLOSURE_BSDF)
|
||||
SHADER_NODE_TYPE(NODE_CLOSURE_EMISSION)
|
||||
SHADER_NODE_TYPE(NODE_CLOSURE_BACKGROUND)
|
||||
SHADER_NODE_TYPE(NODE_CLOSURE_SET_WEIGHT)
|
||||
SHADER_NODE_TYPE(NODE_CLOSURE_WEIGHT)
|
||||
SHADER_NODE_TYPE(NODE_EMISSION_WEIGHT)
|
||||
SHADER_NODE_TYPE(NODE_MIX_CLOSURE)
|
||||
SHADER_NODE_TYPE(NODE_JUMP_IF_ZERO)
|
||||
SHADER_NODE_TYPE(NODE_JUMP_IF_ONE)
|
||||
SHADER_NODE_TYPE(NODE_GEOMETRY)
|
||||
SHADER_NODE_TYPE(NODE_CONVERT)
|
||||
SHADER_NODE_TYPE(NODE_TEX_COORD)
|
||||
SHADER_NODE_TYPE(NODE_VALUE_F)
|
||||
SHADER_NODE_TYPE(NODE_VALUE_V)
|
||||
SHADER_NODE_TYPE(NODE_ATTR)
|
||||
SHADER_NODE_TYPE(NODE_VERTEX_COLOR)
|
||||
SHADER_NODE_TYPE(NODE_GEOMETRY_BUMP_DX)
|
||||
SHADER_NODE_TYPE(NODE_GEOMETRY_BUMP_DY)
|
||||
SHADER_NODE_TYPE(NODE_SET_DISPLACEMENT)
|
||||
SHADER_NODE_TYPE(NODE_DISPLACEMENT)
|
||||
SHADER_NODE_TYPE(NODE_VECTOR_DISPLACEMENT)
|
||||
SHADER_NODE_TYPE(NODE_TEX_IMAGE)
|
||||
SHADER_NODE_TYPE(NODE_TEX_IMAGE_BOX)
|
||||
SHADER_NODE_TYPE(NODE_TEX_NOISE)
|
||||
SHADER_NODE_TYPE(NODE_SET_BUMP)
|
||||
SHADER_NODE_TYPE(NODE_ATTR_BUMP_DX)
|
||||
SHADER_NODE_TYPE(NODE_ATTR_BUMP_DY)
|
||||
SHADER_NODE_TYPE(NODE_VERTEX_COLOR_BUMP_DX)
|
||||
SHADER_NODE_TYPE(NODE_VERTEX_COLOR_BUMP_DY)
|
||||
SHADER_NODE_TYPE(NODE_TEX_COORD_BUMP_DX)
|
||||
SHADER_NODE_TYPE(NODE_TEX_COORD_BUMP_DY)
|
||||
SHADER_NODE_TYPE(NODE_CLOSURE_SET_NORMAL)
|
||||
SHADER_NODE_TYPE(NODE_ENTER_BUMP_EVAL)
|
||||
SHADER_NODE_TYPE(NODE_LEAVE_BUMP_EVAL)
|
||||
SHADER_NODE_TYPE(NODE_HSV)
|
||||
SHADER_NODE_TYPE(NODE_CLOSURE_HOLDOUT)
|
||||
SHADER_NODE_TYPE(NODE_FRESNEL)
|
||||
SHADER_NODE_TYPE(NODE_LAYER_WEIGHT)
|
||||
SHADER_NODE_TYPE(NODE_CLOSURE_VOLUME)
|
||||
SHADER_NODE_TYPE(NODE_PRINCIPLED_VOLUME)
|
||||
SHADER_NODE_TYPE(NODE_MATH)
|
||||
SHADER_NODE_TYPE(NODE_VECTOR_MATH)
|
||||
SHADER_NODE_TYPE(NODE_RGB_RAMP)
|
||||
SHADER_NODE_TYPE(NODE_GAMMA)
|
||||
SHADER_NODE_TYPE(NODE_BRIGHTCONTRAST)
|
||||
SHADER_NODE_TYPE(NODE_LIGHT_PATH)
|
||||
SHADER_NODE_TYPE(NODE_OBJECT_INFO)
|
||||
SHADER_NODE_TYPE(NODE_PARTICLE_INFO)
|
||||
SHADER_NODE_TYPE(NODE_HAIR_INFO)
|
||||
SHADER_NODE_TYPE(NODE_POINT_INFO)
|
||||
SHADER_NODE_TYPE(NODE_TEXTURE_MAPPING)
|
||||
SHADER_NODE_TYPE(NODE_MAPPING)
|
||||
SHADER_NODE_TYPE(NODE_MIN_MAX)
|
||||
SHADER_NODE_TYPE(NODE_CAMERA)
|
||||
SHADER_NODE_TYPE(NODE_TEX_ENVIRONMENT)
|
||||
SHADER_NODE_TYPE(NODE_TEX_SKY)
|
||||
SHADER_NODE_TYPE(NODE_TEX_GRADIENT)
|
||||
SHADER_NODE_TYPE(NODE_TEX_VORONOI)
|
||||
SHADER_NODE_TYPE(NODE_TEX_MUSGRAVE)
|
||||
SHADER_NODE_TYPE(NODE_TEX_WAVE)
|
||||
SHADER_NODE_TYPE(NODE_TEX_MAGIC)
|
||||
SHADER_NODE_TYPE(NODE_TEX_CHECKER)
|
||||
SHADER_NODE_TYPE(NODE_TEX_BRICK)
|
||||
SHADER_NODE_TYPE(NODE_TEX_WHITE_NOISE)
|
||||
SHADER_NODE_TYPE(NODE_NORMAL)
|
||||
SHADER_NODE_TYPE(NODE_LIGHT_FALLOFF)
|
||||
SHADER_NODE_TYPE(NODE_IES)
|
||||
SHADER_NODE_TYPE(NODE_CURVES)
|
||||
SHADER_NODE_TYPE(NODE_TANGENT)
|
||||
SHADER_NODE_TYPE(NODE_NORMAL_MAP)
|
||||
SHADER_NODE_TYPE(NODE_INVERT)
|
||||
SHADER_NODE_TYPE(NODE_MIX)
|
||||
SHADER_NODE_TYPE(NODE_SEPARATE_COLOR)
|
||||
SHADER_NODE_TYPE(NODE_COMBINE_COLOR)
|
||||
SHADER_NODE_TYPE(NODE_SEPARATE_VECTOR)
|
||||
SHADER_NODE_TYPE(NODE_COMBINE_VECTOR)
|
||||
SHADER_NODE_TYPE(NODE_SEPARATE_HSV)
|
||||
SHADER_NODE_TYPE(NODE_COMBINE_HSV)
|
||||
SHADER_NODE_TYPE(NODE_VECTOR_ROTATE)
|
||||
SHADER_NODE_TYPE(NODE_VECTOR_TRANSFORM)
|
||||
SHADER_NODE_TYPE(NODE_WIREFRAME)
|
||||
SHADER_NODE_TYPE(NODE_WAVELENGTH)
|
||||
SHADER_NODE_TYPE(NODE_BLACKBODY)
|
||||
SHADER_NODE_TYPE(NODE_MAP_RANGE)
|
||||
SHADER_NODE_TYPE(NODE_VECTOR_MAP_RANGE)
|
||||
SHADER_NODE_TYPE(NODE_CLAMP)
|
||||
SHADER_NODE_TYPE(NODE_BEVEL)
|
||||
SHADER_NODE_TYPE(NODE_AMBIENT_OCCLUSION)
|
||||
SHADER_NODE_TYPE(NODE_TEX_VOXEL)
|
||||
SHADER_NODE_TYPE(NODE_AOV_START)
|
||||
SHADER_NODE_TYPE(NODE_AOV_COLOR)
|
||||
SHADER_NODE_TYPE(NODE_AOV_VALUE)
|
||||
SHADER_NODE_TYPE(NODE_FLOAT_CURVE)
|
||||
|
||||
/* Padding for struct alignment. */
|
||||
SHADER_NODE_TYPE(NODE_PAD1)
|
||||
|
||||
#undef SHADER_NODE_TYPE
|
@@ -204,6 +204,15 @@ CCL_NAMESPACE_END
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef __KERNEL_USE_DATA_CONSTANTS__
|
||||
# define SVM_CASE(node) \
|
||||
case node: \
|
||||
if (!kernel_data_svm_usage_##node) \
|
||||
break;
|
||||
#else
|
||||
# define SVM_CASE(node) case node:
|
||||
#endif
|
||||
|
||||
/* Main Interpreter Loop */
|
||||
template<uint node_feature_mask, ShaderType type, typename ConstIntegratorGenericState>
|
||||
ccl_device void svm_eval_nodes(KernelGlobals kg,
|
||||
@@ -219,9 +228,10 @@ ccl_device void svm_eval_nodes(KernelGlobals kg,
|
||||
uint4 node = read_node(kg, &offset);
|
||||
|
||||
switch (node.x) {
|
||||
case NODE_END:
|
||||
return;
|
||||
case NODE_SHADER_JUMP: {
|
||||
SVM_CASE(NODE_END)
|
||||
return;
|
||||
SVM_CASE(NODE_SHADER_JUMP)
|
||||
{
|
||||
if (type == SHADER_TYPE_SURFACE)
|
||||
offset = node.y;
|
||||
else if (type == SHADER_TYPE_VOLUME)
|
||||
@@ -232,351 +242,349 @@ ccl_device void svm_eval_nodes(KernelGlobals kg,
|
||||
return;
|
||||
break;
|
||||
}
|
||||
case NODE_CLOSURE_BSDF:
|
||||
offset = svm_node_closure_bsdf<node_feature_mask, type>(
|
||||
kg, sd, stack, node, path_flag, offset);
|
||||
break;
|
||||
case NODE_CLOSURE_EMISSION:
|
||||
IF_KERNEL_NODES_FEATURE(EMISSION)
|
||||
{
|
||||
svm_node_closure_emission(sd, stack, node);
|
||||
}
|
||||
break;
|
||||
case NODE_CLOSURE_BACKGROUND:
|
||||
IF_KERNEL_NODES_FEATURE(EMISSION)
|
||||
{
|
||||
svm_node_closure_background(sd, stack, node);
|
||||
}
|
||||
break;
|
||||
case NODE_CLOSURE_SET_WEIGHT:
|
||||
svm_node_closure_set_weight(sd, node.y, node.z, node.w);
|
||||
break;
|
||||
case NODE_CLOSURE_WEIGHT:
|
||||
svm_node_closure_weight(sd, stack, node.y);
|
||||
break;
|
||||
case NODE_EMISSION_WEIGHT:
|
||||
IF_KERNEL_NODES_FEATURE(EMISSION)
|
||||
{
|
||||
svm_node_emission_weight(kg, sd, stack, node);
|
||||
}
|
||||
break;
|
||||
case NODE_MIX_CLOSURE:
|
||||
svm_node_mix_closure(sd, stack, node);
|
||||
break;
|
||||
case NODE_JUMP_IF_ZERO:
|
||||
if (stack_load_float(stack, node.z) <= 0.0f)
|
||||
offset += node.y;
|
||||
break;
|
||||
case NODE_JUMP_IF_ONE:
|
||||
if (stack_load_float(stack, node.z) >= 1.0f)
|
||||
offset += node.y;
|
||||
break;
|
||||
case NODE_GEOMETRY:
|
||||
svm_node_geometry(kg, sd, stack, node.y, node.z);
|
||||
break;
|
||||
case NODE_CONVERT:
|
||||
svm_node_convert(kg, sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
case NODE_TEX_COORD:
|
||||
offset = svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
|
||||
break;
|
||||
case NODE_VALUE_F:
|
||||
svm_node_value_f(kg, sd, stack, node.y, node.z);
|
||||
break;
|
||||
case NODE_VALUE_V:
|
||||
offset = svm_node_value_v(kg, sd, stack, node.y, offset);
|
||||
break;
|
||||
case NODE_ATTR:
|
||||
svm_node_attr<node_feature_mask>(kg, sd, stack, node);
|
||||
break;
|
||||
case NODE_VERTEX_COLOR:
|
||||
svm_node_vertex_color(kg, sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
case NODE_GEOMETRY_BUMP_DX:
|
||||
IF_KERNEL_NODES_FEATURE(BUMP)
|
||||
{
|
||||
svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z);
|
||||
}
|
||||
break;
|
||||
case NODE_GEOMETRY_BUMP_DY:
|
||||
IF_KERNEL_NODES_FEATURE(BUMP)
|
||||
{
|
||||
svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z);
|
||||
}
|
||||
break;
|
||||
case NODE_SET_DISPLACEMENT:
|
||||
svm_node_set_displacement<node_feature_mask>(kg, sd, stack, node.y);
|
||||
break;
|
||||
case NODE_DISPLACEMENT:
|
||||
svm_node_displacement<node_feature_mask>(kg, sd, stack, node);
|
||||
break;
|
||||
case NODE_VECTOR_DISPLACEMENT:
|
||||
offset = svm_node_vector_displacement<node_feature_mask>(kg, sd, stack, node, offset);
|
||||
break;
|
||||
case NODE_TEX_IMAGE:
|
||||
offset = svm_node_tex_image(kg, sd, stack, node, offset);
|
||||
break;
|
||||
case NODE_TEX_IMAGE_BOX:
|
||||
svm_node_tex_image_box(kg, sd, stack, node);
|
||||
break;
|
||||
case NODE_TEX_NOISE:
|
||||
offset = svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
case NODE_SET_BUMP:
|
||||
svm_node_set_bump<node_feature_mask>(kg, sd, stack, node);
|
||||
break;
|
||||
case NODE_ATTR_BUMP_DX:
|
||||
IF_KERNEL_NODES_FEATURE(BUMP)
|
||||
{
|
||||
svm_node_attr_bump_dx(kg, sd, stack, node);
|
||||
}
|
||||
break;
|
||||
case NODE_ATTR_BUMP_DY:
|
||||
IF_KERNEL_NODES_FEATURE(BUMP)
|
||||
{
|
||||
svm_node_attr_bump_dy(kg, sd, stack, node);
|
||||
}
|
||||
break;
|
||||
case NODE_VERTEX_COLOR_BUMP_DX:
|
||||
IF_KERNEL_NODES_FEATURE(BUMP)
|
||||
{
|
||||
svm_node_vertex_color_bump_dx(kg, sd, stack, node.y, node.z, node.w);
|
||||
}
|
||||
break;
|
||||
case NODE_VERTEX_COLOR_BUMP_DY:
|
||||
IF_KERNEL_NODES_FEATURE(BUMP)
|
||||
{
|
||||
svm_node_vertex_color_bump_dy(kg, sd, stack, node.y, node.z, node.w);
|
||||
}
|
||||
break;
|
||||
case NODE_TEX_COORD_BUMP_DX:
|
||||
IF_KERNEL_NODES_FEATURE(BUMP)
|
||||
{
|
||||
offset = svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, offset);
|
||||
}
|
||||
break;
|
||||
case NODE_TEX_COORD_BUMP_DY:
|
||||
IF_KERNEL_NODES_FEATURE(BUMP)
|
||||
{
|
||||
offset = svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, offset);
|
||||
}
|
||||
break;
|
||||
case NODE_CLOSURE_SET_NORMAL:
|
||||
IF_KERNEL_NODES_FEATURE(BUMP)
|
||||
{
|
||||
svm_node_set_normal(kg, sd, stack, node.y, node.z);
|
||||
}
|
||||
break;
|
||||
case NODE_ENTER_BUMP_EVAL:
|
||||
IF_KERNEL_NODES_FEATURE(BUMP_STATE)
|
||||
{
|
||||
svm_node_enter_bump_eval(kg, sd, stack, node.y);
|
||||
}
|
||||
break;
|
||||
case NODE_LEAVE_BUMP_EVAL:
|
||||
IF_KERNEL_NODES_FEATURE(BUMP_STATE)
|
||||
{
|
||||
svm_node_leave_bump_eval(kg, sd, stack, node.y);
|
||||
}
|
||||
break;
|
||||
case NODE_HSV:
|
||||
svm_node_hsv(kg, sd, stack, node);
|
||||
break;
|
||||
|
||||
case NODE_CLOSURE_HOLDOUT:
|
||||
svm_node_closure_holdout(sd, stack, node);
|
||||
break;
|
||||
case NODE_FRESNEL:
|
||||
svm_node_fresnel(sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
case NODE_LAYER_WEIGHT:
|
||||
svm_node_layer_weight(sd, stack, node);
|
||||
break;
|
||||
case NODE_CLOSURE_VOLUME:
|
||||
IF_KERNEL_NODES_FEATURE(VOLUME)
|
||||
{
|
||||
svm_node_closure_volume<type>(kg, sd, stack, node);
|
||||
}
|
||||
break;
|
||||
case NODE_PRINCIPLED_VOLUME:
|
||||
IF_KERNEL_NODES_FEATURE(VOLUME)
|
||||
{
|
||||
offset = svm_node_principled_volume<type>(kg, sd, stack, node, path_flag, offset);
|
||||
}
|
||||
break;
|
||||
case NODE_MATH:
|
||||
svm_node_math(kg, sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
case NODE_VECTOR_MATH:
|
||||
offset = svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
case NODE_RGB_RAMP:
|
||||
offset = svm_node_rgb_ramp(kg, sd, stack, node, offset);
|
||||
break;
|
||||
case NODE_GAMMA:
|
||||
svm_node_gamma(sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
case NODE_BRIGHTCONTRAST:
|
||||
svm_node_brightness(sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
case NODE_LIGHT_PATH:
|
||||
svm_node_light_path<node_feature_mask>(kg, state, sd, stack, node.y, node.z, path_flag);
|
||||
break;
|
||||
case NODE_OBJECT_INFO:
|
||||
svm_node_object_info(kg, sd, stack, node.y, node.z);
|
||||
break;
|
||||
case NODE_PARTICLE_INFO:
|
||||
svm_node_particle_info(kg, sd, stack, node.y, node.z);
|
||||
break;
|
||||
SVM_CASE(NODE_CLOSURE_BSDF)
|
||||
offset = svm_node_closure_bsdf<node_feature_mask, type>(
|
||||
kg, sd, stack, node, path_flag, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_CLOSURE_EMISSION)
|
||||
IF_KERNEL_NODES_FEATURE(EMISSION)
|
||||
{
|
||||
svm_node_closure_emission(sd, stack, node);
|
||||
}
|
||||
break;
|
||||
SVM_CASE(NODE_CLOSURE_BACKGROUND)
|
||||
IF_KERNEL_NODES_FEATURE(EMISSION)
|
||||
{
|
||||
svm_node_closure_background(sd, stack, node);
|
||||
}
|
||||
break;
|
||||
SVM_CASE(NODE_CLOSURE_SET_WEIGHT)
|
||||
svm_node_closure_set_weight(sd, node.y, node.z, node.w);
|
||||
break;
|
||||
SVM_CASE(NODE_CLOSURE_WEIGHT)
|
||||
svm_node_closure_weight(sd, stack, node.y);
|
||||
break;
|
||||
SVM_CASE(NODE_EMISSION_WEIGHT)
|
||||
IF_KERNEL_NODES_FEATURE(EMISSION)
|
||||
{
|
||||
svm_node_emission_weight(kg, sd, stack, node);
|
||||
}
|
||||
break;
|
||||
SVM_CASE(NODE_MIX_CLOSURE)
|
||||
svm_node_mix_closure(sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_JUMP_IF_ZERO)
|
||||
if (stack_load_float(stack, node.z) <= 0.0f)
|
||||
offset += node.y;
|
||||
break;
|
||||
SVM_CASE(NODE_JUMP_IF_ONE)
|
||||
if (stack_load_float(stack, node.z) >= 1.0f)
|
||||
offset += node.y;
|
||||
break;
|
||||
SVM_CASE(NODE_GEOMETRY)
|
||||
svm_node_geometry(kg, sd, stack, node.y, node.z);
|
||||
break;
|
||||
SVM_CASE(NODE_CONVERT)
|
||||
svm_node_convert(kg, sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
SVM_CASE(NODE_TEX_COORD)
|
||||
offset = svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_VALUE_F)
|
||||
svm_node_value_f(kg, sd, stack, node.y, node.z);
|
||||
break;
|
||||
SVM_CASE(NODE_VALUE_V)
|
||||
offset = svm_node_value_v(kg, sd, stack, node.y, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_ATTR)
|
||||
svm_node_attr<node_feature_mask>(kg, sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_VERTEX_COLOR)
|
||||
svm_node_vertex_color(kg, sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
SVM_CASE(NODE_GEOMETRY_BUMP_DX)
|
||||
IF_KERNEL_NODES_FEATURE(BUMP)
|
||||
{
|
||||
svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z);
|
||||
}
|
||||
break;
|
||||
SVM_CASE(NODE_GEOMETRY_BUMP_DY)
|
||||
IF_KERNEL_NODES_FEATURE(BUMP)
|
||||
{
|
||||
svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z);
|
||||
}
|
||||
break;
|
||||
SVM_CASE(NODE_SET_DISPLACEMENT)
|
||||
svm_node_set_displacement<node_feature_mask>(kg, sd, stack, node.y);
|
||||
break;
|
||||
SVM_CASE(NODE_DISPLACEMENT)
|
||||
svm_node_displacement<node_feature_mask>(kg, sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_VECTOR_DISPLACEMENT)
|
||||
offset = svm_node_vector_displacement<node_feature_mask>(kg, sd, stack, node, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_TEX_IMAGE)
|
||||
offset = svm_node_tex_image(kg, sd, stack, node, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_TEX_IMAGE_BOX)
|
||||
svm_node_tex_image_box(kg, sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_TEX_NOISE)
|
||||
offset = svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_SET_BUMP)
|
||||
svm_node_set_bump<node_feature_mask>(kg, sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_ATTR_BUMP_DX)
|
||||
IF_KERNEL_NODES_FEATURE(BUMP)
|
||||
{
|
||||
svm_node_attr_bump_dx(kg, sd, stack, node);
|
||||
}
|
||||
break;
|
||||
SVM_CASE(NODE_ATTR_BUMP_DY)
|
||||
IF_KERNEL_NODES_FEATURE(BUMP)
|
||||
{
|
||||
svm_node_attr_bump_dy(kg, sd, stack, node);
|
||||
}
|
||||
break;
|
||||
SVM_CASE(NODE_VERTEX_COLOR_BUMP_DX)
|
||||
IF_KERNEL_NODES_FEATURE(BUMP)
|
||||
{
|
||||
svm_node_vertex_color_bump_dx(kg, sd, stack, node.y, node.z, node.w);
|
||||
}
|
||||
break;
|
||||
SVM_CASE(NODE_VERTEX_COLOR_BUMP_DY)
|
||||
IF_KERNEL_NODES_FEATURE(BUMP)
|
||||
{
|
||||
svm_node_vertex_color_bump_dy(kg, sd, stack, node.y, node.z, node.w);
|
||||
}
|
||||
break;
|
||||
SVM_CASE(NODE_TEX_COORD_BUMP_DX)
|
||||
IF_KERNEL_NODES_FEATURE(BUMP)
|
||||
{
|
||||
offset = svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, offset);
|
||||
}
|
||||
break;
|
||||
SVM_CASE(NODE_TEX_COORD_BUMP_DY)
|
||||
IF_KERNEL_NODES_FEATURE(BUMP)
|
||||
{
|
||||
offset = svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, offset);
|
||||
}
|
||||
break;
|
||||
SVM_CASE(NODE_CLOSURE_SET_NORMAL)
|
||||
IF_KERNEL_NODES_FEATURE(BUMP)
|
||||
{
|
||||
svm_node_set_normal(kg, sd, stack, node.y, node.z);
|
||||
}
|
||||
break;
|
||||
SVM_CASE(NODE_ENTER_BUMP_EVAL)
|
||||
IF_KERNEL_NODES_FEATURE(BUMP_STATE)
|
||||
{
|
||||
svm_node_enter_bump_eval(kg, sd, stack, node.y);
|
||||
}
|
||||
break;
|
||||
SVM_CASE(NODE_LEAVE_BUMP_EVAL)
|
||||
IF_KERNEL_NODES_FEATURE(BUMP_STATE)
|
||||
{
|
||||
svm_node_leave_bump_eval(kg, sd, stack, node.y);
|
||||
}
|
||||
break;
|
||||
SVM_CASE(NODE_HSV)
|
||||
svm_node_hsv(kg, sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_CLOSURE_HOLDOUT)
|
||||
svm_node_closure_holdout(sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_FRESNEL)
|
||||
svm_node_fresnel(sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
SVM_CASE(NODE_LAYER_WEIGHT)
|
||||
svm_node_layer_weight(sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_CLOSURE_VOLUME)
|
||||
IF_KERNEL_NODES_FEATURE(VOLUME)
|
||||
{
|
||||
svm_node_closure_volume<type>(kg, sd, stack, node);
|
||||
}
|
||||
break;
|
||||
SVM_CASE(NODE_PRINCIPLED_VOLUME)
|
||||
IF_KERNEL_NODES_FEATURE(VOLUME)
|
||||
{
|
||||
offset = svm_node_principled_volume<type>(kg, sd, stack, node, path_flag, offset);
|
||||
}
|
||||
break;
|
||||
SVM_CASE(NODE_MATH)
|
||||
svm_node_math(kg, sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
SVM_CASE(NODE_VECTOR_MATH)
|
||||
offset = svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_RGB_RAMP)
|
||||
offset = svm_node_rgb_ramp(kg, sd, stack, node, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_GAMMA)
|
||||
svm_node_gamma(sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
SVM_CASE(NODE_BRIGHTCONTRAST)
|
||||
svm_node_brightness(sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
SVM_CASE(NODE_LIGHT_PATH)
|
||||
svm_node_light_path<node_feature_mask>(kg, state, sd, stack, node.y, node.z, path_flag);
|
||||
break;
|
||||
SVM_CASE(NODE_OBJECT_INFO)
|
||||
svm_node_object_info(kg, sd, stack, node.y, node.z);
|
||||
break;
|
||||
SVM_CASE(NODE_PARTICLE_INFO)
|
||||
svm_node_particle_info(kg, sd, stack, node.y, node.z);
|
||||
break;
|
||||
#if defined(__HAIR__)
|
||||
case NODE_HAIR_INFO:
|
||||
svm_node_hair_info(kg, sd, stack, node.y, node.z);
|
||||
break;
|
||||
SVM_CASE(NODE_HAIR_INFO)
|
||||
svm_node_hair_info(kg, sd, stack, node.y, node.z);
|
||||
break;
|
||||
#endif
|
||||
#if defined(__POINTCLOUD__)
|
||||
case NODE_POINT_INFO:
|
||||
svm_node_point_info(kg, sd, stack, node.y, node.z);
|
||||
break;
|
||||
SVM_CASE(NODE_POINT_INFO)
|
||||
svm_node_point_info(kg, sd, stack, node.y, node.z);
|
||||
break;
|
||||
#endif
|
||||
case NODE_TEXTURE_MAPPING:
|
||||
offset = svm_node_texture_mapping(kg, sd, stack, node.y, node.z, offset);
|
||||
break;
|
||||
case NODE_MAPPING:
|
||||
svm_node_mapping(kg, sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
case NODE_MIN_MAX:
|
||||
offset = svm_node_min_max(kg, sd, stack, node.y, node.z, offset);
|
||||
break;
|
||||
case NODE_CAMERA:
|
||||
svm_node_camera(kg, sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
case NODE_TEX_ENVIRONMENT:
|
||||
svm_node_tex_environment(kg, sd, stack, node);
|
||||
break;
|
||||
case NODE_TEX_SKY:
|
||||
offset = svm_node_tex_sky(kg, sd, stack, node, offset);
|
||||
break;
|
||||
case NODE_TEX_GRADIENT:
|
||||
svm_node_tex_gradient(sd, stack, node);
|
||||
break;
|
||||
case NODE_TEX_VORONOI:
|
||||
offset = svm_node_tex_voronoi<node_feature_mask>(
|
||||
kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
case NODE_TEX_MUSGRAVE:
|
||||
offset = svm_node_tex_musgrave(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
case NODE_TEX_WAVE:
|
||||
offset = svm_node_tex_wave(kg, sd, stack, node, offset);
|
||||
break;
|
||||
case NODE_TEX_MAGIC:
|
||||
offset = svm_node_tex_magic(kg, sd, stack, node, offset);
|
||||
break;
|
||||
case NODE_TEX_CHECKER:
|
||||
svm_node_tex_checker(kg, sd, stack, node);
|
||||
break;
|
||||
case NODE_TEX_BRICK:
|
||||
offset = svm_node_tex_brick(kg, sd, stack, node, offset);
|
||||
break;
|
||||
case NODE_TEX_WHITE_NOISE:
|
||||
svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
case NODE_NORMAL:
|
||||
offset = svm_node_normal(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
case NODE_LIGHT_FALLOFF:
|
||||
svm_node_light_falloff(sd, stack, node);
|
||||
break;
|
||||
case NODE_IES:
|
||||
svm_node_ies(kg, sd, stack, node);
|
||||
break;
|
||||
case NODE_RGB_CURVES:
|
||||
case NODE_VECTOR_CURVES:
|
||||
offset = svm_node_curves(kg, sd, stack, node, offset);
|
||||
break;
|
||||
case NODE_FLOAT_CURVE:
|
||||
offset = svm_node_curve(kg, sd, stack, node, offset);
|
||||
break;
|
||||
case NODE_TANGENT:
|
||||
svm_node_tangent(kg, sd, stack, node);
|
||||
break;
|
||||
case NODE_NORMAL_MAP:
|
||||
svm_node_normal_map(kg, sd, stack, node);
|
||||
break;
|
||||
case NODE_INVERT:
|
||||
svm_node_invert(sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
case NODE_MIX:
|
||||
offset = svm_node_mix(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
case NODE_SEPARATE_COLOR:
|
||||
svm_node_separate_color(kg, sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
case NODE_COMBINE_COLOR:
|
||||
svm_node_combine_color(kg, sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
case NODE_SEPARATE_VECTOR:
|
||||
svm_node_separate_vector(sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
case NODE_COMBINE_VECTOR:
|
||||
svm_node_combine_vector(sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
case NODE_SEPARATE_HSV:
|
||||
offset = svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
case NODE_COMBINE_HSV:
|
||||
offset = svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
case NODE_VECTOR_ROTATE:
|
||||
svm_node_vector_rotate(sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
case NODE_VECTOR_TRANSFORM:
|
||||
svm_node_vector_transform(kg, sd, stack, node);
|
||||
break;
|
||||
case NODE_WIREFRAME:
|
||||
svm_node_wireframe(kg, sd, stack, node);
|
||||
break;
|
||||
case NODE_WAVELENGTH:
|
||||
svm_node_wavelength(kg, sd, stack, node.y, node.z);
|
||||
break;
|
||||
case NODE_BLACKBODY:
|
||||
svm_node_blackbody(kg, sd, stack, node.y, node.z);
|
||||
break;
|
||||
case NODE_MAP_RANGE:
|
||||
offset = svm_node_map_range(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
case NODE_VECTOR_MAP_RANGE:
|
||||
offset = svm_node_vector_map_range(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
case NODE_CLAMP:
|
||||
offset = svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_TEXTURE_MAPPING)
|
||||
offset = svm_node_texture_mapping(kg, sd, stack, node.y, node.z, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_MAPPING)
|
||||
svm_node_mapping(kg, sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
SVM_CASE(NODE_MIN_MAX)
|
||||
offset = svm_node_min_max(kg, sd, stack, node.y, node.z, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_CAMERA)
|
||||
svm_node_camera(kg, sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
SVM_CASE(NODE_TEX_ENVIRONMENT)
|
||||
svm_node_tex_environment(kg, sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_TEX_SKY)
|
||||
offset = svm_node_tex_sky(kg, sd, stack, node, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_TEX_GRADIENT)
|
||||
svm_node_tex_gradient(sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_TEX_VORONOI)
|
||||
offset = svm_node_tex_voronoi<node_feature_mask>(
|
||||
kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_TEX_MUSGRAVE)
|
||||
offset = svm_node_tex_musgrave(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_TEX_WAVE)
|
||||
offset = svm_node_tex_wave(kg, sd, stack, node, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_TEX_MAGIC)
|
||||
offset = svm_node_tex_magic(kg, sd, stack, node, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_TEX_CHECKER)
|
||||
svm_node_tex_checker(kg, sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_TEX_BRICK)
|
||||
offset = svm_node_tex_brick(kg, sd, stack, node, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_TEX_WHITE_NOISE)
|
||||
svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
SVM_CASE(NODE_NORMAL)
|
||||
offset = svm_node_normal(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_LIGHT_FALLOFF)
|
||||
svm_node_light_falloff(sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_IES)
|
||||
svm_node_ies(kg, sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_CURVES)
|
||||
offset = svm_node_curves(kg, sd, stack, node, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_FLOAT_CURVE)
|
||||
offset = svm_node_curve(kg, sd, stack, node, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_TANGENT)
|
||||
svm_node_tangent(kg, sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_NORMAL_MAP)
|
||||
svm_node_normal_map(kg, sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_INVERT)
|
||||
svm_node_invert(sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
SVM_CASE(NODE_MIX)
|
||||
offset = svm_node_mix(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_SEPARATE_COLOR)
|
||||
svm_node_separate_color(kg, sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
SVM_CASE(NODE_COMBINE_COLOR)
|
||||
svm_node_combine_color(kg, sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
SVM_CASE(NODE_SEPARATE_VECTOR)
|
||||
svm_node_separate_vector(sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
SVM_CASE(NODE_COMBINE_VECTOR)
|
||||
svm_node_combine_vector(sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
SVM_CASE(NODE_SEPARATE_HSV)
|
||||
offset = svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_COMBINE_HSV)
|
||||
offset = svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_VECTOR_ROTATE)
|
||||
svm_node_vector_rotate(sd, stack, node.y, node.z, node.w);
|
||||
break;
|
||||
SVM_CASE(NODE_VECTOR_TRANSFORM)
|
||||
svm_node_vector_transform(kg, sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_WIREFRAME)
|
||||
svm_node_wireframe(kg, sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_WAVELENGTH)
|
||||
svm_node_wavelength(kg, sd, stack, node.y, node.z);
|
||||
break;
|
||||
SVM_CASE(NODE_BLACKBODY)
|
||||
svm_node_blackbody(kg, sd, stack, node.y, node.z);
|
||||
break;
|
||||
SVM_CASE(NODE_MAP_RANGE)
|
||||
offset = svm_node_map_range(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_VECTOR_MAP_RANGE)
|
||||
offset = svm_node_vector_map_range(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
SVM_CASE(NODE_CLAMP)
|
||||
offset = svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, offset);
|
||||
break;
|
||||
#ifdef __SHADER_RAYTRACE__
|
||||
case NODE_BEVEL:
|
||||
svm_node_bevel<node_feature_mask>(kg, state, sd, stack, node);
|
||||
break;
|
||||
case NODE_AMBIENT_OCCLUSION:
|
||||
svm_node_ao<node_feature_mask>(kg, state, sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_BEVEL)
|
||||
svm_node_bevel<node_feature_mask>(kg, state, sd, stack, node);
|
||||
break;
|
||||
SVM_CASE(NODE_AMBIENT_OCCLUSION)
|
||||
svm_node_ao<node_feature_mask>(kg, state, sd, stack, node);
|
||||
break;
|
||||
#endif
|
||||
|
||||
case NODE_TEX_VOXEL:
|
||||
IF_KERNEL_NODES_FEATURE(VOLUME)
|
||||
{
|
||||
offset = svm_node_tex_voxel(kg, sd, stack, node, offset);
|
||||
}
|
||||
break;
|
||||
case NODE_AOV_START:
|
||||
if (!svm_node_aov_check(path_flag, render_buffer)) {
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case NODE_AOV_COLOR:
|
||||
svm_node_aov_color<node_feature_mask>(kg, state, sd, stack, node, render_buffer);
|
||||
break;
|
||||
case NODE_AOV_VALUE:
|
||||
svm_node_aov_value<node_feature_mask>(kg, state, sd, stack, node, render_buffer);
|
||||
break;
|
||||
SVM_CASE(NODE_TEX_VOXEL)
|
||||
IF_KERNEL_NODES_FEATURE(VOLUME)
|
||||
{
|
||||
offset = svm_node_tex_voxel(kg, sd, stack, node, offset);
|
||||
}
|
||||
break;
|
||||
SVM_CASE(NODE_AOV_START)
|
||||
if (!svm_node_aov_check(path_flag, render_buffer)) {
|
||||
return;
|
||||
}
|
||||
break;
|
||||
SVM_CASE(NODE_AOV_COLOR)
|
||||
svm_node_aov_color<node_feature_mask>(kg, state, sd, stack, node, render_buffer);
|
||||
break;
|
||||
SVM_CASE(NODE_AOV_VALUE)
|
||||
svm_node_aov_value<node_feature_mask>(kg, state, sd, stack, node, render_buffer);
|
||||
break;
|
||||
default:
|
||||
kernel_assert(!"Unknown node type was passed to the SVM machine");
|
||||
return;
|
||||
|
@@ -138,7 +138,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg,
|
||||
case NODE_TEXCO_WINDOW: {
|
||||
if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
|
||||
kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
|
||||
data = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(sd->ray_dP, 0.0f, 0.0f));
|
||||
data = camera_world_to_ndc(kg, sd, sd->ray_P);
|
||||
else
|
||||
data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx);
|
||||
data.z = 0.0f;
|
||||
@@ -223,7 +223,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg,
|
||||
case NODE_TEXCO_WINDOW: {
|
||||
if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
|
||||
kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
|
||||
data = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(0.0f, sd->ray_dP, 0.0f));
|
||||
data = camera_world_to_ndc(kg, sd, sd->ray_P);
|
||||
else
|
||||
data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy);
|
||||
data.z = 0.0f;
|
||||
|
@@ -17,104 +17,9 @@ CCL_NAMESPACE_BEGIN
|
||||
/* Nodes */
|
||||
|
||||
typedef enum ShaderNodeType {
|
||||
NODE_END = 0,
|
||||
NODE_SHADER_JUMP,
|
||||
NODE_CLOSURE_BSDF,
|
||||
NODE_CLOSURE_EMISSION,
|
||||
NODE_CLOSURE_BACKGROUND,
|
||||
NODE_CLOSURE_SET_WEIGHT,
|
||||
NODE_CLOSURE_WEIGHT,
|
||||
NODE_EMISSION_WEIGHT,
|
||||
NODE_MIX_CLOSURE,
|
||||
NODE_JUMP_IF_ZERO,
|
||||
NODE_JUMP_IF_ONE,
|
||||
NODE_GEOMETRY,
|
||||
NODE_CONVERT,
|
||||
NODE_TEX_COORD,
|
||||
NODE_VALUE_F,
|
||||
NODE_VALUE_V,
|
||||
NODE_ATTR,
|
||||
NODE_VERTEX_COLOR,
|
||||
NODE_GEOMETRY_BUMP_DX,
|
||||
NODE_GEOMETRY_BUMP_DY,
|
||||
NODE_SET_DISPLACEMENT,
|
||||
NODE_DISPLACEMENT,
|
||||
NODE_VECTOR_DISPLACEMENT,
|
||||
NODE_TEX_IMAGE,
|
||||
NODE_TEX_IMAGE_BOX,
|
||||
NODE_TEX_NOISE,
|
||||
NODE_SET_BUMP,
|
||||
NODE_ATTR_BUMP_DX,
|
||||
NODE_ATTR_BUMP_DY,
|
||||
NODE_VERTEX_COLOR_BUMP_DX,
|
||||
NODE_VERTEX_COLOR_BUMP_DY,
|
||||
NODE_TEX_COORD_BUMP_DX,
|
||||
NODE_TEX_COORD_BUMP_DY,
|
||||
NODE_CLOSURE_SET_NORMAL,
|
||||
NODE_ENTER_BUMP_EVAL,
|
||||
NODE_LEAVE_BUMP_EVAL,
|
||||
NODE_HSV,
|
||||
NODE_CLOSURE_HOLDOUT,
|
||||
NODE_FRESNEL,
|
||||
NODE_LAYER_WEIGHT,
|
||||
NODE_CLOSURE_VOLUME,
|
||||
NODE_PRINCIPLED_VOLUME,
|
||||
NODE_MATH,
|
||||
NODE_VECTOR_MATH,
|
||||
NODE_RGB_RAMP,
|
||||
NODE_GAMMA,
|
||||
NODE_BRIGHTCONTRAST,
|
||||
NODE_LIGHT_PATH,
|
||||
NODE_OBJECT_INFO,
|
||||
NODE_PARTICLE_INFO,
|
||||
NODE_HAIR_INFO,
|
||||
NODE_POINT_INFO,
|
||||
NODE_TEXTURE_MAPPING,
|
||||
NODE_MAPPING,
|
||||
NODE_MIN_MAX,
|
||||
NODE_CAMERA,
|
||||
NODE_TEX_ENVIRONMENT,
|
||||
NODE_TEX_SKY,
|
||||
NODE_TEX_GRADIENT,
|
||||
NODE_TEX_VORONOI,
|
||||
NODE_TEX_MUSGRAVE,
|
||||
NODE_TEX_WAVE,
|
||||
NODE_TEX_MAGIC,
|
||||
NODE_TEX_CHECKER,
|
||||
NODE_TEX_BRICK,
|
||||
NODE_TEX_WHITE_NOISE,
|
||||
NODE_NORMAL,
|
||||
NODE_LIGHT_FALLOFF,
|
||||
NODE_IES,
|
||||
NODE_RGB_CURVES,
|
||||
NODE_VECTOR_CURVES,
|
||||
NODE_TANGENT,
|
||||
NODE_NORMAL_MAP,
|
||||
NODE_INVERT,
|
||||
NODE_MIX,
|
||||
NODE_SEPARATE_COLOR,
|
||||
NODE_COMBINE_COLOR,
|
||||
NODE_SEPARATE_VECTOR,
|
||||
NODE_COMBINE_VECTOR,
|
||||
NODE_SEPARATE_HSV,
|
||||
NODE_COMBINE_HSV,
|
||||
NODE_VECTOR_ROTATE,
|
||||
NODE_VECTOR_TRANSFORM,
|
||||
NODE_WIREFRAME,
|
||||
NODE_WAVELENGTH,
|
||||
NODE_BLACKBODY,
|
||||
NODE_MAP_RANGE,
|
||||
NODE_VECTOR_MAP_RANGE,
|
||||
NODE_CLAMP,
|
||||
NODE_BEVEL,
|
||||
NODE_AMBIENT_OCCLUSION,
|
||||
NODE_TEX_VOXEL,
|
||||
NODE_AOV_START,
|
||||
NODE_AOV_COLOR,
|
||||
NODE_AOV_VALUE,
|
||||
NODE_FLOAT_CURVE,
|
||||
/* NOTE: for best OpenCL performance, item definition in the enum must
|
||||
* match the switch case order in `svm.h`. */
|
||||
#define SHADER_NODE_TYPE(name) name,
|
||||
#include "node_types_template.h"
|
||||
NODE_NUM
|
||||
} ShaderNodeType;
|
||||
|
||||
typedef enum NodeAttributeOutputType {
|
||||
|
@@ -535,7 +535,8 @@ typedef struct RaySelfPrimitives {
|
||||
typedef struct Ray {
|
||||
float3 P; /* origin */
|
||||
float3 D; /* direction */
|
||||
float t; /* length of the ray */
|
||||
float tmin; /* start distance */
|
||||
float tmax; /* end distance */
|
||||
float time; /* time (for motion blur) */
|
||||
|
||||
RaySelfPrimitives self;
|
||||
@@ -1072,94 +1073,6 @@ typedef struct KernelCamera {
|
||||
} KernelCamera;
|
||||
static_assert_align(KernelCamera, 16);
|
||||
|
||||
typedef struct KernelFilm {
|
||||
float exposure;
|
||||
int pass_flag;
|
||||
|
||||
int light_pass_flag;
|
||||
int pass_stride;
|
||||
|
||||
int pass_combined;
|
||||
int pass_depth;
|
||||
int pass_position;
|
||||
int pass_normal;
|
||||
int pass_roughness;
|
||||
int pass_motion;
|
||||
|
||||
int pass_motion_weight;
|
||||
int pass_uv;
|
||||
int pass_object_id;
|
||||
int pass_material_id;
|
||||
|
||||
int pass_diffuse_color;
|
||||
int pass_glossy_color;
|
||||
int pass_transmission_color;
|
||||
|
||||
int pass_diffuse_indirect;
|
||||
int pass_glossy_indirect;
|
||||
int pass_transmission_indirect;
|
||||
int pass_volume_indirect;
|
||||
|
||||
int pass_diffuse_direct;
|
||||
int pass_glossy_direct;
|
||||
int pass_transmission_direct;
|
||||
int pass_volume_direct;
|
||||
|
||||
int pass_emission;
|
||||
int pass_background;
|
||||
int pass_ao;
|
||||
float pass_alpha_threshold;
|
||||
|
||||
int pass_shadow;
|
||||
float pass_shadow_scale;
|
||||
|
||||
int pass_shadow_catcher;
|
||||
int pass_shadow_catcher_sample_count;
|
||||
int pass_shadow_catcher_matte;
|
||||
|
||||
int filter_table_offset;
|
||||
|
||||
int cryptomatte_passes;
|
||||
int cryptomatte_depth;
|
||||
int pass_cryptomatte;
|
||||
|
||||
int pass_adaptive_aux_buffer;
|
||||
int pass_sample_count;
|
||||
|
||||
int pass_mist;
|
||||
float mist_start;
|
||||
float mist_inv_depth;
|
||||
float mist_falloff;
|
||||
|
||||
int pass_denoising_normal;
|
||||
int pass_denoising_albedo;
|
||||
int pass_denoising_depth;
|
||||
|
||||
int pass_aov_color;
|
||||
int pass_aov_value;
|
||||
int pass_lightgroup;
|
||||
|
||||
/* XYZ to rendering color space transform. float4 instead of float3 to
|
||||
* ensure consistent padding/alignment across devices. */
|
||||
float4 xyz_to_r;
|
||||
float4 xyz_to_g;
|
||||
float4 xyz_to_b;
|
||||
float4 rgb_to_y;
|
||||
/* Rec709 to rendering color space. */
|
||||
float4 rec709_to_r;
|
||||
float4 rec709_to_g;
|
||||
float4 rec709_to_b;
|
||||
int is_rec709;
|
||||
|
||||
int pass_bake_primitive;
|
||||
int pass_bake_differential;
|
||||
|
||||
int use_approximate_shadow_catcher;
|
||||
|
||||
int pad1;
|
||||
} KernelFilm;
|
||||
static_assert_align(KernelFilm, 16);
|
||||
|
||||
typedef struct KernelFilmConvert {
|
||||
int pass_offset;
|
||||
int pass_stride;
|
||||
@@ -1201,108 +1114,6 @@ typedef struct KernelFilmConvert {
|
||||
} KernelFilmConvert;
|
||||
static_assert_align(KernelFilmConvert, 16);
|
||||
|
||||
typedef struct KernelBackground {
|
||||
/* only shader index */
|
||||
int surface_shader;
|
||||
int volume_shader;
|
||||
float volume_step_size;
|
||||
int transparent;
|
||||
float transparent_roughness_squared_threshold;
|
||||
|
||||
/* portal sampling */
|
||||
float portal_weight;
|
||||
int num_portals;
|
||||
int portal_offset;
|
||||
|
||||
/* sun sampling */
|
||||
float sun_weight;
|
||||
/* xyz store direction, w the angle. float4 instead of float3 is used
|
||||
* to ensure consistent padding/alignment across devices. */
|
||||
float4 sun;
|
||||
|
||||
/* map sampling */
|
||||
float map_weight;
|
||||
int map_res_x;
|
||||
int map_res_y;
|
||||
|
||||
int use_mis;
|
||||
|
||||
int lightgroup;
|
||||
|
||||
/* Padding */
|
||||
int pad1, pad2;
|
||||
} KernelBackground;
|
||||
static_assert_align(KernelBackground, 16);
|
||||
|
||||
typedef struct KernelIntegrator {
|
||||
/* emission */
|
||||
int use_direct_light;
|
||||
int num_distribution;
|
||||
int num_all_lights;
|
||||
float pdf_triangles;
|
||||
float pdf_lights;
|
||||
float light_inv_rr_threshold;
|
||||
|
||||
/* bounces */
|
||||
int min_bounce;
|
||||
int max_bounce;
|
||||
|
||||
int max_diffuse_bounce;
|
||||
int max_glossy_bounce;
|
||||
int max_transmission_bounce;
|
||||
int max_volume_bounce;
|
||||
|
||||
/* AO bounces */
|
||||
int ao_bounces;
|
||||
float ao_bounces_distance;
|
||||
float ao_bounces_factor;
|
||||
float ao_additive_factor;
|
||||
|
||||
/* transparent */
|
||||
int transparent_min_bounce;
|
||||
int transparent_max_bounce;
|
||||
int transparent_shadows;
|
||||
|
||||
/* caustics */
|
||||
int caustics_reflective;
|
||||
int caustics_refractive;
|
||||
float filter_glossy;
|
||||
|
||||
/* seed */
|
||||
int seed;
|
||||
|
||||
/* clamp */
|
||||
float sample_clamp_direct;
|
||||
float sample_clamp_indirect;
|
||||
|
||||
/* mis */
|
||||
int use_lamp_mis;
|
||||
|
||||
/* caustics */
|
||||
int use_caustics;
|
||||
|
||||
/* sampler */
|
||||
int sampling_pattern;
|
||||
|
||||
/* volume render */
|
||||
int use_volumes;
|
||||
int volume_max_steps;
|
||||
float volume_step_rate;
|
||||
|
||||
int has_shadow_catcher;
|
||||
float scrambling_distance;
|
||||
|
||||
/* Closure filter. */
|
||||
int filter_closures;
|
||||
|
||||
/* MIS debugging. */
|
||||
int direct_light_sampling_type;
|
||||
|
||||
/* padding */
|
||||
int pad1;
|
||||
} KernelIntegrator;
|
||||
static_assert_align(KernelIntegrator, 16);
|
||||
|
||||
typedef enum KernelBVHLayout {
|
||||
BVH_LAYOUT_NONE = 0,
|
||||
|
||||
@@ -1320,36 +1131,25 @@ typedef enum KernelBVHLayout {
|
||||
BVH_LAYOUT_ALL = BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE | BVH_LAYOUT_OPTIX | BVH_LAYOUT_METAL,
|
||||
} KernelBVHLayout;
|
||||
|
||||
typedef struct KernelBVH {
|
||||
/* Own BVH */
|
||||
int root;
|
||||
int have_motion;
|
||||
int have_curves;
|
||||
int bvh_layout;
|
||||
int use_bvh_steps;
|
||||
int curve_subdivisions;
|
||||
/* Specialized struct that can become constants in dynamic compilation. */
|
||||
#define KERNEL_STRUCT_BEGIN(name, parent) struct name {
|
||||
#define KERNEL_STRUCT_END(name) \
|
||||
} \
|
||||
; \
|
||||
static_assert_align(name, 16);
|
||||
|
||||
/* Custom BVH */
|
||||
#ifdef __KERNEL_OPTIX__
|
||||
OptixTraversableHandle scene;
|
||||
#elif defined __METALRT__
|
||||
metalrt_as_type scene;
|
||||
#ifdef __KERNEL_USE_DATA_CONSTANTS__
|
||||
# define KERNEL_STRUCT_MEMBER(parent, type, name) type __unused_##name;
|
||||
#else
|
||||
# ifdef __EMBREE__
|
||||
RTCScene scene;
|
||||
# ifndef __KERNEL_64_BIT__
|
||||
int pad2;
|
||||
# endif
|
||||
# else
|
||||
int scene, pad2;
|
||||
# endif
|
||||
# define KERNEL_STRUCT_MEMBER(parent, type, name) type name;
|
||||
#endif
|
||||
} KernelBVH;
|
||||
static_assert_align(KernelBVH, 16);
|
||||
|
||||
#include "kernel/data_template.h"
|
||||
|
||||
typedef struct KernelTables {
|
||||
int beckmann_offset;
|
||||
int pad1, pad2, pad3;
|
||||
int filter_table_offset;
|
||||
int pad1, pad2;
|
||||
} KernelTables;
|
||||
static_assert_align(KernelTables, 16);
|
||||
|
||||
@@ -1362,18 +1162,37 @@ typedef struct KernelBake {
|
||||
static_assert_align(KernelBake, 16);
|
||||
|
||||
typedef struct KernelData {
|
||||
/* Features and limits. */
|
||||
uint kernel_features;
|
||||
uint max_closures;
|
||||
uint max_shaders;
|
||||
uint volume_stack_size;
|
||||
|
||||
/* Always dynamic data mambers. */
|
||||
KernelCamera cam;
|
||||
KernelFilm film;
|
||||
KernelBackground background;
|
||||
KernelIntegrator integrator;
|
||||
KernelBVH bvh;
|
||||
KernelTables tables;
|
||||
KernelBake bake;
|
||||
KernelTables tables;
|
||||
|
||||
/* Potentially specialized data members. */
|
||||
#define KERNEL_STRUCT_BEGIN(name, parent) name parent;
|
||||
#include "kernel/data_template.h"
|
||||
|
||||
/* Device specific BVH. */
|
||||
#ifdef __KERNEL_OPTIX__
|
||||
OptixTraversableHandle device_bvh;
|
||||
#elif defined __METALRT__
|
||||
metalrt_as_type device_bvh;
|
||||
#else
|
||||
# ifdef __EMBREE__
|
||||
RTCScene device_bvh;
|
||||
# ifndef __KERNEL_64_BIT__
|
||||
int pad1;
|
||||
# endif
|
||||
# else
|
||||
int device_bvh, pad1;
|
||||
# endif
|
||||
#endif
|
||||
int pad2, pad3;
|
||||
} KernelData;
|
||||
static_assert_align(KernelData, 16);
|
||||
|
||||
|
@@ -394,7 +394,7 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
|
||||
vector<float> table = filter_table(filter_type, filter_width);
|
||||
scene->lookup_tables->remove_table(&filter_table_offset_);
|
||||
filter_table_offset_ = scene->lookup_tables->add_table(dscene, table);
|
||||
kfilm->filter_table_offset = (int)filter_table_offset_;
|
||||
dscene->data.tables.filter_table_offset = (int)filter_table_offset_;
|
||||
|
||||
/* mist pass parameters */
|
||||
kfilm->mist_start = mist_start;
|
||||
|
@@ -1362,7 +1362,7 @@ void GeometryManager::device_update_bvh(Device *device,
|
||||
dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0);
|
||||
dscene->data.bvh.curve_subdivisions = scene->params.curve_subdivisions();
|
||||
/* The scene handle is set in 'CPUDevice::const_copy_to' and 'OptiXDevice::const_copy_to' */
|
||||
dscene->data.bvh.scene = 0;
|
||||
dscene->data.device_bvh = 0;
|
||||
}
|
||||
|
||||
/* Set of flags used to help determining what data has been modified or needs reallocation, so we
|
||||
|
@@ -369,6 +369,8 @@ void Scene::device_update(Device *device_, Progress &progress)
|
||||
device->const_copy_to("data", &dscene.data, sizeof(dscene.data));
|
||||
}
|
||||
|
||||
device->optimize_for_scene(this);
|
||||
|
||||
if (print_stats) {
|
||||
size_t mem_used = util_guarded_get_mem_used();
|
||||
size_t mem_peak = util_guarded_get_mem_peak();
|
||||
|
@@ -82,7 +82,7 @@ class DeviceScene {
|
||||
|
||||
device_vector<uint> patches;
|
||||
|
||||
/* pointcloud */
|
||||
/* point-cloud */
|
||||
device_vector<float4> points;
|
||||
device_vector<uint> points_shader;
|
||||
|
||||
@@ -124,7 +124,7 @@ class DeviceScene {
|
||||
/* integrator */
|
||||
device_vector<float> sample_pattern_lut;
|
||||
|
||||
/* ies lights */
|
||||
/* IES lights */
|
||||
device_vector<float> ies_lights;
|
||||
|
||||
KernelData data;
|
||||
|
@@ -6671,7 +6671,7 @@ void CurvesNode::compile(SVMCompiler &compiler,
|
||||
|
||||
ShaderInput *fac_in = input("Fac");
|
||||
|
||||
compiler.add_node(type,
|
||||
compiler.add_node(ShaderNodeType(type),
|
||||
compiler.encode_uchar4(compiler.stack_assign(fac_in),
|
||||
compiler.stack_assign(value_in),
|
||||
compiler.stack_assign(value_out),
|
||||
@@ -6736,7 +6736,7 @@ void RGBCurvesNode::constant_fold(const ConstantFolder &folder)
|
||||
|
||||
void RGBCurvesNode::compile(SVMCompiler &compiler)
|
||||
{
|
||||
CurvesNode::compile(compiler, NODE_RGB_CURVES, input("Color"), output("Color"));
|
||||
CurvesNode::compile(compiler, NODE_CURVES, input("Color"), output("Color"));
|
||||
}
|
||||
|
||||
void RGBCurvesNode::compile(OSLCompiler &compiler)
|
||||
@@ -6774,7 +6774,7 @@ void VectorCurvesNode::constant_fold(const ConstantFolder &folder)
|
||||
|
||||
void VectorCurvesNode::compile(SVMCompiler &compiler)
|
||||
{
|
||||
CurvesNode::compile(compiler, NODE_VECTOR_CURVES, input("Vector"), output("Vector"));
|
||||
CurvesNode::compile(compiler, NODE_CURVES, input("Vector"), output("Vector"));
|
||||
}
|
||||
|
||||
void VectorCurvesNode::compile(OSLCompiler &compiler)
|
||||
|
@@ -44,8 +44,6 @@ void SVMShaderManager::device_update_shader(Scene *scene,
|
||||
}
|
||||
assert(shader->graph);
|
||||
|
||||
svm_nodes->push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
|
||||
|
||||
SVMCompiler::Summary summary;
|
||||
SVMCompiler compiler(scene);
|
||||
compiler.background = (shader == scene->background->get_shader(scene));
|
||||
@@ -170,6 +168,9 @@ SVMCompiler::SVMCompiler(Scene *scene) : scene(scene)
|
||||
background = false;
|
||||
mix_weight_offset = SVM_STACK_INVALID;
|
||||
compile_failed = false;
|
||||
|
||||
/* This struct has one entry for every node, in order of ShaderNodeType definition. */
|
||||
svm_node_types_used = (std::atomic_int *)&scene->dscene.data.svm_usage;
|
||||
}
|
||||
|
||||
int SVMCompiler::stack_size(SocketType::Type type)
|
||||
@@ -378,11 +379,13 @@ void SVMCompiler::add_node(int a, int b, int c, int d)
|
||||
|
||||
void SVMCompiler::add_node(ShaderNodeType type, int a, int b, int c)
|
||||
{
|
||||
svm_node_types_used[type] = true;
|
||||
current_svm_nodes.push_back_slow(make_int4(type, a, b, c));
|
||||
}
|
||||
|
||||
void SVMCompiler::add_node(ShaderNodeType type, const float3 &f)
|
||||
{
|
||||
svm_node_types_used[type] = true;
|
||||
current_svm_nodes.push_back_slow(
|
||||
make_int4(type, __float_as_int(f.x), __float_as_int(f.y), __float_as_int(f.z)));
|
||||
}
|
||||
@@ -663,6 +666,7 @@ void SVMCompiler::generate_multi_closure(ShaderNode *root_node,
|
||||
/* Add instruction to skip closure and its dependencies if mix
|
||||
* weight is zero.
|
||||
*/
|
||||
svm_node_types_used[NODE_JUMP_IF_ONE] = true;
|
||||
current_svm_nodes.push_back_slow(make_int4(NODE_JUMP_IF_ONE, 0, stack_assign(facin), 0));
|
||||
int node_jump_skip_index = current_svm_nodes.size() - 1;
|
||||
|
||||
@@ -678,6 +682,7 @@ void SVMCompiler::generate_multi_closure(ShaderNode *root_node,
|
||||
/* Add instruction to skip closure and its dependencies if mix
|
||||
* weight is zero.
|
||||
*/
|
||||
svm_node_types_used[NODE_JUMP_IF_ZERO] = true;
|
||||
current_svm_nodes.push_back_slow(make_int4(NODE_JUMP_IF_ZERO, 0, stack_assign(facin), 0));
|
||||
int node_jump_skip_index = current_svm_nodes.size() - 1;
|
||||
|
||||
@@ -844,6 +849,9 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
|
||||
|
||||
void SVMCompiler::compile(Shader *shader, array<int4> &svm_nodes, int index, Summary *summary)
|
||||
{
|
||||
svm_node_types_used[NODE_SHADER_JUMP] = true;
|
||||
svm_nodes.push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
|
||||
|
||||
/* copy graph for shader with bump mapping */
|
||||
ShaderNode *output = shader->graph->output();
|
||||
int start_num_svm_nodes = svm_nodes.size();
|
||||
|
@@ -211,6 +211,7 @@ class SVMCompiler {
|
||||
/* compile */
|
||||
void compile_type(Shader *shader, ShaderGraph *graph, ShaderType type);
|
||||
|
||||
std::atomic_int *svm_node_types_used;
|
||||
array<int4> current_svm_nodes;
|
||||
ShaderType current_type;
|
||||
Shader *current_shader;
|
||||
|
@@ -10,7 +10,8 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
ccl_device bool ray_sphere_intersect(float3 ray_P,
|
||||
float3 ray_D,
|
||||
float ray_t,
|
||||
float ray_tmin,
|
||||
float ray_tmax,
|
||||
float3 sphere_P,
|
||||
float sphere_radius,
|
||||
ccl_private float3 *isect_P,
|
||||
@@ -33,7 +34,7 @@ ccl_device bool ray_sphere_intersect(float3 ray_P,
|
||||
return false;
|
||||
}
|
||||
const float t = tp - sqrtf(radiussq - dsq); /* pythagoras */
|
||||
if (t < ray_t) {
|
||||
if (t > ray_tmin && t < ray_tmax) {
|
||||
*isect_t = t;
|
||||
*isect_P = ray_P + ray_D * t;
|
||||
return true;
|
||||
@@ -44,7 +45,8 @@ ccl_device bool ray_sphere_intersect(float3 ray_P,
|
||||
|
||||
ccl_device bool ray_aligned_disk_intersect(float3 ray_P,
|
||||
float3 ray_D,
|
||||
float ray_t,
|
||||
float ray_tmin,
|
||||
float ray_tmax,
|
||||
float3 disk_P,
|
||||
float disk_radius,
|
||||
ccl_private float3 *isect_P,
|
||||
@@ -59,7 +61,7 @@ ccl_device bool ray_aligned_disk_intersect(float3 ray_P,
|
||||
}
|
||||
/* Compute t to intersection point. */
|
||||
const float t = -disk_t / div;
|
||||
if (t < 0.0f || t > ray_t) {
|
||||
if (!(t > ray_tmin && t < ray_tmax)) {
|
||||
return false;
|
||||
}
|
||||
/* Test if within radius. */
|
||||
@@ -74,7 +76,8 @@ ccl_device bool ray_aligned_disk_intersect(float3 ray_P,
|
||||
|
||||
ccl_device bool ray_disk_intersect(float3 ray_P,
|
||||
float3 ray_D,
|
||||
float ray_t,
|
||||
float ray_tmin,
|
||||
float ray_tmax,
|
||||
float3 disk_P,
|
||||
float3 disk_N,
|
||||
float disk_radius,
|
||||
@@ -92,7 +95,8 @@ ccl_device bool ray_disk_intersect(float3 ray_P,
|
||||
}
|
||||
float3 P = ray_P + t * ray_D;
|
||||
float3 T = P - disk_P;
|
||||
if (dot(T, T) < sqr(disk_radius) /*&& t > 0.f*/ && t <= ray_t) {
|
||||
|
||||
if (dot(T, T) < sqr(disk_radius) && (t > ray_tmin && t < ray_tmax)) {
|
||||
*isect_P = ray_P + t * ray_D;
|
||||
*isect_t = t;
|
||||
return true;
|
||||
@@ -103,7 +107,8 @@ ccl_device bool ray_disk_intersect(float3 ray_P,
|
||||
|
||||
ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
|
||||
float3 ray_dir,
|
||||
float ray_t,
|
||||
float ray_tmin,
|
||||
float ray_tmax,
|
||||
const float3 tri_a,
|
||||
const float3 tri_b,
|
||||
const float3 tri_c,
|
||||
@@ -149,16 +154,14 @@ ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
|
||||
|
||||
/* Perform depth test. */
|
||||
const float T = dot3(v0, Ng);
|
||||
const int sign_den = (__float_as_int(den) & 0x80000000);
|
||||
const float sign_T = xor_signmask(T, sign_den);
|
||||
if ((sign_T < 0.0f) || (sign_T > ray_t * xor_signmask(den, sign_den))) {
|
||||
const float t = T / den;
|
||||
if (!(t >= ray_tmin && t <= ray_tmax)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const float inv_den = 1.0f / den;
|
||||
*isect_u = U * inv_den;
|
||||
*isect_v = V * inv_den;
|
||||
*isect_t = T * inv_den;
|
||||
*isect_u = U / den;
|
||||
*isect_v = V / den;
|
||||
*isect_t = t;
|
||||
return true;
|
||||
|
||||
#undef dot3
|
||||
@@ -171,8 +174,8 @@ ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
|
||||
*/
|
||||
ccl_device bool ray_quad_intersect(float3 ray_P,
|
||||
float3 ray_D,
|
||||
float ray_mint,
|
||||
float ray_maxt,
|
||||
float ray_tmin,
|
||||
float ray_tmax,
|
||||
float3 quad_P,
|
||||
float3 quad_u,
|
||||
float3 quad_v,
|
||||
@@ -185,7 +188,7 @@ ccl_device bool ray_quad_intersect(float3 ray_P,
|
||||
{
|
||||
/* Perform intersection test. */
|
||||
float t = -(dot(ray_P, quad_n) - dot(quad_P, quad_n)) / dot(ray_D, quad_n);
|
||||
if (t < ray_mint || t > ray_maxt) {
|
||||
if (!(t > ray_tmin && t < ray_tmax)) {
|
||||
return false;
|
||||
}
|
||||
const float3 hit = ray_P + t * ray_D;
|
||||
|
@@ -136,6 +136,19 @@ void string_replace(string &haystack, const string &needle, const string &other)
|
||||
}
|
||||
}
|
||||
|
||||
void string_replace_same_length(string &haystack, const string &needle, const string &other)
|
||||
{
|
||||
assert(needle.size() == other.size());
|
||||
size_t pos = 0;
|
||||
while (pos != string::npos) {
|
||||
pos = haystack.find(needle, pos);
|
||||
if (pos != string::npos) {
|
||||
memcpy(haystack.data() + pos, other.data(), other.size());
|
||||
pos += other.size();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
string string_remove_trademark(const string &s)
|
||||
{
|
||||
string result = s;
|
||||
@@ -164,6 +177,11 @@ string to_string(const char *str)
|
||||
return string(str);
|
||||
}
|
||||
|
||||
string to_string(const float4 &v)
|
||||
{
|
||||
return string_printf("%f,%f,%f,%f", v.x, v.y, v.z, v.w);
|
||||
}
|
||||
|
||||
string string_to_lower(const string &s)
|
||||
{
|
||||
string r = s;
|
||||
|
@@ -38,12 +38,14 @@ void string_split(vector<string> &tokens,
|
||||
const string &separators = "\t ",
|
||||
bool skip_empty_tokens = true);
|
||||
void string_replace(string &haystack, const string &needle, const string &other);
|
||||
void string_replace_same_length(string &haystack, const string &needle, const string &other);
|
||||
bool string_startswith(string_view s, string_view start);
|
||||
bool string_endswith(string_view s, string_view end);
|
||||
string string_strip(const string &s);
|
||||
string string_remove_trademark(const string &s);
|
||||
string string_from_bool(const bool var);
|
||||
string to_string(const char *str);
|
||||
string to_string(const float4 &v);
|
||||
string string_to_lower(const string &s);
|
||||
|
||||
/* Wide char strings are only used on Windows to deal with non-ASCII
|
||||
|
@@ -3,6 +3,7 @@
|
||||
|
||||
set(INC
|
||||
.
|
||||
../clog
|
||||
../glew-mx
|
||||
../../source/blender/imbuf
|
||||
../../source/blender/makesdna
|
||||
@@ -65,6 +66,8 @@ set(SRC
|
||||
intern/GHOST_Util.h
|
||||
intern/GHOST_Window.h
|
||||
intern/GHOST_WindowManager.h
|
||||
intern/GHOST_utildefines.h
|
||||
intern/GHOST_utildefines_variadic.h
|
||||
)
|
||||
|
||||
set(LIB
|
||||
|
@@ -547,21 +547,15 @@ typedef struct {
|
||||
/** The key code. */
|
||||
GHOST_TKey key;
|
||||
|
||||
/* ascii / utf8: both should always be set when possible,
|
||||
* - ascii may be '\0' however if the user presses a non ascii key
|
||||
* - unicode may not be set if the system has no unicode support
|
||||
*
|
||||
* These values are intended to be used as follows.
|
||||
* For text input use unicode when available, fallback to ascii.
|
||||
* For areas where unicode is not needed, number input for example, always
|
||||
* use ascii, unicode is ignored - campbell.
|
||||
*/
|
||||
/** The ascii code for the key event ('\0' if none). */
|
||||
char ascii;
|
||||
/** The unicode character. if the length is 6, not NULL terminated if all 6 are set. */
|
||||
char utf8_buf[6];
|
||||
|
||||
/** Generated by auto-repeat. */
|
||||
/**
|
||||
* Enabled when the key is held (auto-repeat).
|
||||
* In this case press events are sent without a corresponding release/up event.
|
||||
*
|
||||
* All back-ends must set this variable for correct behavior regarding repeatable keys.
|
||||
*/
|
||||
char is_repeat;
|
||||
} GHOST_TEventKeyData;
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user