Compare commits
284 Commits
file-brows
...
retopo_tra
Author | SHA1 | Date | |
---|---|---|---|
6485941d7a | |||
9b9417b661 | |||
03cd794119 | |||
091156f64a | |||
![]() |
cfd16c04f8 | ||
42ccbb7cd1 | |||
187d90f036 | |||
1665e40e16 | |||
d3879e9aaa | |||
065dfe744c | |||
3a138a74e5 | |||
![]() |
b862cf0b9f | ||
a679164cf6 | |||
ae0b8e904c | |||
b639e60864 | |||
c3ca487498 | |||
4815772fda | |||
e9bd6abde3 | |||
c0845abd89 | |||
848dd4a40a | |||
e261290cb6 | |||
6ca602dd9f | |||
a9c74a0cd0 | |||
3d91a853b2 | |||
4757a5ad33 | |||
eea1f9b1df | |||
1adeae56e6 | |||
5c2fff306e | |||
72d8a40a3d | |||
cf61be6190 | |||
543ea41569 | |||
19528cfecd | |||
79ab76e156 | |||
fb42c5838c | |||
d094a3722c | |||
9c65af2df0 | |||
68db023329 | |||
ae89fcfdaf | |||
fafb901baa | |||
2b9d4af261 | |||
53fc9add51 | |||
1e0aa2612c | |||
82327ce01d | |||
0830ff55d8 | |||
aacdaa7b1a | |||
ea23e937ce | |||
f3be8e66d7 | |||
69bf74bd76 | |||
c49717a824 | |||
![]() |
07e201ec13 | ||
d892f96cb1 | |||
d034c28f51 | |||
ccb9d5d307 | |||
aa7d130347 | |||
6ae9565d06 | |||
d41f0c7b15 | |||
a98102e32e | |||
8d4fa03e5c | |||
397731d4df | |||
ff048f5d27 | |||
165fa9e2a1 | |||
38af5b0501 | |||
69f2732a13 | |||
cd47d1b2ed | |||
eee25a175a | |||
55fb2abc81 | |||
0dcfd93c6e | |||
6e5eb46d73 | |||
d6b970dd7b | |||
84a3ff63d0 | |||
84272ce19a | |||
5560da7ceb | |||
37ebd66570 | |||
3b71a62390 | |||
d61ab45385 | |||
b2dd1f8f01 | |||
32a9aac3b8 | |||
9015952c9c | |||
83362f87bb | |||
415f88d8b0 | |||
ea4b1d027d | |||
![]() |
c8ae1fce60 | ||
9ac81ed6ab | |||
9f53272df4 | |||
58dcd20998 | |||
![]() |
4843b161d6 | ||
7324f32a94 | |||
18dc611b40 | |||
1e55b58e4f | |||
13e17507c0 | |||
4dd409a185 | |||
d706d0460c | |||
38e270ae30 | |||
e67710b908 | |||
f43a8835dc | |||
![]() |
2ca18e78f9 | ||
b75d0c7e7a | |||
f14f81e5ac | |||
faa0c7aa6f | |||
![]() |
88f0d483bd | ||
8571093f99 | |||
2b8e35eeb0 | |||
4f33dcff78 | |||
fd39da1df6 | |||
dbdab681cf | |||
66c6cf0d71 | |||
1998269b10 | |||
5945a90df9 | |||
72f77598a2 | |||
ac1554bcf6 | |||
5aba7f9774 | |||
78b7140b02 | |||
aa788b759a | |||
caf907626d | |||
c5712c6795 | |||
![]() |
b08c5381ac | ||
e4a779264c | |||
![]() |
c94ca54cda | ||
203e7ba332 | |||
c597d6cb64 | |||
c869f54dcb | |||
bdb4ebebf1 | |||
8ab91edd91 | |||
3ac5a52d6e | |||
c3bc53162a | |||
f1f89ca751 | |||
3ae85a0d8f | |||
37ad72ab23 | |||
4cf6524731 | |||
f76a2c0d18 | |||
462f99bf38 | |||
fb9f12eeec | |||
46dbfce7fc | |||
703dff333c | |||
00a3533429 | |||
739136caca | |||
f26aa186b2 | |||
![]() |
793d203139 | ||
60a8ade18a | |||
d57ce54e30 | |||
7a74d91e32 | |||
![]() |
c6ce70855a | ||
881ef0548a | |||
484ad31653 | |||
023eb2ea7c | |||
d567785658 | |||
332d547ab7 | |||
b9e66af686 | |||
2c81b4d4cf | |||
5feb3541f4 | |||
![]() |
cf9dd3c0d8 | ||
6f1cdcba85 | |||
c5afef1224 | |||
1c05f30e4d | |||
53113a2e57 | |||
c5394f3db8 | |||
f814871e81 | |||
47d1a7484c | |||
cd9ebc816e | |||
72fb92ded8 | |||
cacdea7f4a | |||
![]() |
44258b5ad0 | ||
7808ee9bd7 | |||
![]() |
6db059e3d7 | ||
f7d5aaa365 | |||
d26c29d8e4 | |||
31365c6b9e | |||
ad632a13d9 | |||
b1c49b3b2a | |||
a5bcb4c148 | |||
68101fea68 | |||
8ac5b1fdb3 | |||
bd9bb56f18 | |||
364babab65 | |||
0fcc04e7bf | |||
f1f2c26223 | |||
c94c0d988a | |||
0c3851d31f | |||
3ea2b4ac31 | |||
7c6d546f3a | |||
d53ea1d0af | |||
092732d113 | |||
beb746135d | |||
5da807e00f | |||
fc8b9efb24 | |||
82467e5dcf | |||
80b2fc59d1 | |||
7d8b651268 | |||
676a2f690c | |||
35843ddcd8 | |||
98395e0bdf | |||
a735b2c335 | |||
73aa6b8185 | |||
c40971d79a | |||
e4eaf424b9 | |||
6bcda04d1f | |||
1f94b56d77 | |||
![]() |
98bf714b37 | ||
185eeeaaac | |||
003dfae270 | |||
e0d4aede4d | |||
95e60b4ffd | |||
087f27a52f | |||
08c5d99e88 | |||
72e249974a | |||
d3db38cfb1 | |||
7725740543 | |||
aa1ffc093c | |||
7a4a6ccad7 | |||
ada6012518 | |||
![]() |
a5c2d0018c | ||
611be46cc9 | |||
a36f029459 | |||
ef5b435e8f | |||
d431b1416b | |||
b0f9639733 | |||
396b7a6ec8 | |||
412d93c298 | |||
92eb59341c | |||
9f00e138ac | |||
e022753d7a | |||
2bad3577c0 | |||
4ba6bac2f1 | |||
63be57307e | |||
95ab16004d | |||
03338e0270 | |||
a06b04f92d | |||
2034e8c42d | |||
538da79c6d | |||
d099e0d2a4 | |||
f7252e9692 | |||
10b048fd9e | |||
![]() |
ee3facd087 | ||
0dcee6a386 | |||
4089b7b80b | |||
d6faee2824 | |||
2eeedbbca9 | |||
7a73685460 | |||
095b8d8688 | |||
4ec0a8705b | |||
dd158f1cab | |||
c171e8b95c | |||
46a2592eef | |||
e75adb979b | |||
9f68369247 | |||
eb281e4b24 | |||
698efac59e | |||
afe11eff8a | |||
fe108d85b4 | |||
d34f8ac3d9 | |||
5d4574ea0e | |||
4ebe1c3e69 | |||
887713d08d | |||
cc761cdae6 | |||
a2938c86ca | |||
a66e20f984 | |||
db317f070e | |||
7502bc583c | |||
089870ab3a | |||
298711d158 | |||
8d284d4854 | |||
9e88cfbe0c | |||
405bbb06f2 | |||
ed8f2bbf5c | |||
ddce8e9ea3 | |||
1b9e31f004 | |||
8791762af0 | |||
8d813f2eed | |||
af29d103c6 | |||
ca336c600b | |||
491ada0a38 | |||
62f813754d | |||
cbeb70bdae | |||
139a651434 | |||
add307d429 | |||
59e6dc8a93 | |||
6410fe0492 | |||
b818008ddf | |||
af9c969768 | |||
0b25d923e5 | |||
a863ba191d | |||
f606393522 | |||
f8b389b121 | |||
![]() |
59adee83e7 |
@@ -139,6 +139,7 @@ if(NOT WIN32 OR ENABLE_MINGW64)
|
||||
include(cmake/vpx.cmake)
|
||||
include(cmake/x264.cmake)
|
||||
include(cmake/xvidcore.cmake)
|
||||
include(cmake/aom.cmake)
|
||||
include(cmake/ffmpeg.cmake)
|
||||
include(cmake/fftw.cmake)
|
||||
include(cmake/sndfile.cmake)
|
||||
|
@@ -42,4 +42,5 @@ endif()
|
||||
add_dependencies(
|
||||
external_alembic
|
||||
external_openexr
|
||||
external_imath
|
||||
)
|
||||
|
45
build_files/build_environment/cmake/aom.cmake
Normal file
45
build_files/build_environment/cmake/aom.cmake
Normal file
@@ -0,0 +1,45 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
if(WIN32)
|
||||
# The default generator on windows is msbuild, which we do not
|
||||
# want to use for this dep, as it needs to build with mingw
|
||||
set(AOM_GENERATOR "Ninja")
|
||||
# The default flags are full of MSVC options given this will be
|
||||
# building with mingw, it'll have an unhappy time with that and
|
||||
# we need to clear them out.
|
||||
set(AOM_CMAKE_FLAGS )
|
||||
# CMake will correctly identify pthreads being available, however
|
||||
# we do not want to use them, as that gains a dependency on
|
||||
# libpthreadswin.dll which we do not want. When pthreads is not
|
||||
# available aom will use a pthreads emulation layer using win32 threads
|
||||
set(AOM_EXTRA_ARGS_WIN32 -DCMAKE_HAVE_PTHREAD_H=OFF)
|
||||
else()
|
||||
set(AOM_GENERATOR "Unix Makefiles")
|
||||
set(AOM_CMAKE_FLAGS ${DEFAULT_CMAKE_FLAGS})
|
||||
endif()
|
||||
|
||||
set(AOM_EXTRA_ARGS
|
||||
-DENABLE_TESTDATA=OFF
|
||||
-DENABLE_TESTS=OFF
|
||||
-DENABLE_TOOLS=OFF
|
||||
-DENABLE_EXAMPLES=OFF
|
||||
${AOM_EXTRA_ARGS_WIN32}
|
||||
)
|
||||
|
||||
# This is slightly different from all other deps in the way that
|
||||
# aom uses cmake as a build system, but still needs the environment setup
|
||||
# to include perl so we manually setup the environment and call
|
||||
# cmake directly for the configure, build and install commands.
|
||||
|
||||
ExternalProject_Add(external_aom
|
||||
URL file://${PACKAGE_DIR}/${AOM_FILE}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
URL_HASH ${AOM_HASH_TYPE}=${AOM_HASH}
|
||||
PREFIX ${BUILD_DIR}/aom
|
||||
CONFIGURE_COMMAND ${CONFIGURE_ENV} &&
|
||||
cd ${BUILD_DIR}/aom/src/external_aom-build/ &&
|
||||
${CMAKE_COMMAND} -G "${AOM_GENERATOR}" -DCMAKE_INSTALL_PREFIX=${LIBDIR}/aom ${AOM_CMAKE_FLAGS} ${AOM_EXTRA_ARGS} ${BUILD_DIR}/aom/src/external_aom/
|
||||
BUILD_COMMAND ${CMAKE_COMMAND} --build .
|
||||
INSTALL_COMMAND ${CMAKE_COMMAND} --build . --target install
|
||||
INSTALL_DIR ${LIBDIR}/aom
|
||||
)
|
@@ -116,3 +116,4 @@ download_source(IGC_SPIRV_TOOLS)
|
||||
download_source(IGC_SPIRV_TRANSLATOR)
|
||||
download_source(GMMLIB)
|
||||
download_source(OCLOC)
|
||||
download_source(AOM)
|
||||
|
@@ -1,9 +1,9 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
set(FFMPEG_CFLAGS "-I${mingw_LIBDIR}/lame/include -I${mingw_LIBDIR}/openjpeg/include/ -I${mingw_LIBDIR}/ogg/include -I${mingw_LIBDIR}/vorbis/include -I${mingw_LIBDIR}/theora/include -I${mingw_LIBDIR}/opus/include -I${mingw_LIBDIR}/vpx/include -I${mingw_LIBDIR}/x264/include -I${mingw_LIBDIR}/xvidcore/include -I${mingw_LIBDIR}/zlib/include")
|
||||
set(FFMPEG_LDFLAGS "-L${mingw_LIBDIR}/lame/lib -L${mingw_LIBDIR}/openjpeg/lib -L${mingw_LIBDIR}/ogg/lib -L${mingw_LIBDIR}/vorbis/lib -L${mingw_LIBDIR}/theora/lib -L${mingw_LIBDIR}/opus/lib -L${mingw_LIBDIR}/vpx/lib -L${mingw_LIBDIR}/x264/lib -L${mingw_LIBDIR}/xvidcore/lib -L${mingw_LIBDIR}/zlib/lib")
|
||||
set(FFMPEG_CFLAGS "-I${mingw_LIBDIR}/lame/include -I${mingw_LIBDIR}/openjpeg/include/ -I${mingw_LIBDIR}/ogg/include -I${mingw_LIBDIR}/vorbis/include -I${mingw_LIBDIR}/theora/include -I${mingw_LIBDIR}/opus/include -I${mingw_LIBDIR}/vpx/include -I${mingw_LIBDIR}/x264/include -I${mingw_LIBDIR}/xvidcore/include -I${mingw_LIBDIR}/zlib/include -I${mingw_LIBDIR}/aom/include")
|
||||
set(FFMPEG_LDFLAGS "-L${mingw_LIBDIR}/lame/lib -L${mingw_LIBDIR}/openjpeg/lib -L${mingw_LIBDIR}/ogg/lib -L${mingw_LIBDIR}/vorbis/lib -L${mingw_LIBDIR}/theora/lib -L${mingw_LIBDIR}/opus/lib -L${mingw_LIBDIR}/vpx/lib -L${mingw_LIBDIR}/x264/lib -L${mingw_LIBDIR}/xvidcore/lib -L${mingw_LIBDIR}/zlib/lib -L${mingw_LIBDIR}/aom/lib")
|
||||
set(FFMPEG_EXTRA_FLAGS --pkg-config-flags=--static --extra-cflags=${FFMPEG_CFLAGS} --extra-ldflags=${FFMPEG_LDFLAGS})
|
||||
set(FFMPEG_ENV PKG_CONFIG_PATH=${mingw_LIBDIR}/openjpeg/lib/pkgconfig:${mingw_LIBDIR}/x264/lib/pkgconfig:${mingw_LIBDIR}/vorbis/lib/pkgconfig:${mingw_LIBDIR}/ogg/lib/pkgconfig:${mingw_LIBDIR}:${mingw_LIBDIR}/vpx/lib/pkgconfig:${mingw_LIBDIR}/theora/lib/pkgconfig:${mingw_LIBDIR}/openjpeg/lib/pkgconfig:${mingw_LIBDIR}/opus/lib/pkgconfig:)
|
||||
set(FFMPEG_ENV PKG_CONFIG_PATH=${mingw_LIBDIR}/openjpeg/lib/pkgconfig:${mingw_LIBDIR}/x264/lib/pkgconfig:${mingw_LIBDIR}/vorbis/lib/pkgconfig:${mingw_LIBDIR}/ogg/lib/pkgconfig:${mingw_LIBDIR}:${mingw_LIBDIR}/vpx/lib/pkgconfig:${mingw_LIBDIR}/theora/lib/pkgconfig:${mingw_LIBDIR}/openjpeg/lib/pkgconfig:${mingw_LIBDIR}/opus/lib/pkgconfig:${mingw_LIBDIR}/aom/lib/pkgconfig:)
|
||||
|
||||
if(WIN32)
|
||||
set(FFMPEG_ENV set ${FFMPEG_ENV} &&)
|
||||
@@ -79,6 +79,7 @@ ExternalProject_Add(external_ffmpeg
|
||||
--disable-librtmp
|
||||
--enable-libx264
|
||||
--enable-libxvid
|
||||
--enable-libaom
|
||||
--disable-libopencore-amrnb
|
||||
--disable-libopencore-amrwb
|
||||
--disable-libdc1394
|
||||
@@ -125,6 +126,7 @@ add_dependencies(
|
||||
external_vorbis
|
||||
external_ogg
|
||||
external_lame
|
||||
external_aom
|
||||
)
|
||||
if(WIN32)
|
||||
add_dependencies(
|
||||
|
@@ -5,8 +5,6 @@ ExternalProject_Add(external_flex
|
||||
URL_HASH ${FLEX_HASH_TYPE}=${FLEX_HASH}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
PREFIX ${BUILD_DIR}/flex
|
||||
# This patch fixes build with some versions of glibc (https://github.com/westes/flex/commit/24fd0551333e7eded87b64dd36062da3df2f6380)
|
||||
PATCH_COMMAND ${PATCH_CMD} -d ${BUILD_DIR}/flex/src/external_flex < ${PATCH_DIR}/flex.diff
|
||||
CONFIGURE_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/flex/src/external_flex/ && ${CONFIGURE_COMMAND} --prefix=${LIBDIR}/flex
|
||||
BUILD_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/flex/src/external_flex/ && make -j${MAKE_THREADS}
|
||||
INSTALL_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/flex/src/external_flex/ && make install
|
||||
|
@@ -25,9 +25,6 @@ if(BUILD_MODE STREQUAL Release)
|
||||
# glew-> opengl
|
||||
${CMAKE_COMMAND} -E copy ${LIBDIR}/glew/lib/libglew32.lib ${HARVEST_TARGET}/opengl/lib/glew.lib &&
|
||||
${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/glew/include/ ${HARVEST_TARGET}/opengl/include/ &&
|
||||
# tiff
|
||||
${CMAKE_COMMAND} -E copy ${LIBDIR}/tiff/lib/tiff.lib ${HARVEST_TARGET}/tiff/lib/libtiff.lib &&
|
||||
${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/tiff/include/ ${HARVEST_TARGET}/tiff/include/
|
||||
DEPENDS
|
||||
)
|
||||
endif()
|
||||
@@ -177,6 +174,7 @@ harvest(opus/lib ffmpeg/lib "*.a")
|
||||
harvest(vpx/lib ffmpeg/lib "*.a")
|
||||
harvest(x264/lib ffmpeg/lib "*.a")
|
||||
harvest(xvidcore/lib ffmpeg/lib "*.a")
|
||||
harvest(aom/lib ffmpeg/lib "*.a")
|
||||
harvest(webp/lib webp/lib "*.a")
|
||||
harvest(webp/include webp/include "*.h")
|
||||
harvest(usd/include usd/include "*.h")
|
||||
|
@@ -18,9 +18,15 @@ if(WIN32)
|
||||
set(PNG_LIBNAME libpng16_static${LIBEXT})
|
||||
set(OIIO_SIMD_FLAGS -DUSE_SIMD=sse2)
|
||||
set(OPENJPEG_POSTFIX _msvc)
|
||||
if(BUILD_MODE STREQUAL Debug)
|
||||
set(TIFF_POSTFIX d)
|
||||
else()
|
||||
set(TIFF_POSTFIX)
|
||||
endif()
|
||||
else()
|
||||
set(PNG_LIBNAME libpng${LIBEXT})
|
||||
set(OIIO_SIMD_FLAGS)
|
||||
set(TIFF_POSTFIX)
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
@@ -65,7 +71,7 @@ set(OPENIMAGEIO_EXTRA_ARGS
|
||||
-DZLIB_INCLUDE_DIR=${LIBDIR}/zlib/include
|
||||
-DPNG_LIBRARY=${LIBDIR}/png/lib/${PNG_LIBNAME}
|
||||
-DPNG_PNG_INCLUDE_DIR=${LIBDIR}/png/include
|
||||
-DTIFF_LIBRARY=${LIBDIR}/tiff/lib/${LIBPREFIX}tiff${LIBEXT}
|
||||
-DTIFF_LIBRARY=${LIBDIR}/tiff/lib/${LIBPREFIX}tiff${TIFF_POSTFIX}${LIBEXT}
|
||||
-DTIFF_INCLUDE_DIR=${LIBDIR}/tiff/include
|
||||
-DJPEG_LIBRARY=${LIBDIR}/jpeg/lib/${JPEG_LIBRARY}
|
||||
-DJPEG_INCLUDE_DIR=${LIBDIR}/jpeg/include
|
||||
|
@@ -3,6 +3,8 @@
|
||||
set(TIFF_EXTRA_ARGS
|
||||
-DZLIB_LIBRARY=${LIBDIR}/zlib/lib/${ZLIB_LIBRARY}
|
||||
-DZLIB_INCLUDE_DIR=${LIBDIR}/zlib/include
|
||||
-DJPEG_LIBRARY=${LIBDIR}/jpeg/lib/${JPEG_LIBRARY}
|
||||
-DJPEG_INCLUDE_DIR=${LIBDIR}/jpeg/include
|
||||
-DPNG_STATIC=ON
|
||||
-DBUILD_SHARED_LIBS=OFF
|
||||
-Dlzma=OFF
|
||||
@@ -24,10 +26,12 @@ add_dependencies(
|
||||
external_tiff
|
||||
external_zlib
|
||||
)
|
||||
|
||||
if(WIN32 AND BUILD_MODE STREQUAL Debug)
|
||||
ExternalProject_Add_Step(external_tiff after_install
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/tiff/lib/tiffd${LIBEXT} ${LIBDIR}/tiff/lib/tiff${LIBEXT}
|
||||
DEPENDEES install
|
||||
)
|
||||
if(WIN32)
|
||||
if(BUILD_MODE STREQUAL Release)
|
||||
ExternalProject_Add_Step(external_tiff after_install
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/tiff/lib/tiff.lib ${HARVEST_TARGET}/tiff/lib/libtiff.lib &&
|
||||
${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/tiff/include/ ${HARVEST_TARGET}/tiff/include/
|
||||
DEPENDEES install
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
@@ -45,15 +45,15 @@ set(PTHREADS_HASH f3bf81bb395840b3446197bcf4ecd653)
|
||||
set(PTHREADS_HASH_TYPE MD5)
|
||||
set(PTHREADS_FILE pthreads4w-code-${PTHREADS_VERSION}.zip)
|
||||
|
||||
set(OPENEXR_VERSION 3.1.4)
|
||||
set(OPENEXR_VERSION 3.1.5)
|
||||
set(OPENEXR_URI https://github.com/AcademySoftwareFoundation/openexr/archive/v${OPENEXR_VERSION}.tar.gz)
|
||||
set(OPENEXR_HASH e990be1ff765797bc2d93a8060e1c1f2)
|
||||
set(OPENEXR_HASH a92f38eedd43e56c0af56d4852506886)
|
||||
set(OPENEXR_HASH_TYPE MD5)
|
||||
set(OPENEXR_FILE openexr-${OPENEXR_VERSION}.tar.gz)
|
||||
|
||||
set(IMATH_VERSION 3.1.4)
|
||||
set(IMATH_VERSION 3.1.5)
|
||||
set(IMATH_URI https://github.com/AcademySoftwareFoundation/Imath/archive/v${OPENEXR_VERSION}.tar.gz)
|
||||
set(IMATH_HASH fddf14ec73e12c34e74c3c175e311a3f)
|
||||
set(IMATH_HASH dd375574276c54872b7b3d54053baff0)
|
||||
set(IMATH_HASH_TYPE MD5)
|
||||
set(IMATH_FILE imath-${IMATH_VERSION}.tar.gz)
|
||||
|
||||
@@ -163,9 +163,9 @@ set(ROBINMAP_HASH c08ec4b1bf1c85eb0d6432244a6a89862229da1cb834f3f90fba8dc35d8c8e
|
||||
set(ROBINMAP_HASH_TYPE SHA256)
|
||||
set(ROBINMAP_FILE robinmap-${ROBINMAP_VERSION}.tar.gz)
|
||||
|
||||
set(TIFF_VERSION 4.3.0)
|
||||
set(TIFF_VERSION 4.4.0)
|
||||
set(TIFF_URI http://download.osgeo.org/libtiff/tiff-${TIFF_VERSION}.tar.gz)
|
||||
set(TIFF_HASH 0a2e4744d1426a8fc8211c0cdbc3a1b3)
|
||||
set(TIFF_HASH 376f17f189e9d02280dfe709b2b2bbea)
|
||||
set(TIFF_HASH_TYPE MD5)
|
||||
set(TIFF_FILE tiff-${TIFF_VERSION}.tar.gz)
|
||||
|
||||
@@ -633,3 +633,9 @@ set(OCLOC_URI https://github.com/intel/compute-runtime/archive/refs/tags/${OCLOC
|
||||
set(OCLOC_HASH ab22b8bf2560a57fdd3def0e35a62ca75991406f959c0263abb00cd6cd9ae998)
|
||||
set(OCLOC_HASH_TYPE SHA256)
|
||||
set(OCLOC_FILE ocloc-${OCLOC_VERSION}.tar.gz)
|
||||
|
||||
set(AOM_VERSION 3.4.0)
|
||||
set(AOM_URI https://storage.googleapis.com/aom-releases/libaom-${AOM_VERSION}.tar.gz)
|
||||
set(AOM_HASH bd754b58c3fa69f3ffd29da77de591bd9c26970e3b18537951336d6c0252e354)
|
||||
set(AOM_HASH_TYPE SHA256)
|
||||
set(AOM_FILE libaom-${AOM_VERSION}.tar.gz)
|
||||
|
@@ -1,11 +1,13 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
if(WIN32)
|
||||
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
|
||||
set(VPX_EXTRA_FLAGS --target=x86_64-win64-gcc --disable-multithread)
|
||||
else()
|
||||
set(VPX_EXTRA_FLAGS --target=x86-win32-gcc --disable-multithread)
|
||||
endif()
|
||||
# VPX is determined to use pthreads which it will tell ffmpeg to dynamically
|
||||
# link, which is not something we're super into distribution wise. However
|
||||
# if it cannot find pthread.h it'll happily provide a pthread emulation
|
||||
# layer using win32 threads. So all this patch does is make it not find
|
||||
# pthread.h
|
||||
set(VPX_PATCH ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/vpx/src/external_vpx < ${PATCH_DIR}/vpx_windows.diff)
|
||||
set(VPX_EXTRA_FLAGS --target=x86_64-win64-gcc )
|
||||
else()
|
||||
if(APPLE)
|
||||
if("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")
|
||||
@@ -18,6 +20,16 @@ else()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT BLENDER_PLATFORM_ARM)
|
||||
list(APPEND VPX_EXTRA_FLAGS
|
||||
--enable-sse4_1
|
||||
--enable-sse3
|
||||
--enable-ssse3
|
||||
--enable-avx
|
||||
--enable-avx2
|
||||
)
|
||||
endif()
|
||||
|
||||
ExternalProject_Add(external_vpx
|
||||
URL file://${PACKAGE_DIR}/${VPX_FILE}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
@@ -30,11 +42,6 @@ ExternalProject_Add(external_vpx
|
||||
--enable-static
|
||||
--disable-install-bins
|
||||
--disable-install-srcs
|
||||
--disable-sse4_1
|
||||
--disable-sse3
|
||||
--disable-ssse3
|
||||
--disable-avx
|
||||
--disable-avx2
|
||||
--disable-unit-tests
|
||||
--disable-examples
|
||||
--enable-vp8
|
||||
@@ -42,6 +49,7 @@ ExternalProject_Add(external_vpx
|
||||
${VPX_EXTRA_FLAGS}
|
||||
BUILD_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/vpx/src/external_vpx/ && make -j${MAKE_THREADS}
|
||||
INSTALL_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/vpx/src/external_vpx/ && make install
|
||||
PATCH_COMMAND ${VPX_PATCH}
|
||||
INSTALL_DIR ${LIBDIR}/vpx
|
||||
)
|
||||
|
||||
|
@@ -478,7 +478,7 @@ OCIO_FORCE_BUILD=false
|
||||
OCIO_FORCE_REBUILD=false
|
||||
OCIO_SKIP=false
|
||||
|
||||
IMATH_VERSION="3.1.4"
|
||||
IMATH_VERSION="3.1.5"
|
||||
IMATH_VERSION_SHORT="3.1"
|
||||
IMATH_VERSION_MIN="3.0"
|
||||
IMATH_VERSION_MEX="4.0"
|
||||
@@ -487,7 +487,7 @@ IMATH_FORCE_REBUILD=false
|
||||
IMATH_SKIP=false
|
||||
_with_built_imath=false
|
||||
|
||||
OPENEXR_VERSION="3.1.4"
|
||||
OPENEXR_VERSION="3.1.5"
|
||||
OPENEXR_VERSION_SHORT="3.1"
|
||||
OPENEXR_VERSION_MIN="3.0"
|
||||
OPENEXR_VERSION_MEX="4.0"
|
||||
@@ -627,6 +627,9 @@ WEBP_DEV=""
|
||||
VPX_USE=false
|
||||
VPX_VERSION_MIN=0.9.7
|
||||
VPX_DEV=""
|
||||
AOM_USE=false
|
||||
AOM_VERSION_MIN=3.3.0
|
||||
AOM_DEV=""
|
||||
OPUS_USE=false
|
||||
OPUS_VERSION_MIN=1.1.1
|
||||
OPUS_DEV=""
|
||||
@@ -1209,7 +1212,7 @@ You may also want to build them yourself (optional ones are [between brackets]):
|
||||
** [NumPy $PYTHON_NUMPY_VERSION] (use pip).
|
||||
* Boost $BOOST_VERSION (from $BOOST_SOURCE, modules: $BOOST_BUILD_MODULES).
|
||||
* TBB $TBB_VERSION (from $TBB_SOURCE).
|
||||
* [FFMpeg $FFMPEG_VERSION (needs libvorbis, libogg, libtheora, libx264, libmp3lame, libxvidcore, libvpx, libwebp, ...)] (from $FFMPEG_SOURCE).
|
||||
* [FFMpeg $FFMPEG_VERSION (needs libvorbis, libogg, libtheora, libx264, libmp3lame, libxvidcore, libvpx, libaom, libwebp, ...)] (from $FFMPEG_SOURCE).
|
||||
* [OpenColorIO $OCIO_VERSION] (from $OCIO_SOURCE).
|
||||
* Imath $IMATH_VERSION (from $IMATH_SOURCE).
|
||||
* OpenEXR $OPENEXR_VERSION (from $OPENEXR_SOURCE).
|
||||
@@ -3000,7 +3003,7 @@ compile_ALEMBIC() {
|
||||
fi
|
||||
|
||||
# To be changed each time we make edits that would modify the compiled result!
|
||||
alembic_magic=2
|
||||
alembic_magic=3
|
||||
_init_alembic
|
||||
|
||||
# Force having own builds for the dependencies.
|
||||
@@ -3048,7 +3051,7 @@ compile_ALEMBIC() {
|
||||
fi
|
||||
if [ "$_with_built_openexr" = true ]; then
|
||||
cmake_d="$cmake_d -D USE_ARNOLD=OFF"
|
||||
cmake_d="$cmake_d -D USE_BINARIES=OFF"
|
||||
cmake_d="$cmake_d -D USE_BINARIES=ON" # Tests use some Alembic binaries...
|
||||
cmake_d="$cmake_d -D USE_EXAMPLES=OFF"
|
||||
cmake_d="$cmake_d -D USE_HDF5=OFF"
|
||||
cmake_d="$cmake_d -D USE_MAYA=OFF"
|
||||
@@ -3634,7 +3637,7 @@ compile_FFmpeg() {
|
||||
fi
|
||||
|
||||
# To be changed each time we make edits that would modify the compiled result!
|
||||
ffmpeg_magic=5
|
||||
ffmpeg_magic=6
|
||||
_init_ffmpeg
|
||||
|
||||
# Force having own builds for the dependencies.
|
||||
@@ -3687,6 +3690,10 @@ compile_FFmpeg() {
|
||||
extra="$extra --enable-libvpx"
|
||||
fi
|
||||
|
||||
if [ "$AOM_USE" = true ]; then
|
||||
extra="$extra --enable-libaom"
|
||||
fi
|
||||
|
||||
if [ "$WEBP_USE" = true ]; then
|
||||
extra="$extra --enable-libwebp"
|
||||
fi
|
||||
@@ -4140,30 +4147,34 @@ install_DEB() {
|
||||
WEBP_USE=true
|
||||
fi
|
||||
|
||||
if [ "$WITH_ALL" = true ]; then
|
||||
XVID_DEV="libxvidcore-dev"
|
||||
check_package_DEB $XVID_DEV
|
||||
if [ $? -eq 0 ]; then
|
||||
XVID_USE=true
|
||||
fi
|
||||
XVID_DEV="libxvidcore-dev"
|
||||
check_package_DEB $XVID_DEV
|
||||
if [ $? -eq 0 ]; then
|
||||
XVID_USE=true
|
||||
fi
|
||||
|
||||
MP3LAME_DEV="libmp3lame-dev"
|
||||
check_package_DEB $MP3LAME_DEV
|
||||
if [ $? -eq 0 ]; then
|
||||
MP3LAME_USE=true
|
||||
fi
|
||||
MP3LAME_DEV="libmp3lame-dev"
|
||||
check_package_DEB $MP3LAME_DEV
|
||||
if [ $? -eq 0 ]; then
|
||||
MP3LAME_USE=true
|
||||
fi
|
||||
|
||||
VPX_DEV="libvpx-dev"
|
||||
check_package_version_ge_DEB $VPX_DEV $VPX_VERSION_MIN
|
||||
if [ $? -eq 0 ]; then
|
||||
VPX_USE=true
|
||||
fi
|
||||
VPX_DEV="libvpx-dev"
|
||||
check_package_version_ge_DEB $VPX_DEV $VPX_VERSION_MIN
|
||||
if [ $? -eq 0 ]; then
|
||||
VPX_USE=true
|
||||
fi
|
||||
|
||||
OPUS_DEV="libopus-dev"
|
||||
check_package_version_ge_DEB $OPUS_DEV $OPUS_VERSION_MIN
|
||||
if [ $? -eq 0 ]; then
|
||||
OPUS_USE=true
|
||||
fi
|
||||
AOM_DEV="libaom-dev"
|
||||
check_package_version_ge_DEB $AOM_DEV $AOM_VERSION_MIN
|
||||
if [ $? -eq 0 ]; then
|
||||
AOM_USE=true
|
||||
fi
|
||||
|
||||
OPUS_DEV="libopus-dev"
|
||||
check_package_version_ge_DEB $OPUS_DEV $OPUS_VERSION_MIN
|
||||
if [ $? -eq 0 ]; then
|
||||
OPUS_USE=true
|
||||
fi
|
||||
|
||||
# Check cmake version and disable features for older distros.
|
||||
@@ -4546,6 +4557,9 @@ install_DEB() {
|
||||
if [ "$VPX_USE" = true ]; then
|
||||
_packages="$_packages $VPX_DEV"
|
||||
fi
|
||||
if [ "$AOM_USE" = true ]; then
|
||||
_packages="$_packages $AOM_DEV"
|
||||
fi
|
||||
if [ "$OPUS_USE" = true ]; then
|
||||
_packages="$_packages $OPUS_DEV"
|
||||
fi
|
||||
@@ -4846,21 +4860,27 @@ install_RPM() {
|
||||
WEBP_USE=true
|
||||
fi
|
||||
|
||||
if [ "$WITH_ALL" = true ]; then
|
||||
VPX_DEV="libvpx-devel"
|
||||
check_package_version_ge_RPM $VPX_DEV $VPX_VERSION_MIN
|
||||
if [ $? -eq 0 ]; then
|
||||
VPX_USE=true
|
||||
fi
|
||||
VPX_DEV="libvpx-devel"
|
||||
check_package_version_ge_RPM $VPX_DEV $VPX_VERSION_MIN
|
||||
if [ $? -eq 0 ]; then
|
||||
VPX_USE=true
|
||||
fi
|
||||
|
||||
AOM_DEV="libaom-devel"
|
||||
check_package_version_ge_RPM $AOM_DEV $AOM_VERSION_MIN
|
||||
if [ $? -eq 0 ]; then
|
||||
AOM_USE=true
|
||||
fi
|
||||
|
||||
OPUS_DEV="libopus-devel"
|
||||
check_package_version_ge_RPM $OPUS_DEV $OPUS_VERSION_MIN
|
||||
if [ $? -eq 0 ]; then
|
||||
OPUS_USE=true
|
||||
fi
|
||||
|
||||
if [ "$WITH_ALL" = true ]; then
|
||||
PRINT ""
|
||||
install_packages_RPM libspnav-devel
|
||||
|
||||
OPUS_DEV="libopus-devel"
|
||||
check_package_version_ge_RPM $OPUS_DEV $OPUS_VERSION_MIN
|
||||
if [ $? -eq 0 ]; then
|
||||
OPUS_USE=true
|
||||
fi
|
||||
fi
|
||||
|
||||
PRINT ""
|
||||
@@ -5245,6 +5265,9 @@ install_RPM() {
|
||||
if [ "$VPX_USE" = true ]; then
|
||||
_packages="$_packages $VPX_DEV"
|
||||
fi
|
||||
if [ "$AOM_USE" = true ]; then
|
||||
_packages="$_packages $AOM_DEV"
|
||||
fi
|
||||
if [ "$OPUS_USE" = true ]; then
|
||||
_packages="$_packages $OPUS_DEV"
|
||||
fi
|
||||
@@ -5434,30 +5457,34 @@ install_ARCH() {
|
||||
WEBP_USE=true
|
||||
fi
|
||||
|
||||
if [ "$WITH_ALL" = true ]; then
|
||||
XVID_DEV="xvidcore"
|
||||
check_package_ARCH $XVID_DEV
|
||||
if [ $? -eq 0 ]; then
|
||||
XVID_USE=true
|
||||
fi
|
||||
XVID_DEV="xvidcore"
|
||||
check_package_ARCH $XVID_DEV
|
||||
if [ $? -eq 0 ]; then
|
||||
XVID_USE=true
|
||||
fi
|
||||
|
||||
MP3LAME_DEV="lame"
|
||||
check_package_ARCH $MP3LAME_DEV
|
||||
if [ $? -eq 0 ]; then
|
||||
MP3LAME_USE=true
|
||||
fi
|
||||
MP3LAME_DEV="lame"
|
||||
check_package_ARCH $MP3LAME_DEV
|
||||
if [ $? -eq 0 ]; then
|
||||
MP3LAME_USE=true
|
||||
fi
|
||||
|
||||
VPX_DEV="libvpx"
|
||||
check_package_version_ge_ARCH $VPX_DEV $VPX_VERSION_MIN
|
||||
if [ $? -eq 0 ]; then
|
||||
VPX_USE=true
|
||||
fi
|
||||
VPX_DEV="libvpx"
|
||||
check_package_version_ge_ARCH $VPX_DEV $VPX_VERSION_MIN
|
||||
if [ $? -eq 0 ]; then
|
||||
VPX_USE=true
|
||||
fi
|
||||
|
||||
OPUS_DEV="opus"
|
||||
check_package_version_ge_ARCH $OPUS_DEV $OPUS_VERSION_MIN
|
||||
if [ $? -eq 0 ]; then
|
||||
OPUS_USE=true
|
||||
fi
|
||||
AOM_DEV="libaom"
|
||||
check_package_version_ge_ARCH $AOM_DEV $AOM_VERSION_MIN
|
||||
if [ $? -eq 0 ]; then
|
||||
AOM_USE=true
|
||||
fi
|
||||
|
||||
OPUS_DEV="opus"
|
||||
check_package_version_ge_ARCH $OPUS_DEV $OPUS_VERSION_MIN
|
||||
if [ $? -eq 0 ]; then
|
||||
OPUS_USE=true
|
||||
fi
|
||||
|
||||
|
||||
@@ -5835,6 +5862,9 @@ install_ARCH() {
|
||||
if [ "$VPX_USE" = true ]; then
|
||||
_packages="$_packages $VPX_DEV"
|
||||
fi
|
||||
if [ "$AOM_USE" = true ]; then
|
||||
_packages="$_packages $AOM_DEV"
|
||||
fi
|
||||
if [ "$OPUS_USE" = true ]; then
|
||||
_packages="$_packages $OPUS_DEV"
|
||||
fi
|
||||
|
@@ -1,15 +0,0 @@
|
||||
diff --git a/configure.ac b/configure.ac
|
||||
index c6f12d644..3c977a4e3 100644
|
||||
--- a/configure.ac
|
||||
+++ b/configure.ac
|
||||
@@ -25,8 +25,10 @@
|
||||
# autoconf requirements and initialization
|
||||
|
||||
AC_INIT([the fast lexical analyser generator],[2.6.4],[flex-help@lists.sourceforge.net],[flex])
|
||||
+AC_PREREQ([2.60])
|
||||
AC_CONFIG_SRCDIR([src/scan.l])
|
||||
AC_CONFIG_AUX_DIR([build-aux])
|
||||
+AC_USE_SYSTEM_EXTENSIONS
|
||||
LT_INIT
|
||||
AM_INIT_AUTOMAKE([1.15 -Wno-portability foreign std-options dist-lzip parallel-tests subdir-objects])
|
||||
AC_CONFIG_HEADER([src/config.h])
|
11
build_files/build_environment/patches/vpx_windows.diff
Normal file
11
build_files/build_environment/patches/vpx_windows.diff
Normal file
@@ -0,0 +1,11 @@
|
||||
diff -Naur orig/configure external_vpx/configure
|
||||
--- orig/configure 2022-07-06 09:22:04 -0600
|
||||
+++ external_vpx/configure 2022-07-06 09:24:12 -0600
|
||||
@@ -270,7 +270,6 @@
|
||||
HAVE_LIST="
|
||||
${ARCH_EXT_LIST}
|
||||
vpx_ports
|
||||
- pthread_h
|
||||
unistd_h
|
||||
"
|
||||
EXPERIMENT_LIST="
|
@@ -78,11 +78,6 @@ if(UNIX AND NOT APPLE)
|
||||
set(WITH_PULSEAUDIO ON CACHE BOOL "" FORCE)
|
||||
set(WITH_X11_XINPUT ON CACHE BOOL "" FORCE)
|
||||
set(WITH_X11_XF86VMODE ON CACHE BOOL "" FORCE)
|
||||
|
||||
# Disable oneAPI on Linux for the time being.
|
||||
# The AoT compilation takes too long to be used officially in the buildbot CI/CD and the JIT
|
||||
# compilation has ABI compatibility issues when running builds made on centOS on Ubuntu.
|
||||
set(WITH_CYCLES_DEVICE_ONEAPI OFF CACHE BOOL "" FORCE)
|
||||
endif()
|
||||
if(NOT APPLE)
|
||||
set(WITH_XR_OPENXR ON CACHE BOOL "" FORCE)
|
||||
@@ -93,6 +88,6 @@ if(NOT APPLE)
|
||||
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_DEVICE_ONEAPI ON CACHE BOOL "" FORCE)
|
||||
|
||||
# Disable AoT kernels compilations until buildbot can deliver them in a reasonabel time.
|
||||
# Disable AoT kernels compilations until buildbot can deliver them in a reasonable time.
|
||||
set(WITH_CYCLES_ONEAPI_BINARIES OFF CACHE BOOL "" FORCE)
|
||||
endif()
|
||||
|
@@ -162,6 +162,9 @@ if(WITH_CODEC_FFMPEG)
|
||||
mp3lame ogg opus swresample swscale
|
||||
theora theoradec theoraenc vorbis vorbisenc
|
||||
vorbisfile vpx x264 xvidcore)
|
||||
if(EXISTS ${LIBDIR}/ffmpeg/lib/libaom.a)
|
||||
list(APPEND FFMPEG_FIND_COMPONENTS aom)
|
||||
endif()
|
||||
find_package(FFmpeg)
|
||||
endif()
|
||||
|
||||
@@ -467,8 +470,9 @@ string(APPEND CMAKE_CXX_FLAGS " -ftemplate-depth=1024")
|
||||
|
||||
# Avoid conflicts with Luxrender, and other plug-ins that may use the same
|
||||
# libraries as Blender with a different version or build options.
|
||||
set(PLATFORM_SYMBOLS_MAP ${CMAKE_SOURCE_DIR}/source/creator/symbols_apple.map)
|
||||
string(APPEND PLATFORM_LINKFLAGS
|
||||
" -Wl,-unexported_symbols_list,'${CMAKE_SOURCE_DIR}/source/creator/osx_locals.map'"
|
||||
" -Wl,-unexported_symbols_list,'${PLATFORM_SYMBOLS_MAP}'"
|
||||
)
|
||||
|
||||
string(APPEND CMAKE_CXX_FLAGS " -stdlib=libc++")
|
||||
|
@@ -202,6 +202,9 @@ if(WITH_CODEC_FFMPEG)
|
||||
vpx
|
||||
x264
|
||||
xvidcore)
|
||||
if(EXISTS ${LIBDIR}/ffmpeg/lib/libaom.a)
|
||||
list(APPEND FFMPEG_FIND_COMPONENTS aom)
|
||||
endif()
|
||||
elseif(FFMPEG)
|
||||
# Old cache variable used for root dir, convert to new standard.
|
||||
set(FFMPEG_ROOT_DIR ${FFMPEG})
|
||||
@@ -885,8 +888,9 @@ unset(_IS_LINKER_DEFAULT)
|
||||
|
||||
# Avoid conflicts with Mesa llvmpipe, Luxrender, and other plug-ins that may
|
||||
# use the same libraries as Blender with a different version or build options.
|
||||
set(PLATFORM_SYMBOLS_MAP ${CMAKE_SOURCE_DIR}/source/creator/symbols_unix.map)
|
||||
set(PLATFORM_LINKFLAGS
|
||||
"${PLATFORM_LINKFLAGS} -Wl,--version-script='${CMAKE_SOURCE_DIR}/source/creator/blender.map'"
|
||||
"${PLATFORM_LINKFLAGS} -Wl,--version-script='${PLATFORM_SYMBOLS_MAP}'"
|
||||
)
|
||||
|
||||
# Don't use position independent executable for portable install since file
|
||||
|
@@ -38,7 +38,7 @@ PROJECT_NAME = Blender
|
||||
# could be handy for archiving the generated documentation or if some version
|
||||
# control system is used.
|
||||
|
||||
PROJECT_NUMBER = V3.3
|
||||
PROJECT_NUMBER = V3.4
|
||||
|
||||
# Using the PROJECT_BRIEF tag one can provide an optional one line description
|
||||
# for a project that appears at the top of each page and should give viewer a
|
||||
|
@@ -1131,6 +1131,7 @@ def pymodule2sphinx(basepath, module_name, module, title, module_all_extra):
|
||||
# Changes In Blender will force errors here.
|
||||
context_type_map = {
|
||||
# context_member: (RNA type, is_collection)
|
||||
"active_action": ("Action", False),
|
||||
"active_annotation_layer": ("GPencilLayer", False),
|
||||
"active_bone": ("EditBone", False),
|
||||
"active_file": ("FileSelectEntry", False),
|
||||
|
7
extern/gflags/CMakeLists.txt
vendored
7
extern/gflags/CMakeLists.txt
vendored
@@ -1,6 +1,13 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
# Copyright 2016 Blender Foundation. All rights reserved.
|
||||
|
||||
# Too noisy for code we don't maintain.
|
||||
if(CMAKE_COMPILER_IS_GNUCC)
|
||||
if(NOT "${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS "8.0")
|
||||
add_cxx_flag("-Wno-cast-function-type")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(INC
|
||||
src
|
||||
src/gflags
|
||||
|
@@ -36,8 +36,13 @@ if(WITH_CYCLES_NATIVE_ONLY)
|
||||
)
|
||||
|
||||
if(NOT MSVC)
|
||||
string(APPEND CMAKE_CXX_FLAGS " -march=native")
|
||||
set(CYCLES_KERNEL_FLAGS "-march=native")
|
||||
ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_march_native "-march=native")
|
||||
if(_has_march_native)
|
||||
set(CYCLES_KERNEL_FLAGS "-march=native")
|
||||
else()
|
||||
set(CYCLES_KERNEL_FLAGS "")
|
||||
endif()
|
||||
unset(_has_march_native)
|
||||
else()
|
||||
if(NOT MSVC_NATIVE_ARCH_FLAGS)
|
||||
TRY_RUN(
|
||||
|
@@ -55,7 +55,7 @@ static bool ObtainCacheParticleData(
|
||||
return false;
|
||||
|
||||
Transform tfm = get_transform(b_ob->matrix_world());
|
||||
Transform itfm = transform_quick_inverse(tfm);
|
||||
Transform itfm = transform_inverse(tfm);
|
||||
|
||||
for (BL::Modifier &b_mod : b_ob->modifiers) {
|
||||
if ((b_mod.type() == b_mod.type_PARTICLE_SYSTEM) &&
|
||||
|
@@ -928,8 +928,22 @@ static ShaderNode *add_node(Scene *scene,
|
||||
sky->set_sun_disc(b_sky_node.sun_disc());
|
||||
sky->set_sun_size(b_sky_node.sun_size());
|
||||
sky->set_sun_intensity(b_sky_node.sun_intensity());
|
||||
sky->set_sun_elevation(b_sky_node.sun_elevation());
|
||||
sky->set_sun_rotation(b_sky_node.sun_rotation());
|
||||
/* Patch sun position to be able to animate daylight cycle while keeping the shading code
|
||||
* simple. */
|
||||
float sun_rotation = b_sky_node.sun_rotation();
|
||||
/* Wrap into [-2PI..2PI] range. */
|
||||
float sun_elevation = fmodf(b_sky_node.sun_elevation(), M_2PI_F);
|
||||
/* Wrap into [-PI..PI] range. */
|
||||
if (fabsf(sun_elevation) >= M_PI_F) {
|
||||
sun_elevation -= copysignf(2.0f, sun_elevation) * M_PI_F;
|
||||
}
|
||||
/* Wrap into [-PI/2..PI/2] range while keeping the same absolute position. */
|
||||
if (sun_elevation >= M_PI_2_F || sun_elevation <= -M_PI_2_F) {
|
||||
sun_elevation = copysignf(M_PI_F, sun_elevation) - sun_elevation;
|
||||
sun_rotation += M_PI_F;
|
||||
}
|
||||
sky->set_sun_elevation(sun_elevation);
|
||||
sky->set_sun_rotation(sun_rotation);
|
||||
sky->set_altitude(b_sky_node.altitude());
|
||||
sky->set_air_density(b_sky_node.air_density());
|
||||
sky->set_dust_density(b_sky_node.dust_density());
|
||||
|
@@ -7,6 +7,7 @@
|
||||
#include "MEM_guardedalloc.h"
|
||||
#include "RNA_access.h"
|
||||
#include "RNA_blender_cpp.h"
|
||||
#include "RNA_path.h"
|
||||
#include "RNA_types.h"
|
||||
|
||||
#include "blender/id_map.h"
|
||||
|
@@ -21,13 +21,9 @@
|
||||
|
||||
# include "bvh/embree.h"
|
||||
|
||||
/* Kernel includes are necessary so that the filter function for Embree can access the packed BVH.
|
||||
*/
|
||||
# include "kernel/bvh/embree.h"
|
||||
# include "kernel/bvh/util.h"
|
||||
# include "kernel/device/cpu/bvh.h"
|
||||
# include "kernel/device/cpu/compat.h"
|
||||
# include "kernel/device/cpu/globals.h"
|
||||
# include "kernel/sample/lcg.h"
|
||||
|
||||
# include "scene/hair.h"
|
||||
# include "scene/mesh.h"
|
||||
@@ -46,265 +42,6 @@ static_assert(Object::MAX_MOTION_STEPS <= RTC_MAX_TIME_STEP_COUNT,
|
||||
static_assert(Object::MAX_MOTION_STEPS == Geometry::MAX_MOTION_STEPS,
|
||||
"Object and Geometry max motion steps inconsistent");
|
||||
|
||||
# define IS_HAIR(x) (x & 1)
|
||||
|
||||
/* This gets called by Embree at every valid ray/object intersection.
|
||||
* Things like recording subsurface or shadow hits for later evaluation
|
||||
* as well as filtering for volume objects happen here.
|
||||
* Cycles' own BVH does that directly inside the traversal calls.
|
||||
*/
|
||||
static void rtc_filter_intersection_func(const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
||||
assert(args->N == 1);
|
||||
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
|
||||
const KernelGlobalsCPU *kg = ctx->kg;
|
||||
const Ray *cray = ctx->ray;
|
||||
|
||||
if (kernel_embree_is_self_intersection(kg, hit, cray)) {
|
||||
*args->valid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* This gets called by Embree at every valid ray/object intersection.
|
||||
* Things like recording subsurface or shadow hits for later evaluation
|
||||
* as well as filtering for volume objects happen here.
|
||||
* Cycles' own BVH does that directly inside the traversal calls.
|
||||
*/
|
||||
static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
||||
assert(args->N == 1);
|
||||
|
||||
const RTCRay *ray = (RTCRay *)args->ray;
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
|
||||
const KernelGlobalsCPU *kg = ctx->kg;
|
||||
const Ray *cray = ctx->ray;
|
||||
|
||||
switch (ctx->type) {
|
||||
case CCLIntersectContext::RAY_SHADOW_ALL: {
|
||||
Intersection current_isect;
|
||||
kernel_embree_convert_hit(kg, ray, hit, &current_isect);
|
||||
if (intersection_skip_self_shadow(cray->self, current_isect.object, current_isect.prim)) {
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
/* If no transparent shadows or max number of hits exceeded, all light is blocked. */
|
||||
const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type);
|
||||
if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->num_hits >= ctx->max_hits) {
|
||||
ctx->opaque_hit = true;
|
||||
return;
|
||||
}
|
||||
|
||||
++ctx->num_hits;
|
||||
|
||||
/* Always use baked shadow transparency for curves. */
|
||||
if (current_isect.type & PRIMITIVE_CURVE) {
|
||||
ctx->throughput *= intersection_curve_shadow_transparency(
|
||||
kg, current_isect.object, current_isect.prim, current_isect.u);
|
||||
|
||||
if (ctx->throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
|
||||
ctx->opaque_hit = true;
|
||||
return;
|
||||
}
|
||||
else {
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Test if we need to record this transparent intersection. */
|
||||
const uint max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
|
||||
if (ctx->num_recorded_hits < max_record_hits || ray->tfar < ctx->max_t) {
|
||||
/* If maximum number of hits was reached, replace the intersection with the
|
||||
* highest distance. We want to find the N closest intersections. */
|
||||
const uint num_recorded_hits = min(ctx->num_recorded_hits, max_record_hits);
|
||||
uint isect_index = num_recorded_hits;
|
||||
if (num_recorded_hits + 1 >= max_record_hits) {
|
||||
float max_t = ctx->isect_s[0].t;
|
||||
uint max_recorded_hit = 0;
|
||||
|
||||
for (uint i = 1; i < num_recorded_hits; ++i) {
|
||||
if (ctx->isect_s[i].t > max_t) {
|
||||
max_recorded_hit = i;
|
||||
max_t = ctx->isect_s[i].t;
|
||||
}
|
||||
}
|
||||
|
||||
if (num_recorded_hits >= max_record_hits) {
|
||||
isect_index = max_recorded_hit;
|
||||
}
|
||||
|
||||
/* Limit the ray distance and stop counting hits beyond this.
|
||||
* TODO: is there some way we can tell Embree to stop intersecting beyond
|
||||
* this distance when max number of hits is reached? Or maybe it will
|
||||
* become irrelevant if we make max_hits a very high number on the CPU. */
|
||||
ctx->max_t = max(current_isect.t, max_t);
|
||||
}
|
||||
|
||||
ctx->isect_s[isect_index] = current_isect;
|
||||
}
|
||||
|
||||
/* Always increase the number of recorded hits, even beyond the maximum,
|
||||
* so that we can detect this and trace another ray if needed. */
|
||||
++ctx->num_recorded_hits;
|
||||
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
break;
|
||||
}
|
||||
case CCLIntersectContext::RAY_LOCAL:
|
||||
case CCLIntersectContext::RAY_SSS: {
|
||||
/* Check if it's hitting the correct object. */
|
||||
Intersection current_isect;
|
||||
if (ctx->type == CCLIntersectContext::RAY_SSS) {
|
||||
kernel_embree_convert_sss_hit(kg, ray, hit, &current_isect, ctx->local_object_id);
|
||||
}
|
||||
else {
|
||||
kernel_embree_convert_hit(kg, ray, hit, &current_isect);
|
||||
if (ctx->local_object_id != current_isect.object) {
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (intersection_skip_self_local(cray->self, current_isect.prim)) {
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* No intersection information requested, just return a hit. */
|
||||
if (ctx->max_hits == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* Ignore curves. */
|
||||
if (IS_HAIR(hit->geomID)) {
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
LocalIntersection *local_isect = ctx->local_isect;
|
||||
int hit_idx = 0;
|
||||
|
||||
if (ctx->lcg_state) {
|
||||
/* See triangle_intersect_subsurface() for the native equivalent. */
|
||||
for (int i = min((int)ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
|
||||
if (local_isect->hits[i].t == ray->tfar) {
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
local_isect->num_hits++;
|
||||
|
||||
if (local_isect->num_hits <= ctx->max_hits) {
|
||||
hit_idx = local_isect->num_hits - 1;
|
||||
}
|
||||
else {
|
||||
/* reservoir sampling: if we are at the maximum number of
|
||||
* hits, randomly replace element or skip it */
|
||||
hit_idx = lcg_step_uint(ctx->lcg_state) % local_isect->num_hits;
|
||||
|
||||
if (hit_idx >= ctx->max_hits) {
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Record closest intersection only. */
|
||||
if (local_isect->num_hits && current_isect.t > local_isect->hits[0].t) {
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
local_isect->num_hits = 1;
|
||||
}
|
||||
|
||||
/* record intersection */
|
||||
local_isect->hits[hit_idx] = current_isect;
|
||||
local_isect->Ng[hit_idx] = normalize(make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z));
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
break;
|
||||
}
|
||||
case CCLIntersectContext::RAY_VOLUME_ALL: {
|
||||
/* Append the intersection to the end of the array. */
|
||||
if (ctx->num_hits < ctx->max_hits) {
|
||||
Intersection current_isect;
|
||||
kernel_embree_convert_hit(kg, ray, hit, &current_isect);
|
||||
if (intersection_skip_self(cray->self, current_isect.object, current_isect.prim)) {
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
Intersection *isect = &ctx->isect_s[ctx->num_hits];
|
||||
++ctx->num_hits;
|
||||
*isect = current_isect;
|
||||
/* Only primitives from volume object. */
|
||||
uint tri_object = isect->object;
|
||||
int object_flag = kernel_data_fetch(object_flag, tri_object);
|
||||
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
|
||||
--ctx->num_hits;
|
||||
}
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case CCLIntersectContext::RAY_REGULAR:
|
||||
default:
|
||||
if (kernel_embree_is_self_intersection(kg, hit, cray)) {
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void rtc_filter_func_backface_cull(const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
const RTCRay *ray = (RTCRay *)args->ray;
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
|
||||
/* Always ignore back-facing intersections. */
|
||||
if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
|
||||
make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
|
||||
const KernelGlobalsCPU *kg = ctx->kg;
|
||||
const Ray *cray = ctx->ray;
|
||||
|
||||
if (kernel_embree_is_self_intersection(kg, hit, cray)) {
|
||||
*args->valid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void rtc_filter_occluded_func_backface_cull(const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
const RTCRay *ray = (RTCRay *)args->ray;
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
|
||||
/* Always ignore back-facing intersections. */
|
||||
if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
|
||||
make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
rtc_filter_occluded_func(args);
|
||||
}
|
||||
|
||||
static size_t unaccounted_mem = 0;
|
||||
|
||||
static bool rtc_memory_monitor_func(void *userPtr, const ssize_t bytes, const bool)
|
||||
@@ -535,8 +272,8 @@ void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i)
|
||||
set_tri_vertex_buffer(geom_id, mesh, false);
|
||||
|
||||
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
|
||||
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
|
||||
rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_intersection_func);
|
||||
rtcSetGeometryOccludedFilterFunction(geom_id, kernel_embree_filter_occluded_func);
|
||||
rtcSetGeometryIntersectFilterFunction(geom_id, kernel_embree_filter_intersection_func);
|
||||
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
|
||||
|
||||
rtcCommitGeometry(geom_id);
|
||||
@@ -739,8 +476,8 @@ void BVHEmbree::add_points(const Object *ob, const PointCloud *pointcloud, int i
|
||||
set_point_vertex_buffer(geom_id, pointcloud, false);
|
||||
|
||||
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
|
||||
rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func_backface_cull);
|
||||
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func_backface_cull);
|
||||
rtcSetGeometryIntersectFilterFunction(geom_id, kernel_embree_filter_func_backface_cull);
|
||||
rtcSetGeometryOccludedFilterFunction(geom_id, kernel_embree_filter_occluded_func_backface_cull);
|
||||
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
|
||||
|
||||
rtcCommitGeometry(geom_id);
|
||||
@@ -799,12 +536,13 @@ void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
|
||||
|
||||
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
|
||||
if (hair->curve_shape == CURVE_RIBBON) {
|
||||
rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_intersection_func);
|
||||
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
|
||||
rtcSetGeometryIntersectFilterFunction(geom_id, kernel_embree_filter_intersection_func);
|
||||
rtcSetGeometryOccludedFilterFunction(geom_id, kernel_embree_filter_occluded_func);
|
||||
}
|
||||
else {
|
||||
rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func_backface_cull);
|
||||
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func_backface_cull);
|
||||
rtcSetGeometryIntersectFilterFunction(geom_id, kernel_embree_filter_func_backface_cull);
|
||||
rtcSetGeometryOccludedFilterFunction(geom_id,
|
||||
kernel_embree_filter_occluded_func_backface_cull);
|
||||
}
|
||||
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
|
||||
|
||||
|
@@ -402,6 +402,18 @@ unique_ptr<DeviceQueue> OneapiDevice::gpu_queue_create()
|
||||
return make_unique<OneapiDeviceQueue>(this);
|
||||
}
|
||||
|
||||
int OneapiDevice::get_num_multiprocessors()
|
||||
{
|
||||
assert(device_queue_);
|
||||
return oneapi_dll_.oneapi_get_num_multiprocessors(device_queue_);
|
||||
}
|
||||
|
||||
int OneapiDevice::get_max_num_threads_per_multiprocessor()
|
||||
{
|
||||
assert(device_queue_);
|
||||
return oneapi_dll_.oneapi_get_max_num_threads_per_multiprocessor(device_queue_);
|
||||
}
|
||||
|
||||
bool OneapiDevice::should_use_graphics_interop()
|
||||
{
|
||||
/* NOTE(@nsirgien): oneAPI doesn't yet support direct writing into graphics API objects, so
|
||||
|
@@ -89,6 +89,9 @@ class OneapiDevice : public Device {
|
||||
|
||||
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
|
||||
|
||||
int get_num_multiprocessors();
|
||||
int get_max_num_threads_per_multiprocessor();
|
||||
|
||||
/* NOTE(@nsirgien): Create these methods to avoid some compilation problems on Windows with host
|
||||
* side compilation (MSVC). */
|
||||
void *usm_aligned_alloc_host(size_t memory_size, size_t alignment);
|
||||
|
@@ -36,34 +36,9 @@ OneapiDeviceQueue::~OneapiDeviceQueue()
|
||||
|
||||
int OneapiDeviceQueue::num_concurrent_states(const size_t state_size) const
|
||||
{
|
||||
int num_states;
|
||||
|
||||
/* TODO: implement and use get_num_multiprocessors and get_max_num_threads_per_multiprocessor. */
|
||||
const size_t compute_units = oneapi_dll_.oneapi_get_compute_units_amount(
|
||||
oneapi_device_->sycl_queue());
|
||||
if (compute_units >= 128) {
|
||||
/* dGPU path, it makes sense to allocate more states, because it will be dedicated GPU memory. */
|
||||
int base = 1024 * 1024;
|
||||
/* linear dependency (with coefficient less than 1) on the amount of compute units. */
|
||||
num_states = (base * (compute_units / 128)) * 3 / 4;
|
||||
|
||||
/* Limit amount of integrator states by one quarter of device memory, because
|
||||
* other allocations will need some space as well
|
||||
* TODO: base this calculation on how many states the GPU is actually capable of
|
||||
* running, with some headroom to improve occupancy. If the textures don't fit, offload into
|
||||
* unified memory. */
|
||||
size_t states_memory_size = num_states * state_size;
|
||||
size_t device_memory_amount =
|
||||
(oneapi_dll_.oneapi_get_memcapacity)(oneapi_device_->sycl_queue());
|
||||
if (states_memory_size >= device_memory_amount / 4) {
|
||||
num_states = device_memory_amount / 4 / state_size;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* iGPU path - no real need to allocate a lot of integrator states because it is shared GPU
|
||||
* memory. */
|
||||
num_states = 1024 * 512;
|
||||
}
|
||||
const int max_num_threads = oneapi_device_->get_num_multiprocessors() *
|
||||
oneapi_device_->get_max_num_threads_per_multiprocessor();
|
||||
int num_states = max(8 * max_num_threads, 65536) * 16;
|
||||
|
||||
VLOG_DEVICE_STATS << "GPU queue concurrent states: " << num_states << ", using up to "
|
||||
<< string_human_readable_size(num_states * state_size);
|
||||
@@ -73,14 +48,10 @@ int OneapiDeviceQueue::num_concurrent_states(const size_t state_size) const
|
||||
|
||||
int OneapiDeviceQueue::num_concurrent_busy_states() const
|
||||
{
|
||||
const size_t compute_units = oneapi_dll_.oneapi_get_compute_units_amount(
|
||||
oneapi_device_->sycl_queue());
|
||||
if (compute_units >= 128) {
|
||||
return 1024 * 1024;
|
||||
}
|
||||
else {
|
||||
return 1024 * 512;
|
||||
}
|
||||
const int max_num_threads = oneapi_device_->get_num_multiprocessors() *
|
||||
oneapi_device_->get_max_num_threads_per_multiprocessor();
|
||||
|
||||
return 4 * max(8 * max_num_threads, 65536);
|
||||
}
|
||||
|
||||
void OneapiDeviceQueue::init_execution()
|
||||
|
@@ -26,7 +26,6 @@
|
||||
# include "util/task.h"
|
||||
# include "util/time.h"
|
||||
|
||||
# undef __KERNEL_CPU__
|
||||
# define __KERNEL_OPTIX__
|
||||
# include "kernel/device/optix/globals.h"
|
||||
|
||||
|
@@ -8,7 +8,6 @@
|
||||
|
||||
# include "util/time.h"
|
||||
|
||||
# undef __KERNEL_CPU__
|
||||
# define __KERNEL_OPTIX__
|
||||
# include "kernel/device/optix/globals.h"
|
||||
|
||||
|
@@ -42,6 +42,7 @@ set(SRC_KERNEL_DEVICE_ONEAPI
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_DEVICE_CPU_HEADERS
|
||||
device/cpu/bvh.h
|
||||
device/cpu/compat.h
|
||||
device/cpu/image.h
|
||||
device/cpu/globals.h
|
||||
@@ -71,11 +72,13 @@ set(SRC_KERNEL_DEVICE_HIP_HEADERS
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_DEVICE_OPTIX_HEADERS
|
||||
device/optix/bvh.h
|
||||
device/optix/compat.h
|
||||
device/optix/globals.h
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_DEVICE_METAL_HEADERS
|
||||
device/metal/bvh.h
|
||||
device/metal/compat.h
|
||||
device/metal/context_begin.h
|
||||
device/metal/context_end.h
|
||||
@@ -214,8 +217,6 @@ set(SRC_KERNEL_BVH_HEADERS
|
||||
bvh/util.h
|
||||
bvh/volume.h
|
||||
bvh/volume_all.h
|
||||
bvh/embree.h
|
||||
bvh/metal.h
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_CAMERA_HEADERS
|
||||
@@ -316,6 +317,7 @@ set(SRC_UTIL_HEADERS
|
||||
../util/math_float2.h
|
||||
../util/math_float3.h
|
||||
../util/math_float4.h
|
||||
../util/math_float8.h
|
||||
../util/math_int2.h
|
||||
../util/math_int3.h
|
||||
../util/math_int4.h
|
||||
@@ -353,8 +355,6 @@ set(SRC_UTIL_HEADERS
|
||||
../util/types_uint4.h
|
||||
../util/types_uint4_impl.h
|
||||
../util/types_ushort4.h
|
||||
../util/types_vector3.h
|
||||
../util/types_vector3_impl.h
|
||||
)
|
||||
|
||||
set(LIB
|
||||
|
@@ -1,40 +1,47 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
/* BVH
|
||||
*
|
||||
* Bounding volume hierarchy for ray tracing. We compile different variations
|
||||
* of the same BVH traversal function for faster rendering when some types of
|
||||
* primitives are not needed, using #includes to work around the lack of
|
||||
* C++ templates in OpenCL.
|
||||
*
|
||||
* Originally based on "Understanding the Efficiency of Ray Traversal on GPUs",
|
||||
* the code has been extended and modified to support more primitives and work
|
||||
* with CPU/CUDA/OpenCL. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef __EMBREE__
|
||||
# include "kernel/bvh/embree.h"
|
||||
#endif
|
||||
|
||||
#ifdef __METALRT__
|
||||
# include "kernel/bvh/metal.h"
|
||||
#endif
|
||||
|
||||
#include "kernel/bvh/types.h"
|
||||
#include "kernel/bvh/util.h"
|
||||
|
||||
#include "kernel/integrator/state_util.h"
|
||||
|
||||
/* Device specific acceleration structures for ray tracing. */
|
||||
|
||||
#if defined(__EMBREE__)
|
||||
# include "kernel/device/cpu/bvh.h"
|
||||
# define __BVH2__
|
||||
#elif defined(__METALRT__)
|
||||
# include "kernel/device/metal/bvh.h"
|
||||
#elif defined(__KERNEL_OPTIX__)
|
||||
# include "kernel/device/optix/bvh.h"
|
||||
#else
|
||||
# define __BVH2__
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#if !defined(__KERNEL_GPU_RAYTRACING__)
|
||||
#ifdef __BVH2__
|
||||
|
||||
/* Regular BVH traversal */
|
||||
/* BVH2
|
||||
*
|
||||
* Bounding volume hierarchy for ray tracing, when no native acceleration
|
||||
* structure is available for the device.
|
||||
|
||||
* We compile different variations of the same BVH traversal function for
|
||||
* faster rendering when some types of primitives are not needed, using #includes
|
||||
* to work around the lack of C++ templates in OpenCL.
|
||||
*
|
||||
* Originally based on "Understanding the Efficiency of Ray Traversal on GPUs",
|
||||
* the code has been extended and modified to support more primitives and work
|
||||
* with CPU and various GPU kernel languages. */
|
||||
|
||||
# include "kernel/bvh/nodes.h"
|
||||
|
||||
/* Regular BVH traversal */
|
||||
|
||||
# define BVH_FUNCTION_NAME bvh_intersect
|
||||
# define BVH_FUNCTION_FEATURES BVH_POINTCLOUD
|
||||
# include "kernel/bvh/traversal.h"
|
||||
@@ -57,9 +64,46 @@ CCL_NAMESPACE_BEGIN
|
||||
# include "kernel/bvh/traversal.h"
|
||||
# endif
|
||||
|
||||
/* Subsurface scattering BVH traversal */
|
||||
ccl_device_intersect bool scene_intersect(KernelGlobals kg,
|
||||
ccl_private const Ray *ray,
|
||||
const uint visibility,
|
||||
ccl_private Intersection *isect)
|
||||
{
|
||||
if (!intersection_ray_valid(ray)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect(kg, ray, visibility, isect);
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_hair_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_hair(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect(kg, ray, isect, visibility);
|
||||
}
|
||||
|
||||
/* Single object BVH traversal, for SSS/AO/bevel. */
|
||||
|
||||
# ifdef __BVH_LOCAL__
|
||||
|
||||
# if defined(__BVH_LOCAL__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_local
|
||||
# define BVH_FUNCTION_FEATURES BVH_HAIR
|
||||
# include "kernel/bvh/local.h"
|
||||
@@ -69,25 +113,40 @@ CCL_NAMESPACE_BEGIN
|
||||
# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
|
||||
# include "kernel/bvh/local.h"
|
||||
# endif
|
||||
# endif /* __BVH_LOCAL__ */
|
||||
|
||||
/* Volume BVH traversal */
|
||||
ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
ccl_private const Ray *ray,
|
||||
ccl_private LocalIntersection *local_isect,
|
||||
int local_object,
|
||||
ccl_private uint *lcg_state,
|
||||
int max_hits)
|
||||
{
|
||||
if (!intersection_ray_valid(ray)) {
|
||||
if (local_isect) {
|
||||
local_isect->num_hits = 0;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
# if defined(__VOLUME__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume
|
||||
# define BVH_FUNCTION_FEATURES BVH_HAIR
|
||||
# include "kernel/bvh/volume.h"
|
||||
|
||||
# if defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
|
||||
# include "kernel/bvh/volume.h"
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
}
|
||||
# endif
|
||||
# endif /* __VOLUME__ */
|
||||
|
||||
/* Record all intersections - Shadow BVH traversal */
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
}
|
||||
# endif
|
||||
|
||||
/* Transparent shadow BVH traversal, recording multiple intersections. */
|
||||
|
||||
# ifdef __SHADOW_RECORD_ALL__
|
||||
|
||||
# if defined(__SHADOW_RECORD_ALL__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all
|
||||
# define BVH_FUNCTION_FEATURES BVH_POINTCLOUD
|
||||
# include "kernel/bvh/shadow_all.h"
|
||||
@@ -110,412 +169,6 @@ CCL_NAMESPACE_BEGIN
|
||||
# include "kernel/bvh/shadow_all.h"
|
||||
# endif
|
||||
|
||||
# endif /* __SHADOW_RECORD_ALL__ */
|
||||
|
||||
/* Record all intersections - Volume BVH traversal. */
|
||||
|
||||
# if defined(__VOLUME_RECORD_ALL__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_all
|
||||
# define BVH_FUNCTION_FEATURES BVH_HAIR
|
||||
# include "kernel/bvh/volume_all.h"
|
||||
|
||||
# if defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
|
||||
# include "kernel/bvh/volume_all.h"
|
||||
# endif
|
||||
# endif /* __VOLUME_RECORD_ALL__ */
|
||||
|
||||
# undef BVH_FEATURE
|
||||
# undef BVH_NAME_JOIN
|
||||
# undef BVH_NAME_EVAL
|
||||
# undef BVH_FUNCTION_FULL_NAME
|
||||
|
||||
#endif /* !defined(__KERNEL_GPU_RAYTRACING__) */
|
||||
|
||||
ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray)
|
||||
{
|
||||
/* NOTE: Due to some vectorization code non-finite origin point might
|
||||
* cause lots of false-positive intersections which will overflow traversal
|
||||
* stack.
|
||||
* This code is a quick way to perform early output, to avoid crashes in
|
||||
* such cases.
|
||||
* From production scenes so far it seems it's enough to test first element
|
||||
* only.
|
||||
* Scene intersection may also be called with empty rays for conditional trace
|
||||
* calls that evaluate to false, so filter those out.
|
||||
*/
|
||||
return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f;
|
||||
}
|
||||
|
||||
ccl_device_intersect bool scene_intersect(KernelGlobals kg,
|
||||
ccl_private const Ray *ray,
|
||||
const uint visibility,
|
||||
ccl_private Intersection *isect)
|
||||
{
|
||||
#ifdef __KERNEL_OPTIX__
|
||||
uint p0 = 0;
|
||||
uint p1 = 0;
|
||||
uint p2 = 0;
|
||||
uint p3 = 0;
|
||||
uint p4 = visibility;
|
||||
uint p5 = PRIMITIVE_NONE;
|
||||
uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
|
||||
uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
|
||||
|
||||
uint ray_mask = visibility & 0xFF;
|
||||
uint ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT;
|
||||
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
|
||||
ray_mask = 0xFF;
|
||||
}
|
||||
else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
|
||||
ray_flags |= OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT;
|
||||
}
|
||||
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
|
||||
ray->P,
|
||||
ray->D,
|
||||
ray->tmin,
|
||||
ray->tmax,
|
||||
ray->time,
|
||||
ray_mask,
|
||||
ray_flags,
|
||||
0, /* SBT offset for PG_HITD */
|
||||
0,
|
||||
0,
|
||||
p0,
|
||||
p1,
|
||||
p2,
|
||||
p3,
|
||||
p4,
|
||||
p5,
|
||||
p6,
|
||||
p7);
|
||||
|
||||
isect->t = __uint_as_float(p0);
|
||||
isect->u = __uint_as_float(p1);
|
||||
isect->v = __uint_as_float(p2);
|
||||
isect->prim = p3;
|
||||
isect->object = p4;
|
||||
isect->type = p5;
|
||||
|
||||
return p5 != PRIMITIVE_NONE;
|
||||
#elif defined(__METALRT__)
|
||||
|
||||
if (!scene_intersect_valid(ray)) {
|
||||
isect->t = ray->tmax;
|
||||
isect->type = PRIMITIVE_NONE;
|
||||
return false;
|
||||
}
|
||||
|
||||
# if defined(__KERNEL_DEBUG__)
|
||||
if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
|
||||
isect->t = ray->tmax;
|
||||
isect->type = PRIMITIVE_NONE;
|
||||
kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
|
||||
isect->t = ray->tmax;
|
||||
isect->type = PRIMITIVE_NONE;
|
||||
kernel_assert(!"Invalid ift_default");
|
||||
return false;
|
||||
}
|
||||
# endif
|
||||
|
||||
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
|
||||
metalrt_intersector_type metalrt_intersect;
|
||||
|
||||
if (!kernel_data.bvh.have_curves) {
|
||||
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
|
||||
}
|
||||
|
||||
MetalRTIntersectionPayload payload;
|
||||
payload.self = ray->self;
|
||||
payload.u = 0.0f;
|
||||
payload.v = 0.0f;
|
||||
payload.visibility = visibility;
|
||||
|
||||
typename metalrt_intersector_type::result_type intersection;
|
||||
|
||||
uint ray_mask = visibility & 0xFF;
|
||||
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
|
||||
ray_mask = 0xFF;
|
||||
/* No further intersector setup required: Default MetalRT behavior is any-hit. */
|
||||
}
|
||||
else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
|
||||
/* No further intersector setup required: Shadow ray early termination is controlled by the
|
||||
* intersection handler */
|
||||
}
|
||||
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time = ray->time;
|
||||
intersection = metalrt_intersect.intersect(r,
|
||||
metal_ancillaries->accel_struct,
|
||||
ray_mask,
|
||||
ray->time,
|
||||
metal_ancillaries->ift_default,
|
||||
payload);
|
||||
# else
|
||||
intersection = metalrt_intersect.intersect(
|
||||
r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
|
||||
# endif
|
||||
|
||||
if (intersection.type == intersection_type::none) {
|
||||
isect->t = ray->tmax;
|
||||
isect->type = PRIMITIVE_NONE;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
isect->t = intersection.distance;
|
||||
|
||||
isect->prim = payload.prim;
|
||||
isect->type = payload.type;
|
||||
isect->object = intersection.user_instance_id;
|
||||
|
||||
isect->t = intersection.distance;
|
||||
if (intersection.type == intersection_type::triangle) {
|
||||
isect->u = 1.0f - intersection.triangle_barycentric_coord.y -
|
||||
intersection.triangle_barycentric_coord.x;
|
||||
isect->v = intersection.triangle_barycentric_coord.x;
|
||||
}
|
||||
else {
|
||||
isect->u = payload.u;
|
||||
isect->v = payload.v;
|
||||
}
|
||||
|
||||
return isect->type != PRIMITIVE_NONE;
|
||||
|
||||
#else
|
||||
|
||||
if (!scene_intersect_valid(ray)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
isect->t = ray->tmax;
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
|
||||
IntersectContext rtc_ctx(&ctx);
|
||||
RTCRayHit ray_hit;
|
||||
ctx.ray = ray;
|
||||
kernel_embree_setup_rayhit(*ray, ray_hit, visibility);
|
||||
rtcIntersect1(kernel_data.device_bvh, &rtc_ctx.context, &ray_hit);
|
||||
if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID &&
|
||||
ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
|
||||
kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
# endif /* __EMBREE__ */
|
||||
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_hair_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_hair(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect(kg, ray, isect, visibility);
|
||||
#endif /* __KERNEL_OPTIX__ */
|
||||
}
|
||||
|
||||
#ifdef __BVH_LOCAL__
|
||||
/* Trace `ray` against the geometry of a single object and record the hits.
 *
 * kg:           kernel globals.
 * ray:          ray to trace; rejected up front when scene_intersect_valid() fails.
 * local_isect:  receives the recorded hits. May be NULL; every back-end
 *               null-checks it before touching it.
 * local_object: index of the object to restrict the query to.
 * lcg_state:    optional random-number state, forwarded to the back-end and
 *               written back afterwards where the back-end works on a copy.
 *               May be NULL.
 * max_hits:     upper bound on the number of hits to record.
 *
 * Returns true when the back-end reports a hit.
 *
 * Exactly one of three back-ends is compiled in: OptiX, MetalRT, or the CPU
 * path (Embree when a device BVH is present, otherwise the built-in BVH2). */
ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
                                                ccl_private const Ray *ray,
                                                ccl_private LocalIntersection *local_isect,
                                                int local_object,
                                                ccl_private uint *lcg_state,
                                                int max_hits)
{
#  ifdef __KERNEL_OPTIX__
  /* Pointers are split into two 32-bit payload registers each. */
  uint p0 = pointer_pack_to_uint_0(lcg_state);
  uint p1 = pointer_pack_to_uint_1(lcg_state);
  uint p2 = pointer_pack_to_uint_0(local_isect);
  uint p3 = pointer_pack_to_uint_1(local_isect);
  uint p4 = local_object;
  uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
  uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;

  /* Is set to zero on miss or if ray is aborted, so can be used as return value. */
  uint p5 = max_hits;

  if (local_isect) {
    local_isect->num_hits = 0; /* Initialize hit count to zero. */
  }

  /* An invalid ray is traced against a null handle instead of being skipped. */
  optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
             ray->P,
             ray->D,
             ray->tmin,
             ray->tmax,
             ray->time,
             0xFF,
             /* Need to always call into __anyhit__kernel_optix_local_hit. */
             OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
             2, /* SBT offset for PG_HITL */
             0,
             0,
             p0,
             p1,
             p2,
             p3,
             p4,
             p5,
             p6,
             p7);

  return p5;
#  elif defined(__METALRT__)
  if (!scene_intersect_valid(ray)) {
    if (local_isect) {
      local_isect->num_hits = 0;
    }
    return false;
  }

#    if defined(__KERNEL_DEBUG__)
  if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
    if (local_isect) {
      local_isect->num_hits = 0;
    }
    kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
    return false;
  }

  if (is_null_intersection_function_table(metal_ancillaries->ift_local)) {
    if (local_isect) {
      local_isect->num_hits = 0;
    }
    kernel_assert(!"Invalid ift_local");
    return false;
  }
#    endif

  metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
  metalrt_intersector_type metalrt_intersect;

  metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
  if (!kernel_data.bvh.have_curves) {
    metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
  }

  MetalRTIntersectionLocalPayload payload;
  payload.self = ray->self;
  payload.local_object = local_object;
  payload.max_hits = max_hits;
  payload.local_isect.num_hits = 0;
  /* Always give the flag a defined value: it used to stay uninitialized when
   * no random state was supplied. */
  payload.has_lcg_state = false;
  if (lcg_state) {
    payload.has_lcg_state = true;
    payload.lcg_state = *lcg_state;
  }
  payload.result = false;

  /* The returned intersection record is not used; results travel in the payload. */
  typename metalrt_intersector_type::result_type intersection;

#    if defined(__METALRT_MOTION__)
  intersection = metalrt_intersect.intersect(
      r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload);
#    else
  intersection = metalrt_intersect.intersect(
      r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload);
#    endif

  if (lcg_state) {
    *lcg_state = payload.lcg_state;
  }
  /* The output is optional, matching the NULL checks on the early-out paths. */
  if (local_isect) {
    *local_isect = payload.local_isect;
  }

  return payload.result;

#  else

  if (!scene_intersect_valid(ray)) {
    if (local_isect) {
      local_isect->num_hits = 0;
    }
    return false;
  }

#    ifdef __EMBREE__
  if (kernel_data.device_bvh) {
    /* Objects whose transform was not applied are queried through their own scene. */
    const bool has_bvh = !(kernel_data_fetch(object_flag, local_object) &
                           SD_OBJECT_TRANSFORM_APPLIED);
    CCLIntersectContext ctx(
        kg, has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL);
    ctx.lcg_state = lcg_state;
    ctx.max_hits = max_hits;
    ctx.ray = ray;
    ctx.local_isect = local_isect;
    if (local_isect) {
      local_isect->num_hits = 0;
    }
    ctx.local_object_id = local_object;
    IntersectContext rtc_ctx(&ctx);
    RTCRay rtc_ray;
    kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);

    /* If this object has its own BVH, use it. */
    if (has_bvh) {
      RTCGeometry geom = rtcGetGeometry(kernel_data.device_bvh, local_object * 2);
      if (geom) {
        float3 P = ray->P;
        float3 dir = ray->D;
        float3 idir = ray->D;
        Transform ob_itfm;
        /* Move the ray into object space and rescale its far distance accordingly. */
        rtc_ray.tfar = ray->tmax *
                       bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
        /* bvh_instance_motion_push() returns the inverse transform but
         * it's not needed here. */
        (void)ob_itfm;

        rtc_ray.org_x = P.x;
        rtc_ray.org_y = P.y;
        rtc_ray.org_z = P.z;
        rtc_ray.dir_x = dir.x;
        rtc_ray.dir_y = dir.y;
        rtc_ray.dir_z = dir.z;
        RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom);
        kernel_assert(scene);
        if (scene) {
          rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray);
        }
      }
    }
    else {
      rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
    }

    /* rtcOccluded1 sets tfar to -inf if a hit was found. */
    return (local_isect && local_isect->num_hits > 0) || (rtc_ray.tfar < 0);
  }
#    endif /* __EMBREE__ */

#    ifdef __OBJECT_MOTION__
  if (kernel_data.bvh.have_motion) {
    return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
  }
#    endif /* __OBJECT_MOTION__ */
  return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
#  endif   /* __KERNEL_OPTIX__ / __METALRT__ / CPU */
}
|
||||
#endif
|
||||
|
||||
#ifdef __SHADOW_RECORD_ALL__
|
||||
ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
||||
IntegratorShadowState state,
|
||||
ccl_private const Ray *ray,
|
||||
@@ -524,109 +177,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
||||
ccl_private uint *num_recorded_hits,
|
||||
ccl_private float *throughput)
|
||||
{
|
||||
# ifdef __KERNEL_OPTIX__
|
||||
uint p0 = state;
|
||||
uint p1 = __float_as_uint(1.0f); /* Throughput. */
|
||||
uint p2 = 0; /* Number of hits. */
|
||||
uint p3 = max_hits;
|
||||
uint p4 = visibility;
|
||||
uint p5 = false;
|
||||
uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
|
||||
uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
|
||||
|
||||
uint ray_mask = visibility & 0xFF;
|
||||
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
|
||||
ray_mask = 0xFF;
|
||||
}
|
||||
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
|
||||
ray->P,
|
||||
ray->D,
|
||||
ray->tmin,
|
||||
ray->tmax,
|
||||
ray->time,
|
||||
ray_mask,
|
||||
/* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */
|
||||
OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
|
||||
1, /* SBT offset for PG_HITS */
|
||||
0,
|
||||
0,
|
||||
p0,
|
||||
p1,
|
||||
p2,
|
||||
p3,
|
||||
p4,
|
||||
p5,
|
||||
p6,
|
||||
p7);
|
||||
|
||||
*num_recorded_hits = uint16_unpack_from_uint_0(p2);
|
||||
*throughput = __uint_as_float(p1);
|
||||
|
||||
return p5;
|
||||
# elif defined(__METALRT__)
|
||||
|
||||
if (!scene_intersect_valid(ray)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
# if defined(__KERNEL_DEBUG__)
|
||||
if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
|
||||
kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (is_null_intersection_function_table(metal_ancillaries->ift_shadow)) {
|
||||
kernel_assert(!"Invalid ift_shadow");
|
||||
return false;
|
||||
}
|
||||
# endif
|
||||
|
||||
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
|
||||
metalrt_intersector_type metalrt_intersect;
|
||||
|
||||
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
||||
if (!kernel_data.bvh.have_curves) {
|
||||
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
|
||||
}
|
||||
|
||||
MetalRTIntersectionShadowPayload payload;
|
||||
payload.self = ray->self;
|
||||
payload.visibility = visibility;
|
||||
payload.max_hits = max_hits;
|
||||
payload.num_hits = 0;
|
||||
payload.num_recorded_hits = 0;
|
||||
payload.throughput = 1.0f;
|
||||
payload.result = false;
|
||||
payload.state = state;
|
||||
|
||||
uint ray_mask = visibility & 0xFF;
|
||||
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
|
||||
ray_mask = 0xFF;
|
||||
}
|
||||
|
||||
typename metalrt_intersector_type::result_type intersection;
|
||||
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time = ray->time;
|
||||
intersection = metalrt_intersect.intersect(r,
|
||||
metal_ancillaries->accel_struct,
|
||||
ray_mask,
|
||||
ray->time,
|
||||
metal_ancillaries->ift_shadow,
|
||||
payload);
|
||||
# else
|
||||
intersection = metalrt_intersect.intersect(
|
||||
r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_shadow, payload);
|
||||
# endif
|
||||
|
||||
*num_recorded_hits = payload.num_recorded_hits;
|
||||
*throughput = payload.throughput;
|
||||
|
||||
return payload.result;
|
||||
|
||||
# else
|
||||
if (!scene_intersect_valid(ray)) {
|
||||
if (!intersection_ray_valid(ray)) {
|
||||
*num_recorded_hits = 0;
|
||||
*throughput = 1.0f;
|
||||
return false;
|
||||
@@ -634,21 +185,10 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
|
||||
Intersection *isect_array = (Intersection *)state->shadow_isect;
|
||||
ctx.isect_s = isect_array;
|
||||
ctx.max_hits = max_hits;
|
||||
ctx.ray = ray;
|
||||
IntersectContext rtc_ctx(&ctx);
|
||||
RTCRay rtc_ray;
|
||||
kernel_embree_setup_ray(*ray, rtc_ray, visibility);
|
||||
rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
|
||||
|
||||
*num_recorded_hits = ctx.num_recorded_hits;
|
||||
*throughput = ctx.throughput;
|
||||
return ctx.opaque_hit;
|
||||
return kernel_embree_intersect_shadow_all(
|
||||
kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
# endif /* __EMBREE__ */
|
||||
# endif
|
||||
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
@@ -662,7 +202,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
||||
return bvh_intersect_shadow_all_motion(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
@@ -673,132 +213,29 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
||||
|
||||
return bvh_intersect_shadow_all(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
# endif /* __KERNEL_OPTIX__ */
|
||||
}
|
||||
#endif /* __SHADOW_RECORD_ALL__ */
|
||||
# endif /* __SHADOW_RECORD_ALL__ */
|
||||
|
||||
/* Volume BVH traversal, for initializing or updating the volume stack. */
|
||||
|
||||
# if defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__)
|
||||
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume
|
||||
# define BVH_FUNCTION_FEATURES BVH_HAIR
|
||||
# include "kernel/bvh/volume.h"
|
||||
|
||||
# if defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
|
||||
# include "kernel/bvh/volume.h"
|
||||
# endif
|
||||
|
||||
#ifdef __VOLUME__
|
||||
ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
|
||||
ccl_private const Ray *ray,
|
||||
ccl_private Intersection *isect,
|
||||
const uint visibility)
|
||||
{
|
||||
# ifdef __KERNEL_OPTIX__
|
||||
uint p0 = 0;
|
||||
uint p1 = 0;
|
||||
uint p2 = 0;
|
||||
uint p3 = 0;
|
||||
uint p4 = visibility;
|
||||
uint p5 = PRIMITIVE_NONE;
|
||||
uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
|
||||
uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
|
||||
|
||||
uint ray_mask = visibility & 0xFF;
|
||||
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
|
||||
ray_mask = 0xFF;
|
||||
}
|
||||
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
|
||||
ray->P,
|
||||
ray->D,
|
||||
ray->tmin,
|
||||
ray->tmax,
|
||||
ray->time,
|
||||
ray_mask,
|
||||
/* Need to always call into __anyhit__kernel_optix_volume_test. */
|
||||
OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
|
||||
3, /* SBT offset for PG_HITV */
|
||||
0,
|
||||
0,
|
||||
p0,
|
||||
p1,
|
||||
p2,
|
||||
p3,
|
||||
p4,
|
||||
p5,
|
||||
p6,
|
||||
p7);
|
||||
|
||||
isect->t = __uint_as_float(p0);
|
||||
isect->u = __uint_as_float(p1);
|
||||
isect->v = __uint_as_float(p2);
|
||||
isect->prim = p3;
|
||||
isect->object = p4;
|
||||
isect->type = p5;
|
||||
|
||||
return p5 != PRIMITIVE_NONE;
|
||||
# elif defined(__METALRT__)
|
||||
|
||||
if (!scene_intersect_valid(ray)) {
|
||||
return false;
|
||||
}
|
||||
# if defined(__KERNEL_DEBUG__)
|
||||
if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
|
||||
kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
|
||||
kernel_assert(!"Invalid ift_default");
|
||||
return false;
|
||||
}
|
||||
# endif
|
||||
|
||||
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
|
||||
metalrt_intersector_type metalrt_intersect;
|
||||
|
||||
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
||||
if (!kernel_data.bvh.have_curves) {
|
||||
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
|
||||
}
|
||||
|
||||
MetalRTIntersectionPayload payload;
|
||||
payload.self = ray->self;
|
||||
payload.visibility = visibility;
|
||||
|
||||
typename metalrt_intersector_type::result_type intersection;
|
||||
|
||||
uint ray_mask = visibility & 0xFF;
|
||||
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
|
||||
ray_mask = 0xFF;
|
||||
}
|
||||
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time = ray->time;
|
||||
intersection = metalrt_intersect.intersect(r,
|
||||
metal_ancillaries->accel_struct,
|
||||
ray_mask,
|
||||
ray->time,
|
||||
metal_ancillaries->ift_default,
|
||||
payload);
|
||||
# else
|
||||
intersection = metalrt_intersect.intersect(
|
||||
r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
|
||||
# endif
|
||||
|
||||
if (intersection.type == intersection_type::none) {
|
||||
return false;
|
||||
}
|
||||
|
||||
isect->prim = payload.prim;
|
||||
isect->type = payload.type;
|
||||
isect->object = intersection.user_instance_id;
|
||||
|
||||
isect->t = intersection.distance;
|
||||
if (intersection.type == intersection_type::triangle) {
|
||||
isect->u = 1.0f - intersection.triangle_barycentric_coord.y -
|
||||
intersection.triangle_barycentric_coord.x;
|
||||
isect->v = intersection.triangle_barycentric_coord.x;
|
||||
}
|
||||
else {
|
||||
isect->u = payload.u;
|
||||
isect->v = payload.v;
|
||||
}
|
||||
|
||||
return isect->type != PRIMITIVE_NONE;
|
||||
|
||||
# else
|
||||
if (!scene_intersect_valid(ray)) {
|
||||
if (!intersection_ray_valid(ray)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -809,44 +246,56 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
return bvh_intersect_volume(kg, ray, isect, visibility);
|
||||
# endif /* __KERNEL_OPTIX__ */
|
||||
}
|
||||
#endif /* __VOLUME__ */
|
||||
# endif /* defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__) */
|
||||
|
||||
#ifdef __VOLUME_RECORD_ALL__
|
||||
ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals kg,
|
||||
ccl_private const Ray *ray,
|
||||
ccl_private Intersection *isect,
|
||||
const uint max_hits,
|
||||
const uint visibility)
|
||||
/* Volume BVH traversal, for initializing or updating the volume stack.
|
||||
* Variation that records multiple intersections at once. */
|
||||
|
||||
# if defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__)
|
||||
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_all
|
||||
# define BVH_FUNCTION_FEATURES BVH_HAIR
|
||||
# include "kernel/bvh/volume_all.h"
|
||||
|
||||
# if defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
|
||||
# include "kernel/bvh/volume_all.h"
|
||||
# endif
|
||||
|
||||
ccl_device_intersect uint scene_intersect_volume(KernelGlobals kg,
|
||||
ccl_private const Ray *ray,
|
||||
ccl_private Intersection *isect,
|
||||
const uint max_hits,
|
||||
const uint visibility)
|
||||
{
|
||||
if (!scene_intersect_valid(ray)) {
|
||||
if (!intersection_ray_valid(ray)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
|
||||
ctx.isect_s = isect;
|
||||
ctx.max_hits = max_hits;
|
||||
ctx.num_hits = 0;
|
||||
ctx.ray = ray;
|
||||
IntersectContext rtc_ctx(&ctx);
|
||||
RTCRay rtc_ray;
|
||||
kernel_embree_setup_ray(*ray, rtc_ray, visibility);
|
||||
rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
|
||||
return ctx.num_hits;
|
||||
return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
|
||||
}
|
||||
# endif /* __EMBREE__ */
|
||||
# endif
|
||||
|
||||
# ifdef __OBJECT_MOTION__
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
|
||||
}
|
||||
#endif /* __VOLUME_RECORD_ALL__ */
|
||||
|
||||
# endif /* defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__) */
|
||||
|
||||
# undef BVH_FEATURE
|
||||
# undef BVH_NAME_JOIN
|
||||
# undef BVH_NAME_EVAL
|
||||
# undef BVH_FUNCTION_FULL_NAME
|
||||
|
||||
#endif /* __BVH2__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -1,176 +0,0 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2018-2022 Blender Foundation. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <embree3/rtcore_ray.h>
|
||||
#include <embree3/rtcore_scene.h>
|
||||
|
||||
#include "kernel/device/cpu/compat.h"
|
||||
#include "kernel/device/cpu/globals.h"
|
||||
|
||||
#include "kernel/bvh/util.h"
|
||||
|
||||
#include "util/vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
struct CCLIntersectContext {
|
||||
typedef enum {
|
||||
RAY_REGULAR = 0,
|
||||
RAY_SHADOW_ALL = 1,
|
||||
RAY_LOCAL = 2,
|
||||
RAY_SSS = 3,
|
||||
RAY_VOLUME_ALL = 4,
|
||||
} RayType;
|
||||
|
||||
KernelGlobals kg;
|
||||
RayType type;
|
||||
|
||||
/* For avoiding self intersections */
|
||||
const Ray *ray;
|
||||
|
||||
/* for shadow rays */
|
||||
Intersection *isect_s;
|
||||
uint max_hits;
|
||||
uint num_hits;
|
||||
uint num_recorded_hits;
|
||||
float throughput;
|
||||
float max_t;
|
||||
bool opaque_hit;
|
||||
|
||||
/* for SSS Rays: */
|
||||
LocalIntersection *local_isect;
|
||||
int local_object_id;
|
||||
uint *lcg_state;
|
||||
|
||||
CCLIntersectContext(KernelGlobals kg_, RayType type_)
|
||||
{
|
||||
kg = kg_;
|
||||
type = type_;
|
||||
ray = NULL;
|
||||
max_hits = 1;
|
||||
num_hits = 0;
|
||||
num_recorded_hits = 0;
|
||||
throughput = 1.0f;
|
||||
max_t = FLT_MAX;
|
||||
opaque_hit = false;
|
||||
isect_s = NULL;
|
||||
local_isect = NULL;
|
||||
local_object_id = -1;
|
||||
lcg_state = NULL;
|
||||
}
|
||||
};
|
||||
|
||||
class IntersectContext {
|
||||
public:
|
||||
IntersectContext(CCLIntersectContext *ctx)
|
||||
{
|
||||
rtcInitIntersectContext(&context);
|
||||
userRayExt = ctx;
|
||||
}
|
||||
RTCIntersectContext context;
|
||||
CCLIntersectContext *userRayExt;
|
||||
};
|
||||
|
||||
/* Copy a Cycles ray into an Embree RTCRay.
 *
 * Origin, direction, the [tmin, tmax] interval and the time are taken from
 * `ray`; `visibility` is stored as the Embree ray mask. */
ccl_device_inline void kernel_embree_setup_ray(const Ray &ray,
                                               RTCRay &rtc_ray,
                                               const uint visibility)
{
  const float3 &origin = ray.P;
  const float3 &direction = ray.D;

  rtc_ray.org_x = origin.x;
  rtc_ray.org_y = origin.y;
  rtc_ray.org_z = origin.z;

  rtc_ray.dir_x = direction.x;
  rtc_ray.dir_y = direction.y;
  rtc_ray.dir_z = direction.z;

  rtc_ray.tnear = ray.tmin;
  rtc_ray.tfar = ray.tmax;

  rtc_ray.time = ray.time;
  rtc_ray.mask = visibility;
}
|
||||
|
||||
/* Prepare an Embree RTCRayHit for a closest-hit query.
 *
 * The hit part is marked as "no hit" (invalid geometry and instance ID) and
 * the ray part is filled from `ray` and `visibility`. */
ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray,
                                                  RTCRayHit &rayhit,
                                                  const uint visibility)
{
  RTCHit &hit = rayhit.hit;
  hit.geomID = RTC_INVALID_GEOMETRY_ID;
  hit.instID[0] = RTC_INVALID_GEOMETRY_ID;

  kernel_embree_setup_ray(ray, rayhit.ray, visibility);
}
|
||||
|
||||
/* Report whether an Embree hit is one that `ray` asked to ignore.
 *
 * The object index is half the instance ID for instanced hits and half the
 * geometry ID otherwise. The primitive is only resolved, and
 * intersection_skip_self_shadow() only consulted, when that object matches
 * one of the objects stored in `ray->self`. */
ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg,
                                                          const RTCHit *hit,
                                                          const Ray *ray)
{
  const bool is_instanced = (hit->instID[0] != RTC_INVALID_GEOMETRY_ID);
  const int object_id = (is_instanced ? hit->instID[0] : hit->geomID) / 2;

  /* Nothing to skip unless the hit lies on one of the ray's own objects. */
  if (ray->self.object != object_id && ray->self.light_object != object_id) {
    return false;
  }

  /* Instanced geometry lives in the per-instance scene stored as the user
   * data of the instance geometry. */
  RTCScene prim_scene = kernel_data.device_bvh;
  if (is_instanced) {
    prim_scene = (RTCScene)rtcGetGeometryUserData(
        rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
  }

  /* The geometry's user data holds an offset that is added to Embree's primID. */
  const int prim_id = hit->primID +
                      (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(prim_scene, hit->geomID));

  return intersection_skip_self_shadow(ray->self, object_id, prim_id);
}
|
||||
|
||||
/* Translate an Embree hit into a Cycles Intersection.
 *
 * The distance is taken from the ray's updated far value. Object and
 * primitive are resolved through the instance scene when the hit is
 * instanced, and through the top-level scene otherwise. Odd geometry IDs are
 * treated as hair and go through the curve segment table; all other hits get
 * the object's primitive type and remapped barycentric coordinates. */
ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
                                                 const RTCRay *ray,
                                                 const RTCHit *hit,
                                                 Intersection *isect)
{
  isect->t = ray->tfar;

  /* Pick the scene that owns the hit geometry and derive the object index. */
  const bool is_instanced = (hit->instID[0] != RTC_INVALID_GEOMETRY_ID);
  RTCScene prim_scene = kernel_data.device_bvh;
  if (is_instanced) {
    prim_scene = (RTCScene)rtcGetGeometryUserData(
        rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
    isect->object = hit->instID[0] / 2;
  }
  else {
    isect->object = hit->geomID / 2;
  }

  /* The geometry's user data holds an offset that is added to Embree's primID. */
  isect->prim = hit->primID +
                (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(prim_scene, hit->geomID));

  if (hit->geomID & 1) {
    /* Hair: the index computed above addresses a curve segment, which in turn
     * names the real primitive and its type. */
    const KernelCurveSegment segment = kernel_data_fetch(curve_segments, isect->prim);
    isect->type = segment.type;
    isect->prim = segment.prim;
    isect->u = hit->u;
    isect->v = hit->v;
  }
  else {
    isect->type = kernel_data_fetch(objects, isect->object).primitive_type;
    isect->u = 1.0f - hit->v - hit->u;
    isect->v = hit->u;
  }
}
|
||||
|
||||
/* Translate an Embree hit from a local (SSS) query into a Cycles Intersection.
 *
 * The object is supplied by the caller rather than taken from the hit. The
 * primitive is looked up in that object's own scene, which is the user data
 * of geometry `object * 2` in the top-level scene. */
ccl_device_inline void kernel_embree_convert_sss_hit(
    KernelGlobals kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object)
{
  RTCScene object_scene = (RTCScene)rtcGetGeometryUserData(
      rtcGetGeometry(kernel_data.device_bvh, object * 2));
  /* The geometry's user data holds an offset that is added to Embree's primID. */
  const intptr_t prim_offset = (intptr_t)rtcGetGeometryUserData(
      rtcGetGeometry(object_scene, hit->geomID));

  isect->object = object;
  isect->type = kernel_data_fetch(objects, object).primitive_type;
  isect->prim = hit->primID + prim_offset;

  isect->t = ray->tfar;
  isect->u = 1.0f - hit->v - hit->u;
  isect->v = hit->u;
}
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -59,14 +59,10 @@ ccl_device_inline
|
||||
const int object_flag = kernel_data_fetch(object_flag, local_object);
|
||||
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
Transform ob_itfm;
|
||||
const float t_world_to_instance = bvh_instance_motion_push(
|
||||
kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir);
|
||||
#else
|
||||
const float t_world_to_instance = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir);
|
||||
bvh_instance_push(kg, local_object, ray, &P, &dir, &idir);
|
||||
#endif
|
||||
isect_t *= t_world_to_instance;
|
||||
tmin *= t_world_to_instance;
|
||||
object = local_object;
|
||||
}
|
||||
|
||||
|
@@ -1,37 +0,0 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2021-2022 Blender Foundation */
|
||||
|
||||
struct MetalRTIntersectionPayload {
|
||||
RaySelfPrimitives self;
|
||||
uint visibility;
|
||||
float u, v;
|
||||
int prim;
|
||||
int type;
|
||||
#if defined(__METALRT_MOTION__)
|
||||
float time;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct MetalRTIntersectionLocalPayload {
|
||||
RaySelfPrimitives self;
|
||||
uint local_object;
|
||||
uint lcg_state;
|
||||
short max_hits;
|
||||
bool has_lcg_state;
|
||||
bool result;
|
||||
LocalIntersection local_isect;
|
||||
};
|
||||
|
||||
struct MetalRTIntersectionShadowPayload {
|
||||
RaySelfPrimitives self;
|
||||
uint visibility;
|
||||
#if defined(__METALRT_MOTION__)
|
||||
float time;
|
||||
#endif
|
||||
int state;
|
||||
float throughput;
|
||||
short max_hits;
|
||||
short num_hits;
|
||||
short num_recorded_hits;
|
||||
bool result;
|
||||
};
|
@@ -53,23 +53,11 @@ ccl_device_inline
|
||||
int object = OBJECT_NONE;
|
||||
uint num_hits = 0;
|
||||
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
Transform ob_itfm;
|
||||
#endif
|
||||
|
||||
/* Max distance in world space. May be dynamically reduced when max number of
|
||||
* recorded hits is exceeded and we no longer need to find hits beyond the max
|
||||
* distance found. */
|
||||
float t_max_world = ray->tmax;
|
||||
|
||||
/* Current maximum distance to the intersection.
|
||||
* Is calculated as a ray length, transformed to an object space when entering
|
||||
* instance node. */
|
||||
float t_max_current = ray->tmax;
|
||||
|
||||
/* Conversion from world to local space for the current instance if any, 1.0
|
||||
* otherwise. */
|
||||
float t_world_to_instance = 1.0f;
|
||||
const float tmax = ray->tmax;
|
||||
float tmax_hits = tmax;
|
||||
|
||||
*r_num_recorded_hits = 0;
|
||||
*r_throughput = 1.0f;
|
||||
@@ -90,7 +78,7 @@ ccl_device_inline
|
||||
#endif
|
||||
idir,
|
||||
tmin,
|
||||
t_max_current,
|
||||
tmax,
|
||||
node_addr,
|
||||
visibility,
|
||||
dist);
|
||||
@@ -158,16 +146,8 @@ ccl_device_inline
|
||||
|
||||
switch (type & PRIMITIVE_ALL) {
|
||||
case PRIMITIVE_TRIANGLE: {
|
||||
hit = triangle_intersect(kg,
|
||||
&isect,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
t_max_current,
|
||||
visibility,
|
||||
prim_object,
|
||||
prim,
|
||||
prim_addr);
|
||||
hit = triangle_intersect(
|
||||
kg, &isect, P, dir, tmin, tmax, visibility, prim_object, prim, prim_addr);
|
||||
break;
|
||||
}
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
@@ -177,7 +157,7 @@ ccl_device_inline
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
t_max_current,
|
||||
tmax,
|
||||
ray->time,
|
||||
visibility,
|
||||
prim_object,
|
||||
@@ -200,16 +180,8 @@ ccl_device_inline
|
||||
}
|
||||
|
||||
const int curve_type = kernel_data_fetch(prim_type, prim_addr);
|
||||
hit = curve_intersect(kg,
|
||||
&isect,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
t_max_current,
|
||||
prim_object,
|
||||
prim,
|
||||
ray->time,
|
||||
curve_type);
|
||||
hit = curve_intersect(
|
||||
kg, &isect, P, dir, tmin, tmax, prim_object, prim, ray->time, curve_type);
|
||||
|
||||
break;
|
||||
}
|
||||
@@ -226,16 +198,8 @@ ccl_device_inline
|
||||
}
|
||||
|
||||
const int point_type = kernel_data_fetch(prim_type, prim_addr);
|
||||
hit = point_intersect(kg,
|
||||
&isect,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
t_max_current,
|
||||
prim_object,
|
||||
prim,
|
||||
ray->time,
|
||||
point_type);
|
||||
hit = point_intersect(
|
||||
kg, &isect, P, dir, tmin, tmax, prim_object, prim, ray->time, point_type);
|
||||
break;
|
||||
}
|
||||
#endif /* BVH_FEATURE(BVH_POINTCLOUD) */
|
||||
@@ -247,9 +211,6 @@ ccl_device_inline
|
||||
|
||||
/* shadow ray early termination */
|
||||
if (hit) {
|
||||
/* Convert intersection distance to world space. */
|
||||
isect.t /= t_world_to_instance;
|
||||
|
||||
/* detect if this surface has a shader with transparent shadows */
|
||||
/* todo: optimize so primitive visibility flag indicates if
|
||||
* the primitive has a transparent shadow shader? */
|
||||
@@ -281,7 +242,7 @@ ccl_device_inline
|
||||
if (record_intersection) {
|
||||
/* Test if we need to record this transparent intersection. */
|
||||
const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
|
||||
if (*r_num_recorded_hits < max_record_hits || isect.t < t_max_world) {
|
||||
if (*r_num_recorded_hits < max_record_hits || isect.t < tmax_hits) {
|
||||
/* If maximum number of hits was reached, replace the intersection with the
|
||||
* highest distance. We want to find the N closest intersections. */
|
||||
const uint num_recorded_hits = min(*r_num_recorded_hits, max_record_hits);
|
||||
@@ -303,7 +264,7 @@ ccl_device_inline
|
||||
}
|
||||
|
||||
/* Limit the ray distance and stop counting hits beyond this. */
|
||||
t_max_world = max(isect.t, max_t);
|
||||
tmax_hits = max(isect.t, max_t);
|
||||
}
|
||||
|
||||
integrator_state_write_shadow_isect(state, &isect, isect_index);
|
||||
@@ -321,16 +282,11 @@ ccl_device_inline
|
||||
object = kernel_data_fetch(prim_object, -prim_addr - 1);
|
||||
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
t_world_to_instance = bvh_instance_motion_push(
|
||||
kg, object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir);
|
||||
#else
|
||||
t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
#endif
|
||||
|
||||
/* Convert intersection to object space. */
|
||||
t_max_current *= t_world_to_instance;
|
||||
tmin *= t_world_to_instance;
|
||||
|
||||
++stack_ptr;
|
||||
kernel_assert(stack_ptr < BVH_STACK_SIZE);
|
||||
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
|
||||
@@ -344,18 +300,9 @@ ccl_device_inline
|
||||
kernel_assert(object != OBJECT_NONE);
|
||||
|
||||
/* Instance pop. */
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
|
||||
#else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
|
||||
#endif
|
||||
|
||||
/* Restore world space ray length. */
|
||||
tmin = ray->tmin;
|
||||
t_max_current = ray->tmax;
|
||||
bvh_instance_pop(ray, &P, &dir, &idir);
|
||||
|
||||
object = OBJECT_NONE;
|
||||
t_world_to_instance = 1.0f;
|
||||
node_addr = traversal_stack[stack_ptr];
|
||||
--stack_ptr;
|
||||
}
|
||||
|
@@ -43,13 +43,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
float3 P = ray->P;
|
||||
float3 dir = bvh_clamp_direction(ray->D);
|
||||
float3 idir = bvh_inverse_direction(dir);
|
||||
float tmin = ray->tmin;
|
||||
const float tmin = ray->tmin;
|
||||
int object = OBJECT_NONE;
|
||||
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
Transform ob_itfm;
|
||||
#endif
|
||||
|
||||
isect->t = ray->tmax;
|
||||
isect->u = 0.0f;
|
||||
isect->v = 0.0f;
|
||||
@@ -223,15 +219,11 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
object = kernel_data_fetch(prim_object, -prim_addr - 1);
|
||||
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
const float t_world_to_instance = bvh_instance_motion_push(
|
||||
kg, object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir);
|
||||
#else
|
||||
const float t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
#endif
|
||||
|
||||
isect->t *= t_world_to_instance;
|
||||
tmin *= t_world_to_instance;
|
||||
|
||||
++stack_ptr;
|
||||
kernel_assert(stack_ptr < BVH_STACK_SIZE);
|
||||
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
|
||||
@@ -245,12 +237,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
kernel_assert(object != OBJECT_NONE);
|
||||
|
||||
/* instance pop */
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
#else
|
||||
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
#endif
|
||||
tmin = ray->tmin;
|
||||
bvh_instance_pop(ray, &P, &dir, &idir);
|
||||
|
||||
object = OBJECT_NONE;
|
||||
node_addr = traversal_stack[stack_ptr];
|
||||
|
@@ -5,20 +5,35 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
ccl_device_inline bool intersection_ray_valid(ccl_private const Ray *ray)
{
  /* Early-out for rays that must not be traced.
   *
   * Due to some vectorization code a non-finite origin point might cause lots
   * of false-positive intersections which overflow the traversal stack, so
   * such rays are rejected up front to avoid crashes. From production scenes
   * so far it seems to be enough to test the first component only.
   *
   * Scene intersection may also be called with empty rays for conditional
   * trace calls that evaluate to false; the zero-length direction test
   * filters those out. */
  if (!isfinite_safe(ray->P.x) || !isfinite_safe(ray->D.x)) {
    return false;
  }

  return len_squared(ray->D) != 0.0f;
}
|
||||
|
||||
/* Offset intersection distance by the smallest possible amount, to skip
|
||||
* intersections at this distance. This works in cases where the ray start
|
||||
* position is unchanged and only tmin is updated, since for self
|
||||
* intersection we'll be comparing against the exact same distances. */
|
||||
ccl_device_forceinline float intersection_t_offset(const float t)
|
||||
{
|
||||
/* This is a simplified version of nextafterf(t, FLT_MAX), only dealing with
|
||||
/* This is a simplified version of `nextafterf(t, FLT_MAX)`, only dealing with
|
||||
* non-negative and finite t. */
|
||||
kernel_assert(t >= 0.0f && isfinite_safe(t));
|
||||
const uint32_t bits = (t == 0.0f) ? 1 : __float_as_uint(t) + 1;
|
||||
return __uint_as_float(bits);
|
||||
}
|
||||
|
||||
#if defined(__KERNEL_CPU__)
|
||||
#ifndef __KERNEL_GPU__
|
||||
ccl_device int intersections_compare(const void *a, const void *b)
|
||||
{
|
||||
const Intersection *isect_a = (const Intersection *)a;
|
||||
|
@@ -46,13 +46,9 @@ ccl_device_inline
|
||||
float3 P = ray->P;
|
||||
float3 dir = bvh_clamp_direction(ray->D);
|
||||
float3 idir = bvh_inverse_direction(dir);
|
||||
float tmin = ray->tmin;
|
||||
const float tmin = ray->tmin;
|
||||
int object = OBJECT_NONE;
|
||||
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
Transform ob_itfm;
|
||||
#endif
|
||||
|
||||
isect->t = ray->tmax;
|
||||
isect->u = 0.0f;
|
||||
isect->v = 0.0f;
|
||||
@@ -189,15 +185,11 @@ ccl_device_inline
|
||||
int object_flag = kernel_data_fetch(object_flag, object);
|
||||
if (object_flag & SD_OBJECT_HAS_VOLUME) {
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
const float t_world_to_instance = bvh_instance_motion_push(
|
||||
kg, object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir);
|
||||
#else
|
||||
const float t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
#endif
|
||||
|
||||
isect->t *= t_world_to_instance;
|
||||
tmin *= t_world_to_instance;
|
||||
|
||||
++stack_ptr;
|
||||
kernel_assert(stack_ptr < BVH_STACK_SIZE);
|
||||
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
|
||||
@@ -218,13 +210,7 @@ ccl_device_inline
|
||||
kernel_assert(object != OBJECT_NONE);
|
||||
|
||||
/* instance pop */
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
#else
|
||||
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
#endif
|
||||
|
||||
tmin = ray->tmin;
|
||||
bvh_instance_pop(ray, &P, &dir, &idir);
|
||||
|
||||
object = OBJECT_NONE;
|
||||
node_addr = traversal_stack[stack_ptr];
|
||||
|
@@ -47,14 +47,10 @@ ccl_device_inline
|
||||
float3 P = ray->P;
|
||||
float3 dir = bvh_clamp_direction(ray->D);
|
||||
float3 idir = bvh_inverse_direction(dir);
|
||||
float tmin = ray->tmin;
|
||||
const float tmin = ray->tmin;
|
||||
int object = OBJECT_NONE;
|
||||
float isect_t = ray->tmax;
|
||||
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
Transform ob_itfm;
|
||||
#endif
|
||||
|
||||
int num_hits_in_instance = 0;
|
||||
|
||||
uint num_hits = 0;
|
||||
@@ -159,18 +155,6 @@ ccl_device_inline
|
||||
num_hits_in_instance++;
|
||||
isect_array->t = isect_t;
|
||||
if (num_hits == max_hits) {
|
||||
if (object != OBJECT_NONE) {
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
|
||||
#else
|
||||
Transform itfm = object_fetch_transform(
|
||||
kg, object, OBJECT_INVERSE_TRANSFORM);
|
||||
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
|
||||
#endif
|
||||
for (int i = 0; i < num_hits_in_instance; i++) {
|
||||
(isect_array - i - 1)->t *= t_fac;
|
||||
}
|
||||
}
|
||||
return num_hits;
|
||||
}
|
||||
}
|
||||
@@ -212,18 +196,6 @@ ccl_device_inline
|
||||
num_hits_in_instance++;
|
||||
isect_array->t = isect_t;
|
||||
if (num_hits == max_hits) {
|
||||
if (object != OBJECT_NONE) {
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
|
||||
# else
|
||||
Transform itfm = object_fetch_transform(
|
||||
kg, object, OBJECT_INVERSE_TRANSFORM);
|
||||
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
|
||||
# endif
|
||||
for (int i = 0; i < num_hits_in_instance; i++) {
|
||||
(isect_array - i - 1)->t *= t_fac;
|
||||
}
|
||||
}
|
||||
return num_hits;
|
||||
}
|
||||
}
|
||||
@@ -242,15 +214,11 @@ ccl_device_inline
|
||||
int object_flag = kernel_data_fetch(object_flag, object);
|
||||
if (object_flag & SD_OBJECT_HAS_VOLUME) {
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
const float t_world_to_instance = bvh_instance_motion_push(
|
||||
kg, object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir);
|
||||
#else
|
||||
const float t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
#endif
|
||||
|
||||
isect_t *= t_world_to_instance;
|
||||
tmin *= t_world_to_instance;
|
||||
|
||||
num_hits_in_instance = 0;
|
||||
isect_array->t = isect_t;
|
||||
|
||||
@@ -274,29 +242,7 @@ ccl_device_inline
|
||||
kernel_assert(object != OBJECT_NONE);
|
||||
|
||||
/* Instance pop. */
|
||||
if (num_hits_in_instance) {
|
||||
float t_fac;
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
|
||||
#else
|
||||
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
|
||||
#endif
|
||||
/* Scale isect->t to adjust for instancing. */
|
||||
for (int i = 0; i < num_hits_in_instance; i++) {
|
||||
(isect_array - i - 1)->t *= t_fac;
|
||||
}
|
||||
}
|
||||
else {
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
|
||||
#else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
|
||||
#endif
|
||||
}
|
||||
|
||||
tmin = ray->tmin;
|
||||
isect_t = ray->tmax;
|
||||
isect_array->t = isect_t;
|
||||
bvh_instance_pop(ray, &P, &dir, &idir);
|
||||
|
||||
object = OBJECT_NONE;
|
||||
node_addr = traversal_stack[stack_ptr];
|
||||
|
@@ -3,7 +3,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef __KERNEL_CPU__
|
||||
#ifndef __KERNEL_GPU__
|
||||
# include <fenv.h>
|
||||
#endif
|
||||
|
||||
|
@@ -70,7 +70,7 @@ KERNEL_STRUCT_MEMBER(film, float4, rec709_to_r)
|
||||
KERNEL_STRUCT_MEMBER(film, float4, rec709_to_g)
|
||||
KERNEL_STRUCT_MEMBER(film, float4, rec709_to_b)
|
||||
KERNEL_STRUCT_MEMBER(film, int, is_rec709)
|
||||
/* Exposuse. */
|
||||
/* Exposure. */
|
||||
KERNEL_STRUCT_MEMBER(film, float, exposure)
|
||||
/* Passes used. */
|
||||
KERNEL_STRUCT_MEMBER(film, int, pass_flag)
|
||||
|
572
intern/cycles/kernel/device/cpu/bvh.h
Normal file
572
intern/cycles/kernel/device/cpu/bvh.h
Normal file
@@ -0,0 +1,572 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2021-2022 Blender Foundation */
|
||||
|
||||
/* CPU Embree implementation of ray-scene intersection. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <embree3/rtcore_ray.h>
|
||||
#include <embree3/rtcore_scene.h>
|
||||
|
||||
#include "kernel/device/cpu/compat.h"
|
||||
#include "kernel/device/cpu/globals.h"
|
||||
|
||||
#include "kernel/bvh/types.h"
|
||||
#include "kernel/bvh/util.h"
|
||||
#include "kernel/geom/object.h"
|
||||
#include "kernel/integrator/state.h"
|
||||
#include "kernel/sample/lcg.h"
|
||||
|
||||
#include "util/vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Test the lowest bit of an Embree geometry ID, which this file uses to tell
 * hair/curve geometry apart from other geometry. The argument is parenthesized
 * so that compound expressions (e.g. `a | b`) expand with the intended
 * precedence. */
#define EMBREE_IS_HAIR(x) ((x) & 1)
|
||||
|
||||
/* Intersection context. */
|
||||
|
||||
struct CCLIntersectContext {
|
||||
typedef enum {
|
||||
RAY_REGULAR = 0,
|
||||
RAY_SHADOW_ALL = 1,
|
||||
RAY_LOCAL = 2,
|
||||
RAY_SSS = 3,
|
||||
RAY_VOLUME_ALL = 4,
|
||||
} RayType;
|
||||
|
||||
KernelGlobals kg;
|
||||
RayType type;
|
||||
|
||||
/* For avoiding self intersections */
|
||||
const Ray *ray;
|
||||
|
||||
/* for shadow rays */
|
||||
Intersection *isect_s;
|
||||
uint max_hits;
|
||||
uint num_hits;
|
||||
uint num_recorded_hits;
|
||||
float throughput;
|
||||
float max_t;
|
||||
bool opaque_hit;
|
||||
|
||||
/* for SSS Rays: */
|
||||
LocalIntersection *local_isect;
|
||||
int local_object_id;
|
||||
uint *lcg_state;
|
||||
|
||||
CCLIntersectContext(KernelGlobals kg_, RayType type_)
|
||||
{
|
||||
kg = kg_;
|
||||
type = type_;
|
||||
ray = NULL;
|
||||
max_hits = 1;
|
||||
num_hits = 0;
|
||||
num_recorded_hits = 0;
|
||||
throughput = 1.0f;
|
||||
max_t = FLT_MAX;
|
||||
opaque_hit = false;
|
||||
isect_s = NULL;
|
||||
local_isect = NULL;
|
||||
local_object_id = -1;
|
||||
lcg_state = NULL;
|
||||
}
|
||||
};
|
||||
|
||||
class IntersectContext {
|
||||
public:
|
||||
IntersectContext(CCLIntersectContext *ctx)
|
||||
{
|
||||
rtcInitIntersectContext(&context);
|
||||
userRayExt = ctx;
|
||||
}
|
||||
RTCIntersectContext context;
|
||||
CCLIntersectContext *userRayExt;
|
||||
};
|
||||
|
||||
/* Utilities. */
|
||||
|
||||
/* Copy a Cycles ray into an Embree ray. `visibility` is stored as the Embree
 * ray mask. */
ccl_device_inline void kernel_embree_setup_ray(const Ray &ray,
                                               RTCRay &rtc_ray,
                                               const uint visibility)
{
  const float3 origin = ray.P;
  const float3 direction = ray.D;

  /* Origin. */
  rtc_ray.org_x = origin.x;
  rtc_ray.org_y = origin.y;
  rtc_ray.org_z = origin.z;

  /* Direction. */
  rtc_ray.dir_x = direction.x;
  rtc_ray.dir_y = direction.y;
  rtc_ray.dir_z = direction.z;

  /* Parametric range, time and mask. */
  rtc_ray.tnear = ray.tmin;
  rtc_ray.tfar = ray.tmax;
  rtc_ray.time = ray.time;
  rtc_ray.mask = visibility;
}
|
||||
|
||||
/* Prepare an Embree ray/hit pair: the hit part is reset to "nothing hit" and
 * the ray part is filled from the Cycles ray. */
ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray,
                                                  RTCRayHit &rayhit,
                                                  const uint visibility)
{
  rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID;
  rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID;

  kernel_embree_setup_ray(ray, rayhit.ray, visibility);
}
|
||||
|
||||
/* Check whether an Embree hit is a self intersection of `ray`.
 *
 * The Cycles object index is the Embree instance ID (for instanced hits) or
 * geometry ID (otherwise) divided by two. The primitive is only resolved, and
 * `intersection_skip_self_shadow()` only consulted, when that object matches
 * one of the objects stored in `ray->self`. */
ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg,
                                                          const RTCHit *hit,
                                                          const Ray *ray)
{
  const bool is_instanced = (hit->instID[0] != RTC_INVALID_GEOMETRY_ID);
  const int object_id = (is_instanced ? hit->instID[0] : hit->geomID) / 2;

  if (ray->self.object != object_id && ray->self.light_object != object_id) {
    return false;
  }

  /* For instanced hits the geometry lives in the instance's own scene, which is
   * stored as user data on the top level instance geometry. */
  RTCScene prim_scene = kernel_data.device_bvh;
  if (is_instanced) {
    prim_scene = (RTCScene)rtcGetGeometryUserData(
        rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
  }

  /* The geometry's user data holds the offset to add to Embree's primitive ID. */
  const int prim_id = hit->primID +
                      (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(prim_scene, hit->geomID));

  return intersection_skip_self_shadow(ray->self, object_id, prim_id);
}
|
||||
|
||||
/* Convert an Embree ray/hit pair into a Cycles intersection.
 *
 * The hit distance is taken from `ray->tfar`. Object and primitive are derived
 * from the Embree instance/geometry IDs, and for curve geometry (lowest bit of
 * the geometry ID set) the primitive and type are looked up in the curve
 * segments array. */
ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
                                                 const RTCRay *ray,
                                                 const RTCHit *hit,
                                                 Intersection *isect)
{
  isect->t = ray->tfar;
  isect->u = hit->u;
  isect->v = hit->v;

  /* Instanced hits resolve their geometry in the instance's own scene, which is
   * stored as user data on the top level instance geometry. */
  const bool is_instanced = (hit->instID[0] != RTC_INVALID_GEOMETRY_ID);
  RTCScene prim_scene = kernel_data.device_bvh;
  if (is_instanced) {
    prim_scene = (RTCScene)rtcGetGeometryUserData(
        rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
  }

  isect->prim = hit->primID +
                (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(prim_scene, hit->geomID));
  isect->object = (is_instanced ? hit->instID[0] : hit->geomID) / 2;

  if (hit->geomID & 1) {
    /* Curve: the primitive index so far refers to a curve segment entry. */
    const KernelCurveSegment segment = kernel_data_fetch(curve_segments, isect->prim);
    isect->type = segment.type;
    isect->prim = segment.prim;
  }
  else {
    isect->type = kernel_data_fetch(objects, isect->object).primitive_type;
  }
}
|
||||
|
||||
/* Convert an Embree hit found while tracing directly inside the scene of
 * `object` into a Cycles intersection. The object is supplied by the caller
 * rather than read from the hit, and its scene is fetched from the user data of
 * top level geometry `object * 2`. */
ccl_device_inline void kernel_embree_convert_sss_hit(
    KernelGlobals kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object)
{
  RTCGeometry instance_geom = rtcGetGeometry(kernel_data.device_bvh, object * 2);
  RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(instance_geom);

  /* The geometry's user data holds the offset to add to Embree's primitive ID. */
  const intptr_t prim_offset = (intptr_t)rtcGetGeometryUserData(
      rtcGetGeometry(inst_scene, hit->geomID));

  isect->t = ray->tfar;
  isect->u = hit->u;
  isect->v = hit->v;
  isect->prim = hit->primID + prim_offset;
  isect->object = object;
  isect->type = kernel_data_fetch(objects, object).primitive_type;
}
|
||||
|
||||
/* Ray filter functions. */
|
||||
|
||||
/* This gets called by Embree at every valid ray/object intersection.
|
||||
* Things like recording subsurface or shadow hits for later evaluation
|
||||
* as well as filtering for volume objects happen here.
|
||||
* Cycles' own BVH does that directly inside the traversal calls. */
|
||||
ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
||||
assert(args->N == 1);
|
||||
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
|
||||
const KernelGlobalsCPU *kg = ctx->kg;
|
||||
const Ray *cray = ctx->ray;
|
||||
|
||||
if (kernel_embree_is_self_intersection(kg, hit, cray)) {
|
||||
*args->valid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* This gets called by Embree at every valid ray/object intersection.
|
||||
* Things like recording subsurface or shadow hits for later evaluation
|
||||
* as well as filtering for volume objects happen here.
|
||||
* Cycles' own BVH does that directly inside the traversal calls.
|
||||
*/
|
||||
ccl_device void kernel_embree_filter_occluded_func(const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
||||
assert(args->N == 1);
|
||||
|
||||
const RTCRay *ray = (RTCRay *)args->ray;
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
|
||||
const KernelGlobalsCPU *kg = ctx->kg;
|
||||
const Ray *cray = ctx->ray;
|
||||
|
||||
switch (ctx->type) {
|
||||
case CCLIntersectContext::RAY_SHADOW_ALL: {
|
||||
Intersection current_isect;
|
||||
kernel_embree_convert_hit(kg, ray, hit, ¤t_isect);
|
||||
if (intersection_skip_self_shadow(cray->self, current_isect.object, current_isect.prim)) {
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
/* If no transparent shadows or max number of hits exceeded, all light is blocked. */
|
||||
const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type);
|
||||
if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->num_hits >= ctx->max_hits) {
|
||||
ctx->opaque_hit = true;
|
||||
return;
|
||||
}
|
||||
|
||||
++ctx->num_hits;
|
||||
|
||||
/* Always use baked shadow transparency for curves. */
|
||||
if (current_isect.type & PRIMITIVE_CURVE) {
|
||||
ctx->throughput *= intersection_curve_shadow_transparency(
|
||||
kg, current_isect.object, current_isect.prim, current_isect.u);
|
||||
|
||||
if (ctx->throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
|
||||
ctx->opaque_hit = true;
|
||||
return;
|
||||
}
|
||||
else {
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Test if we need to record this transparent intersection. */
|
||||
const uint max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
|
||||
if (ctx->num_recorded_hits < max_record_hits || ray->tfar < ctx->max_t) {
|
||||
/* If maximum number of hits was reached, replace the intersection with the
|
||||
* highest distance. We want to find the N closest intersections. */
|
||||
const uint num_recorded_hits = min(ctx->num_recorded_hits, max_record_hits);
|
||||
uint isect_index = num_recorded_hits;
|
||||
if (num_recorded_hits + 1 >= max_record_hits) {
|
||||
float max_t = ctx->isect_s[0].t;
|
||||
uint max_recorded_hit = 0;
|
||||
|
||||
for (uint i = 1; i < num_recorded_hits; ++i) {
|
||||
if (ctx->isect_s[i].t > max_t) {
|
||||
max_recorded_hit = i;
|
||||
max_t = ctx->isect_s[i].t;
|
||||
}
|
||||
}
|
||||
|
||||
if (num_recorded_hits >= max_record_hits) {
|
||||
isect_index = max_recorded_hit;
|
||||
}
|
||||
|
||||
/* Limit the ray distance and stop counting hits beyond this.
|
||||
* TODO: is there some way we can tell Embree to stop intersecting beyond
|
||||
* this distance when max number of hits is reached?. Or maybe it will
|
||||
* become irrelevant if we make max_hits a very high number on the CPU. */
|
||||
ctx->max_t = max(current_isect.t, max_t);
|
||||
}
|
||||
|
||||
ctx->isect_s[isect_index] = current_isect;
|
||||
}
|
||||
|
||||
/* Always increase the number of recorded hits, even beyond the maximum,
|
||||
* so that we can detect this and trace another ray if needed. */
|
||||
++ctx->num_recorded_hits;
|
||||
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
break;
|
||||
}
|
||||
case CCLIntersectContext::RAY_LOCAL:
|
||||
case CCLIntersectContext::RAY_SSS: {
|
||||
/* Check if it's hitting the correct object. */
|
||||
Intersection current_isect;
|
||||
if (ctx->type == CCLIntersectContext::RAY_SSS) {
|
||||
kernel_embree_convert_sss_hit(kg, ray, hit, ¤t_isect, ctx->local_object_id);
|
||||
}
|
||||
else {
|
||||
kernel_embree_convert_hit(kg, ray, hit, ¤t_isect);
|
||||
if (ctx->local_object_id != current_isect.object) {
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (intersection_skip_self_local(cray->self, current_isect.prim)) {
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* No intersection information requested, just return a hit. */
|
||||
if (ctx->max_hits == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* Ignore curves. */
|
||||
if (EMBREE_IS_HAIR(hit->geomID)) {
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
LocalIntersection *local_isect = ctx->local_isect;
|
||||
int hit_idx = 0;
|
||||
|
||||
if (ctx->lcg_state) {
|
||||
/* See triangle_intersect_subsurface() for the native equivalent. */
|
||||
for (int i = min((int)ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
|
||||
if (local_isect->hits[i].t == ray->tfar) {
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
local_isect->num_hits++;
|
||||
|
||||
if (local_isect->num_hits <= ctx->max_hits) {
|
||||
hit_idx = local_isect->num_hits - 1;
|
||||
}
|
||||
else {
|
||||
/* reservoir sampling: if we are at the maximum number of
|
||||
* hits, randomly replace element or skip it */
|
||||
hit_idx = lcg_step_uint(ctx->lcg_state) % local_isect->num_hits;
|
||||
|
||||
if (hit_idx >= ctx->max_hits) {
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Record closest intersection only. */
|
||||
if (local_isect->num_hits && current_isect.t > local_isect->hits[0].t) {
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
local_isect->num_hits = 1;
|
||||
}
|
||||
|
||||
/* record intersection */
|
||||
local_isect->hits[hit_idx] = current_isect;
|
||||
local_isect->Ng[hit_idx] = normalize(make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z));
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
break;
|
||||
}
|
||||
case CCLIntersectContext::RAY_VOLUME_ALL: {
|
||||
/* Append the intersection to the end of the array. */
|
||||
if (ctx->num_hits < ctx->max_hits) {
|
||||
Intersection current_isect;
|
||||
kernel_embree_convert_hit(kg, ray, hit, ¤t_isect);
|
||||
if (intersection_skip_self(cray->self, current_isect.object, current_isect.prim)) {
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
Intersection *isect = &ctx->isect_s[ctx->num_hits];
|
||||
++ctx->num_hits;
|
||||
*isect = current_isect;
|
||||
/* Only primitives from volume object. */
|
||||
uint tri_object = isect->object;
|
||||
int object_flag = kernel_data_fetch(object_flag, tri_object);
|
||||
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
|
||||
--ctx->num_hits;
|
||||
}
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case CCLIntersectContext::RAY_REGULAR:
|
||||
default:
|
||||
if (kernel_embree_is_self_intersection(kg, hit, cray)) {
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Filter callback that rejects back-facing hits (ray direction and geometric
 * normal pointing the same way) and, for the remaining hits, self
 * intersections of the ray stored in the intersect context. */
ccl_device void kernel_embree_filter_func_backface_cull(const RTCFilterFunctionNArguments *args)
{
  const RTCRay *ray = (RTCRay *)args->ray;
  RTCHit *hit = (RTCHit *)args->hit;

  /* Always ignore back-facing intersections. */
  const float3 ray_dir = make_float3(ray->dir_x, ray->dir_y, ray->dir_z);
  const float3 geom_normal = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
  if (dot(ray_dir, geom_normal) > 0.0f) {
    *args->valid = 0;
    return;
  }

  CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
  if (kernel_embree_is_self_intersection(ctx->kg, hit, ctx->ray)) {
    *args->valid = 0;
  }
}
|
||||
|
||||
/* Back-face culling variant of kernel_embree_filter_occluded_func(): hits whose
 * geometric normal points along the ray direction are rejected, all other hits
 * are forwarded to the regular occlusion filter. */
ccl_device void kernel_embree_filter_occluded_func_backface_cull(
    const RTCFilterFunctionNArguments *args)
{
  const RTCRay *ray = (RTCRay *)args->ray;
  RTCHit *hit = (RTCHit *)args->hit;

  const float3 ray_dir = make_float3(ray->dir_x, ray->dir_y, ray->dir_z);
  const float3 geom_normal = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
  const bool backfacing = dot(ray_dir, geom_normal) > 0.0f;

  if (backfacing) {
    /* Always ignore back-facing intersections. */
    *args->valid = 0;
  }
  else {
    kernel_embree_filter_occluded_func(args);
  }
}
|
||||
|
||||
/* Scene intersection. */
|
||||
|
||||
/* Trace `ray` against the scene with Embree using rtcIntersect1().
 *
 * `isect->t` is always initialized to `ray->tmax`; the remaining fields of
 * `isect` are only written when a hit is found.
 *
 * Returns true when Embree reported a hit. */
ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
                                                  ccl_private const Ray *ray,
                                                  const uint visibility,
                                                  ccl_private Intersection *isect)
{
  isect->t = ray->tmax;

  CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
  ctx.ray = ray;
  IntersectContext rtc_ctx(&ctx);

  RTCRayHit ray_hit;
  kernel_embree_setup_rayhit(*ray, ray_hit, visibility);
  rtcIntersect1(kernel_data.device_bvh, &rtc_ctx.context, &ray_hit);

  const bool found_hit = ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID &&
                         ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID;
  if (found_hit) {
    kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
  }

  return found_hit;
}
|
||||
|
||||
#ifdef __BVH_LOCAL__
|
||||
/* Find intersections of `ray` with a single object, `local_object`.
 *
 * When the object does not have SD_OBJECT_TRANSFORM_APPLIED set, the ray is
 * transformed with bvh_instance_motion_push() and traced against the object's
 * own scene (RAY_SSS); otherwise it is traced against the top level scene and
 * hits on other objects are filtered out (RAY_LOCAL).
 *
 * Hits are recorded into `local_isect` (which may be NULL) by the occlusion
 * filter. Returns true when at least one hit was recorded or when Embree
 * reported an occluded ray. */
ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
                                                        ccl_private const Ray *ray,
                                                        ccl_private LocalIntersection *local_isect,
                                                        int local_object,
                                                        ccl_private uint *lcg_state,
                                                        int max_hits)
{
  const bool has_bvh = (kernel_data_fetch(object_flag, local_object) &
                        SD_OBJECT_TRANSFORM_APPLIED) == 0;
  const CCLIntersectContext::RayType ray_type = has_bvh ? CCLIntersectContext::RAY_SSS :
                                                          CCLIntersectContext::RAY_LOCAL;

  CCLIntersectContext ctx(kg, ray_type);
  ctx.ray = ray;
  ctx.max_hits = max_hits;
  ctx.lcg_state = lcg_state;
  ctx.local_object_id = local_object;
  ctx.local_isect = local_isect;
  if (local_isect) {
    local_isect->num_hits = 0;
  }

  IntersectContext rtc_ctx(&ctx);
  RTCRay rtc_ray;
  kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);

  if (!has_bvh) {
    rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
  }
  else {
    /* If this object has its own BVH, use it. */
    RTCGeometry geom = rtcGetGeometry(kernel_data.device_bvh, local_object * 2);
    if (geom) {
      /* Transform the ray for this object; `idir` is only needed as an output
       * argument of the push function. */
      float3 P = ray->P;
      float3 dir = ray->D;
      float3 idir = ray->D;
      bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir);

      rtc_ray.org_x = P.x;
      rtc_ray.org_y = P.y;
      rtc_ray.org_z = P.z;
      rtc_ray.dir_x = dir.x;
      rtc_ray.dir_y = dir.y;
      rtc_ray.dir_z = dir.z;
      rtc_ray.tnear = ray->tmin;
      rtc_ray.tfar = ray->tmax;

      RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom);
      kernel_assert(scene);
      if (scene) {
        rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray);
      }
    }
  }

  /* rtcOccluded1 sets tfar to -inf if a hit was found. */
  const bool recorded_hit = local_isect && local_isect->num_hits > 0;
  return recorded_hit || (rtc_ray.tfar < 0);
}
|
||||
#endif
|
||||
|
||||
#ifdef __SHADOW_RECORD_ALL__
|
||||
/* Trace a shadow ray and let the occlusion filter record transparent hits into
 * `state->shadow_isect`.
 *
 * On return `*num_recorded_hits` and `*throughput` hold the values accumulated
 * in the intersect context. Returns true when the filter flagged an opaque
 * hit. */
ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
                                                             IntegratorShadowStateCPU *state,
                                                             ccl_private const Ray *ray,
                                                             uint visibility,
                                                             uint max_hits,
                                                             ccl_private uint *num_recorded_hits,
                                                             ccl_private float *throughput)
{
  CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
  ctx.ray = ray;
  ctx.max_hits = max_hits;
  ctx.isect_s = (Intersection *)state->shadow_isect;

  IntersectContext rtc_ctx(&ctx);
  RTCRay rtc_ray;
  kernel_embree_setup_ray(*ray, rtc_ray, visibility);
  rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);

  /* Hand the results gathered by the filter back to the caller. */
  *throughput = ctx.throughput;
  *num_recorded_hits = ctx.num_recorded_hits;
  return ctx.opaque_hit;
}
|
||||
#endif
|
||||
|
||||
#ifdef __VOLUME__
|
||||
/* Trace `ray` and collect up to `max_hits` intersections with volume objects
 * into the `isect` array; the recording itself is done by the occlusion
 * filter. Returns the number of recorded intersections. */
ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
                                                         ccl_private const Ray *ray,
                                                         ccl_private Intersection *isect,
                                                         const uint max_hits,
                                                         const uint visibility)
{
  CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
  ctx.ray = ray;
  ctx.isect_s = isect;
  ctx.num_hits = 0;
  ctx.max_hits = max_hits;

  IntersectContext rtc_ctx(&ctx);
  RTCRay rtc_ray;
  kernel_embree_setup_ray(*ray, rtc_ray, visibility);
  rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);

  return ctx.num_hits;
}
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -3,8 +3,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#define __KERNEL_CPU__
|
||||
|
||||
/* Release kernel has too much false-positive maybe-uninitialized warnings,
|
||||
* which makes it possible to miss actual warnings.
|
||||
*/
|
||||
@@ -35,38 +33,4 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
#define kernel_assert(cond) assert(cond)
|
||||
|
||||
/* Macros to handle different memory storage on different devices */
|
||||
|
||||
#ifdef __KERNEL_SSE2__
|
||||
typedef vector3<sseb> sse3b;
|
||||
typedef vector3<ssef> sse3f;
|
||||
typedef vector3<ssei> sse3i;
|
||||
|
||||
/* Print the x, y and z members of `a` with print_sseb(), each call using the
 * same `label`. */
ccl_device_inline void print_sse3b(const char *label, sse3b &a)
|
||||
{
|
||||
print_sseb(label, a.x);
|
||||
print_sseb(label, a.y);
|
||||
print_sseb(label, a.z);
|
||||
}
|
||||
|
||||
/* Print the x, y and z members of `a` with print_ssef(), each call using the
 * same `label`. */
ccl_device_inline void print_sse3f(const char *label, sse3f &a)
|
||||
{
|
||||
print_ssef(label, a.x);
|
||||
print_ssef(label, a.y);
|
||||
print_ssef(label, a.z);
|
||||
}
|
||||
|
||||
/* Print the x, y and z members of `a` with print_ssei(), each call using the
 * same `label`. */
ccl_device_inline void print_sse3i(const char *label, sse3i &a)
|
||||
{
|
||||
print_ssei(label, a.x);
|
||||
print_ssei(label, a.y);
|
||||
print_ssei(label, a.z);
|
||||
}
|
||||
|
||||
# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
|
||||
typedef vector3<avxf> avx3f;
|
||||
# endif
|
||||
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
360
intern/cycles/kernel/device/metal/bvh.h
Normal file
360
intern/cycles/kernel/device/metal/bvh.h
Normal file
@@ -0,0 +1,360 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2021-2022 Blender Foundation */
|
||||
|
||||
/* MetalRT implementation of ray-scene intersection. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kernel/bvh/types.h"
|
||||
#include "kernel/bvh/util.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Payload types. */
|
||||
|
||||
/* Payload for regular scene intersection queries. scene_intersect() fills
 * `self`, `u`, `v` and `visibility` before tracing; who writes `prim`, `type`
 * and `time` is not visible in this part of the file.
 * NOTE(review): presumably the MetalRT intersection functions — confirm. */
struct MetalRTIntersectionPayload {
|
||||
RaySelfPrimitives self;
|
||||
uint visibility;
|
||||
float u, v;
|
||||
int prim;
|
||||
int type;
|
||||
#if defined(__METALRT_MOTION__)
|
||||
/* Only present when MetalRT motion support is compiled in. */
float time;
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Payload for local intersection queries.
 * NOTE(review): the field names mirror the local/SSS state of the CPU Embree
 * context (object, LCG state, hit limit, recorded hits), but the code using
 * this struct is outside the visible chunk — confirm before relying on it. */
struct MetalRTIntersectionLocalPayload {
|
||||
RaySelfPrimitives self;
|
||||
uint local_object;
|
||||
uint lcg_state;
|
||||
short max_hits;
|
||||
bool has_lcg_state;
|
||||
bool result;
|
||||
LocalIntersection local_isect;
|
||||
};
|
||||
|
||||
struct MetalRTIntersectionShadowPayload {
|
||||
RaySelfPrimitives self;
|
||||
uint visibility;
|
||||
#if defined(__METALRT_MOTION__)
|
||||
float time;
|
||||
#endif
|
||||
int state;
|
||||
float throughput;
|
||||
short max_hits;
|
||||
short num_hits;
|
||||
short num_recorded_hits;
|
||||
bool result;
|
||||
};
|
||||
|
||||
/* Scene intersection. */
|
||||
|
||||
/* Trace `ray` against the MetalRT acceleration structure and store the hit in `isect`.
 *
 * On an invalid ray or a miss, `isect->t` is set to `ray->tmax`, `isect->type` to
 * PRIMITIVE_NONE and false is returned. On a hit, the primitive index and type are taken
 * from the payload, the object from the hit's user instance id, and u/v from the
 * barycentrics reported by MetalRT for triangles or from the payload for everything else.
 *
 * Returns true when the recorded hit type is not PRIMITIVE_NONE. */
ccl_device_intersect bool scene_intersect(KernelGlobals kg,
                                          ccl_private const Ray *ray,
                                          const uint visibility,
                                          ccl_private Intersection *isect)
{
  if (!intersection_ray_valid(ray)) {
    isect->t = ray->tmax;
    isect->type = PRIMITIVE_NONE;
    return false;
  }

#if defined(__KERNEL_DEBUG__)
  if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
    isect->t = ray->tmax;
    isect->type = PRIMITIVE_NONE;
    kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
    return false;
  }

  if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
    isect->t = ray->tmax;
    isect->type = PRIMITIVE_NONE;
    kernel_assert(!"Invalid ift_default");
    return false;
  }
#endif

  metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
  metalrt_intersector_type metalrt_intersect;

  if (!kernel_data.bvh.have_curves) {
    metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
  }

  MetalRTIntersectionPayload payload;
  payload.self = ray->self;
  payload.u = 0.0f;
  payload.v = 0.0f;
  payload.visibility = visibility;

  typename metalrt_intersector_type::result_type intersection;

  /* Only the low 8 bits of the visibility are used as the ray mask. When they are all clear
   * but higher visibility bits are set, trace with the full mask instead; the complete
   * visibility is still available to the intersection handlers through the payload.
   *
   * No further intersector setup is required: default MetalRT behavior is any-hit, and shadow
   * ray early termination is controlled by the intersection handler. */
  uint ray_mask = visibility & 0xFF;
  if (0 == ray_mask && (visibility & ~0xFF) != 0) {
    ray_mask = 0xFF;
  }

#if defined(__METALRT_MOTION__)
  payload.time = ray->time;
  intersection = metalrt_intersect.intersect(r,
                                             metal_ancillaries->accel_struct,
                                             ray_mask,
                                             ray->time,
                                             metal_ancillaries->ift_default,
                                             payload);
#else
  intersection = metalrt_intersect.intersect(
      r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
#endif

  if (intersection.type == intersection_type::none) {
    isect->t = ray->tmax;
    isect->type = PRIMITIVE_NONE;

    return false;
  }

  isect->prim = payload.prim;
  isect->type = payload.type;
  isect->object = intersection.user_instance_id;
  isect->t = intersection.distance;

  if (intersection.type == intersection_type::triangle) {
    isect->u = intersection.triangle_barycentric_coord.x;
    isect->v = intersection.triangle_barycentric_coord.y;
  }
  else {
    /* Non-triangle hits get their coordinates from the payload. */
    isect->u = payload.u;
    isect->v = payload.v;
  }

  return isect->type != PRIMITIVE_NONE;
}
|
||||
|
||||
#ifdef __BVH_LOCAL__
|
||||
/* Trace `ray` against a single object (`local_object`) and record up to `max_hits` hits.
 *
 * `local_isect` receives the recorded hits and may be null, in which case nothing is written
 * back. `lcg_state` is an optional random number state: when non-null it is copied into the
 * payload before tracing and written back afterwards.
 *
 * Returns the `result` flag of the payload, which starts out false; false is also returned
 * for an invalid ray. */
ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
                                                ccl_private const Ray *ray,
                                                ccl_private LocalIntersection *local_isect,
                                                int local_object,
                                                ccl_private uint *lcg_state,
                                                int max_hits)
{
  if (!intersection_ray_valid(ray)) {
    if (local_isect) {
      local_isect->num_hits = 0;
    }
    return false;
  }

#  if defined(__KERNEL_DEBUG__)
  if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
    if (local_isect) {
      local_isect->num_hits = 0;
    }
    kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
    return false;
  }

  if (is_null_intersection_function_table(metal_ancillaries->ift_local)) {
    if (local_isect) {
      local_isect->num_hits = 0;
    }
    kernel_assert(!"Invalid ift_local");
    return false;
  }
#  endif

  metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
  metalrt_intersector_type metalrt_intersect;

  metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
  if (!kernel_data.bvh.have_curves) {
    metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
  }

  MetalRTIntersectionLocalPayload payload;
  payload.self = ray->self;
  payload.local_object = local_object;
  payload.max_hits = max_hits;
  payload.local_isect.num_hits = 0;
  /* Always give the random number members a defined value, so the payload never carries an
   * indeterminate `has_lcg_state` when the caller passes no state. */
  if (lcg_state) {
    payload.has_lcg_state = true;
    payload.lcg_state = *lcg_state;
  }
  else {
    payload.has_lcg_state = false;
    payload.lcg_state = 0;
  }
  payload.result = false;

  typename metalrt_intersector_type::result_type intersection;

#  if defined(__METALRT_MOTION__)
  intersection = metalrt_intersect.intersect(
      r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload);
#  else
  intersection = metalrt_intersect.intersect(
      r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload);
#  endif

  if (lcg_state) {
    *lcg_state = payload.lcg_state;
  }
  /* `local_isect` is treated as optional on the early-out paths above, so guard the final
   * write-back the same way instead of dereferencing a possibly null pointer. */
  if (local_isect) {
    *local_isect = payload.local_isect;
  }

  return payload.result;
}
|
||||
#endif
|
||||
|
||||
#ifdef __SHADOW_RECORD_ALL__
|
||||
/* Trace a shadow ray, letting the shadow intersection handlers record hits for `state`.
 *
 * `num_recorded_hits` and `throughput` are always written: with the values accumulated in the
 * payload after tracing, or with 0 and 1.0 (the payload's starting values) when the function
 * returns early.
 *
 * Returns the `result` flag of the payload, which starts out false; false is also returned
 * for an invalid ray. */
ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
                                                     IntegratorShadowState state,
                                                     ccl_private const Ray *ray,
                                                     uint visibility,
                                                     uint max_hits,
                                                     ccl_private uint *num_recorded_hits,
                                                     ccl_private float *throughput)
{
  /* Give the outputs defined values up front, so callers never read unwritten memory when one
   * of the early returns below is taken. */
  *num_recorded_hits = 0;
  *throughput = 1.0f;

  if (!intersection_ray_valid(ray)) {
    return false;
  }

#  if defined(__KERNEL_DEBUG__)
  if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
    kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
    return false;
  }

  if (is_null_intersection_function_table(metal_ancillaries->ift_shadow)) {
    kernel_assert(!"Invalid ift_shadow");
    return false;
  }
#  endif

  metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
  metalrt_intersector_type metalrt_intersect;

  metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
  if (!kernel_data.bvh.have_curves) {
    metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
  }

  MetalRTIntersectionShadowPayload payload;
  payload.self = ray->self;
  payload.visibility = visibility;
  payload.max_hits = max_hits;
  payload.num_hits = 0;
  payload.num_recorded_hits = 0;
  payload.throughput = 1.0f;
  payload.result = false;
  payload.state = state;

  /* Only the low 8 bits of the visibility are used as the ray mask. When they are all clear
   * but higher visibility bits are set, trace with the full mask instead. */
  uint ray_mask = visibility & 0xFF;
  if (0 == ray_mask && (visibility & ~0xFF) != 0) {
    ray_mask = 0xFF;
  }

  typename metalrt_intersector_type::result_type intersection;

#  if defined(__METALRT_MOTION__)
  payload.time = ray->time;
  intersection = metalrt_intersect.intersect(r,
                                             metal_ancillaries->accel_struct,
                                             ray_mask,
                                             ray->time,
                                             metal_ancillaries->ift_shadow,
                                             payload);
#  else
  intersection = metalrt_intersect.intersect(
      r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_shadow, payload);
#  endif

  *num_recorded_hits = payload.num_recorded_hits;
  *throughput = payload.throughput;

  return payload.result;
}
|
||||
#endif
|
||||
|
||||
#ifdef __VOLUME__
|
||||
/* Trace `ray` for volume intersection and store the hit in `isect`.
 *
 * Unlike scene_intersect(), `isect` is left untouched when the ray is invalid or nothing is
 * hit. On a hit, the primitive index and type are taken from the payload, the object from the
 * hit's user instance id, and u/v from the barycentrics reported by MetalRT for triangles or
 * from the payload for everything else.
 *
 * Returns true when the recorded hit type is not PRIMITIVE_NONE. */
ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
                                                 ccl_private const Ray *ray,
                                                 ccl_private Intersection *isect,
                                                 const uint visibility)
{
  if (!intersection_ray_valid(ray)) {
    return false;
  }

#  if defined(__KERNEL_DEBUG__)
  if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
    kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
    return false;
  }

  if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
    kernel_assert(!"Invalid ift_default");
    return false;
  }
#  endif

  metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
  metalrt_intersector_type metalrt_intersect;

  metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
  if (!kernel_data.bvh.have_curves) {
    metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
  }

  MetalRTIntersectionPayload payload;
  payload.self = ray->self;
  /* u/v are read back below for non-triangle hits, so start them from a defined value just
   * like scene_intersect() does. */
  payload.u = 0.0f;
  payload.v = 0.0f;
  payload.visibility = visibility;

  typename metalrt_intersector_type::result_type intersection;

  /* Only the low 8 bits of the visibility are used as the ray mask. When they are all clear
   * but higher visibility bits are set, trace with the full mask instead. */
  uint ray_mask = visibility & 0xFF;
  if (0 == ray_mask && (visibility & ~0xFF) != 0) {
    ray_mask = 0xFF;
  }

#  if defined(__METALRT_MOTION__)
  payload.time = ray->time;
  intersection = metalrt_intersect.intersect(r,
                                             metal_ancillaries->accel_struct,
                                             ray_mask,
                                             ray->time,
                                             metal_ancillaries->ift_default,
                                             payload);
#  else
  intersection = metalrt_intersect.intersect(
      r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
#  endif

  if (intersection.type == intersection_type::none) {
    return false;
  }

  isect->prim = payload.prim;
  isect->type = payload.type;
  isect->object = intersection.user_instance_id;
  isect->t = intersection.distance;

  if (intersection.type == intersection_type::triangle) {
    isect->u = intersection.triangle_barycentric_coord.x;
    isect->v = intersection.triangle_barycentric_coord.y;
  }
  else {
    /* Non-triangle hits get their coordinates from the payload. */
    isect->u = payload.u;
    isect->v = payload.v;
  }

  return isect->type != PRIMITIVE_NONE;
}
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -260,8 +260,6 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \
|
||||
|
||||
#ifdef __METALRT__
|
||||
|
||||
# define __KERNEL_GPU_RAYTRACING__
|
||||
|
||||
# if defined(__METALRT_MOTION__)
|
||||
# define METALRT_TAGS instancing, instance_motion, primitive_motion
|
||||
# else
|
||||
|
@@ -1,41 +1,44 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2021-2022 Blender Foundation */
|
||||
|
||||
/* Metal kernel entry points */
|
||||
/* Metal kernel entry points. */
|
||||
|
||||
#include "kernel/device/metal/compat.h"
|
||||
#include "kernel/device/metal/globals.h"
|
||||
#include "kernel/device/metal/function_constants.h"
|
||||
#include "kernel/device/gpu/kernel.h"
|
||||
|
||||
/* MetalRT intersection handlers */
|
||||
/* MetalRT intersection handlers. */
|
||||
|
||||
#ifdef __METALRT__
|
||||
|
||||
/* Return type for a bounding box intersection function. */
|
||||
struct BoundingBoxIntersectionResult
|
||||
{
|
||||
/* Intersection return types. */
|
||||
|
||||
/* For a bounding box intersection function. */
|
||||
struct BoundingBoxIntersectionResult {
|
||||
bool accept [[accept_intersection]];
|
||||
bool continue_search [[continue_search]];
|
||||
float distance [[distance]];
|
||||
};
|
||||
|
||||
/* Return type for a triangle intersection function. */
|
||||
struct TriangleIntersectionResult
|
||||
{
|
||||
/* For a triangle intersection function. */
|
||||
struct TriangleIntersectionResult {
|
||||
bool accept [[accept_intersection]];
|
||||
bool continue_search [[continue_search]];
|
||||
bool continue_search [[continue_search]];
|
||||
};
|
||||
|
||||
enum { METALRT_HIT_TRIANGLE, METALRT_HIT_BOUNDING_BOX };
|
||||
|
||||
ccl_device_inline bool intersection_skip_self(ray_data const RaySelfPrimitives& self,
|
||||
/* Utilities. */
|
||||
|
||||
ccl_device_inline bool intersection_skip_self(ray_data const RaySelfPrimitives &self,
|
||||
const int object,
|
||||
const int prim)
|
||||
{
|
||||
return (self.prim == prim) && (self.object == object);
|
||||
}
|
||||
|
||||
ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimitives& self,
|
||||
ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimitives &self,
|
||||
const int object,
|
||||
const int prim)
|
||||
{
|
||||
@@ -43,12 +46,14 @@ ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimi
|
||||
((self.light_prim == prim) && (self.light_object == object));
|
||||
}
|
||||
|
||||
ccl_device_inline bool intersection_skip_self_local(ray_data const RaySelfPrimitives& self,
|
||||
ccl_device_inline bool intersection_skip_self_local(ray_data const RaySelfPrimitives &self,
|
||||
const int prim)
|
||||
{
|
||||
return (self.prim == prim);
|
||||
}
|
||||
|
||||
/* Hit functions. */
|
||||
|
||||
template<typename TReturn, uint intersection_type>
|
||||
TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload,
|
||||
@@ -58,7 +63,7 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
|
||||
const float ray_tmax)
|
||||
{
|
||||
TReturn result;
|
||||
|
||||
|
||||
#ifdef __BVH_LOCAL__
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
|
||||
@@ -101,7 +106,8 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
|
||||
}
|
||||
else {
|
||||
if (payload.local_isect.num_hits && ray_tmax > payload.local_isect.hits[0].t) {
|
||||
/* Record closest intersection only. Do not terminate ray here, since there is no guarantee about distance ordering in any-hit */
|
||||
/* Record closest intersection only. Do not terminate ray here, since there is no guarantee
|
||||
* about distance ordering in any-hit */
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
@@ -116,8 +122,8 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
|
||||
isect->object = object;
|
||||
isect->type = kernel_data_fetch(objects, object).primitive_type;
|
||||
|
||||
isect->u = 1.0f - barycentrics.y - barycentrics.x;
|
||||
isect->v = barycentrics.x;
|
||||
isect->u = barycentrics.x;
|
||||
isect->v = barycentrics.y;
|
||||
|
||||
/* Record geometric normal */
|
||||
const uint tri_vindex = kernel_data_fetch(tri_vindex, isect->prim).w;
|
||||
@@ -133,21 +139,20 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
|
||||
#endif
|
||||
}
|
||||
|
||||
[[intersection(triangle, triangle_data, METALRT_TAGS)]]
|
||||
TriangleIntersectionResult
|
||||
__anyhit__cycles_metalrt_local_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]],
|
||||
uint instance_id [[user_instance_id]],
|
||||
uint primitive_id [[primitive_id]],
|
||||
float2 barycentrics [[barycentric_coord]],
|
||||
float ray_tmax [[distance]])
|
||||
[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
|
||||
__anyhit__cycles_metalrt_local_hit_tri(
|
||||
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]],
|
||||
uint instance_id [[user_instance_id]],
|
||||
uint primitive_id [[primitive_id]],
|
||||
float2 barycentrics [[barycentric_coord]],
|
||||
float ray_tmax [[distance]])
|
||||
{
|
||||
return metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
|
||||
launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax);
|
||||
launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax);
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
|
||||
BoundingBoxIntersectionResult
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]])
|
||||
{
|
||||
/* unused function */
|
||||
@@ -180,18 +185,14 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
|
||||
return true;
|
||||
}
|
||||
|
||||
float u = 0.0f, v = 0.0f;
|
||||
const float u = barycentrics.x;
|
||||
const float v = barycentrics.y;
|
||||
int type = 0;
|
||||
if (intersection_type == METALRT_HIT_TRIANGLE) {
|
||||
u = 1.0f - barycentrics.y - barycentrics.x;
|
||||
v = barycentrics.x;
|
||||
type = kernel_data_fetch(objects, object).primitive_type;
|
||||
}
|
||||
# ifdef __HAIR__
|
||||
else {
|
||||
u = barycentrics.x;
|
||||
v = barycentrics.y;
|
||||
|
||||
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
|
||||
type = segment.type;
|
||||
prim = segment.prim;
|
||||
@@ -215,7 +216,7 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
|
||||
short num_recorded_hits = payload.num_recorded_hits;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
|
||||
|
||||
/* If no transparent shadows, all light is blocked and we can stop immediately. */
|
||||
if (num_hits >= max_hits ||
|
||||
!(context.intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
|
||||
@@ -223,7 +224,7 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
|
||||
/* terminate ray */
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/* Always use baked shadow transparency for curves. */
|
||||
if (type & PRIMITIVE_CURVE) {
|
||||
float throughput = payload.throughput;
|
||||
@@ -240,10 +241,10 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
payload.num_hits += 1;
|
||||
payload.num_recorded_hits += 1;
|
||||
|
||||
|
||||
uint record_index = num_recorded_hits;
|
||||
|
||||
const IntegratorShadowState state = payload.state;
|
||||
@@ -278,7 +279,7 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
|
||||
|
||||
|
||||
/* Continue tracing. */
|
||||
# endif /* __TRANSPARENT_SHADOWS__ */
|
||||
#endif /* __SHADOW_RECORD_ALL__ */
|
||||
@@ -286,26 +287,25 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
|
||||
return true;
|
||||
}
|
||||
|
||||
[[intersection(triangle, triangle_data, METALRT_TAGS)]]
|
||||
TriangleIntersectionResult
|
||||
__anyhit__cycles_metalrt_shadow_all_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
|
||||
unsigned int object [[user_instance_id]],
|
||||
unsigned int primitive_id [[primitive_id]],
|
||||
float2 barycentrics [[barycentric_coord]],
|
||||
float ray_tmax [[distance]])
|
||||
[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
|
||||
__anyhit__cycles_metalrt_shadow_all_hit_tri(
|
||||
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
|
||||
unsigned int object [[user_instance_id]],
|
||||
unsigned int primitive_id [[primitive_id]],
|
||||
float2 barycentrics [[barycentric_coord]],
|
||||
float ray_tmax [[distance]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
|
||||
TriangleIntersectionResult result;
|
||||
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_TRIANGLE>(
|
||||
launch_params_metal, payload, object, prim, barycentrics, ray_tmax);
|
||||
launch_params_metal, payload, object, prim, barycentrics, ray_tmax);
|
||||
result.accept = !result.continue_search;
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
|
||||
BoundingBoxIntersectionResult
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]])
|
||||
{
|
||||
/* unused function */
|
||||
@@ -317,15 +317,16 @@ __anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]
|
||||
}
|
||||
|
||||
template<typename TReturnType, uint intersection_type>
|
||||
inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const float u)
|
||||
inline TReturnType metalrt_visibility_test(
|
||||
constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const float u)
|
||||
{
|
||||
TReturnType result;
|
||||
|
||||
# ifdef __HAIR__
|
||||
|
||||
#ifdef __HAIR__
|
||||
if (intersection_type == METALRT_HIT_BOUNDING_BOX) {
|
||||
/* Filter out curve endcaps. */
|
||||
if (u == 0.0f || u == 1.0f) {
|
||||
@@ -334,16 +335,16 @@ inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_pa
|
||||
return result;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
|
||||
uint visibility = payload.visibility;
|
||||
# ifdef __VISIBILITY_FLAG__
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Shadow ray early termination. */
|
||||
if (visibility & PATH_RAY_SHADOW_OPAQUE) {
|
||||
@@ -371,16 +372,17 @@ inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_pa
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(triangle, triangle_data, METALRT_TAGS)]]
|
||||
TriangleIntersectionResult
|
||||
__anyhit__cycles_metalrt_visibility_test_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
|
||||
unsigned int object [[user_instance_id]],
|
||||
unsigned int primitive_id [[primitive_id]])
|
||||
[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
|
||||
__anyhit__cycles_metalrt_visibility_test_tri(
|
||||
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
|
||||
unsigned int object [[user_instance_id]],
|
||||
unsigned int primitive_id [[primitive_id]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
TriangleIntersectionResult result = metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
|
||||
launch_params_metal, payload, object, prim, 0.0f);
|
||||
TriangleIntersectionResult result =
|
||||
metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
|
||||
launch_params_metal, payload, object, prim, 0.0f);
|
||||
if (result.accept) {
|
||||
payload.prim = prim;
|
||||
payload.type = kernel_data_fetch(objects, object).primitive_type;
|
||||
@@ -388,8 +390,7 @@ __anyhit__cycles_metalrt_visibility_test_tri(constant KernelParamsMetal &launch_
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
|
||||
BoundingBoxIntersectionResult
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance]])
|
||||
{
|
||||
/* Unused function */
|
||||
@@ -400,19 +401,21 @@ __anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Primitive intersection functions. */
|
||||
|
||||
#ifdef __HAIR__
|
||||
ccl_device_inline
|
||||
void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const uint type,
|
||||
const float3 ray_origin,
|
||||
const float3 ray_direction,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
ccl_device_inline void metalrt_intersection_curve(
|
||||
constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const uint type,
|
||||
const float3 ray_P,
|
||||
const float3 ray_D,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
{
|
||||
# ifdef __VISIBILITY_FLAG__
|
||||
const uint visibility = payload.visibility;
|
||||
@@ -421,25 +424,16 @@ void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal,
|
||||
}
|
||||
# endif
|
||||
|
||||
float3 P = ray_origin;
|
||||
float3 dir = ray_direction;
|
||||
|
||||
/* The direction is not normalized by default, but the curve intersection routine expects that */
|
||||
float len;
|
||||
dir = normalize_len(dir, &len);
|
||||
|
||||
Intersection isect;
|
||||
isect.t = ray_tmax;
|
||||
/* Transform maximum distance into object space. */
|
||||
if (isect.t != FLT_MAX)
|
||||
isect.t *= len;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
if (context.curve_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
if (context.curve_intersect(
|
||||
NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
|
||||
launch_params_metal, payload, object, prim, isect.u);
|
||||
launch_params_metal, payload, object, prim, isect.u);
|
||||
if (result.accept) {
|
||||
result.distance = isect.t / len;
|
||||
result.distance = isect.t;
|
||||
payload.u = isect.u;
|
||||
payload.v = isect.v;
|
||||
payload.prim = prim;
|
||||
@@ -448,54 +442,41 @@ void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal,
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device_inline
|
||||
void metalrt_intersection_curve_shadow(constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const uint type,
|
||||
const float3 ray_origin,
|
||||
const float3 ray_direction,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
ccl_device_inline void metalrt_intersection_curve_shadow(
|
||||
constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const uint type,
|
||||
const float3 ray_P,
|
||||
const float3 ray_D,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
{
|
||||
const uint visibility = payload.visibility;
|
||||
|
||||
float3 P = ray_origin;
|
||||
float3 dir = ray_direction;
|
||||
|
||||
/* The direction is not normalized by default, but the curve intersection routine expects that */
|
||||
float len;
|
||||
dir = normalize_len(dir, &len);
|
||||
|
||||
Intersection isect;
|
||||
isect.t = ray_tmax;
|
||||
/* Transform maximum distance into object space */
|
||||
if (isect.t != FLT_MAX)
|
||||
isect.t *= len;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
if (context.curve_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
if (context.curve_intersect(
|
||||
NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
|
||||
launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
|
||||
launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
|
||||
result.accept = !result.continue_search;
|
||||
|
||||
if (result.accept) {
|
||||
result.distance = isect.t / len;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
|
||||
BoundingBoxIntersectionResult
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload
|
||||
[[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_origin [[origin]],
|
||||
const float3 ray_direction [[direction]],
|
||||
const float3 ray_P [[origin]],
|
||||
const float3 ray_D [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
@@ -508,28 +489,36 @@ __intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[b
|
||||
result.distance = ray_tmax;
|
||||
|
||||
if (segment.type & PRIMITIVE_CURVE_RIBBON) {
|
||||
metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
|
||||
metalrt_intersection_curve(launch_params_metal,
|
||||
payload,
|
||||
object,
|
||||
segment.prim,
|
||||
segment.type,
|
||||
ray_P,
|
||||
ray_D,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin, ray_tmax, result);
|
||||
ray_tmin,
|
||||
ray_tmax,
|
||||
result);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
|
||||
BoundingBoxIntersectionResult
|
||||
__intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_origin [[origin]],
|
||||
const float3 ray_direction [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__intersection__curve_ribbon_shadow(
|
||||
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_P [[origin]],
|
||||
const float3 ray_D [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
|
||||
@@ -540,57 +529,73 @@ __intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_me
|
||||
result.distance = ray_tmax;
|
||||
|
||||
if (segment.type & PRIMITIVE_CURVE_RIBBON) {
|
||||
metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
|
||||
metalrt_intersection_curve_shadow(launch_params_metal,
|
||||
payload,
|
||||
object,
|
||||
segment.prim,
|
||||
segment.type,
|
||||
ray_P,
|
||||
ray_D,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin, ray_tmax, result);
|
||||
ray_tmin,
|
||||
ray_tmax,
|
||||
result);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
|
||||
BoundingBoxIntersectionResult
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload
|
||||
[[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_origin [[origin]],
|
||||
const float3 ray_direction [[direction]],
|
||||
const float3 ray_P [[origin]],
|
||||
const float3 ray_D [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
|
||||
|
||||
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
|
||||
metalrt_intersection_curve(launch_params_metal,
|
||||
payload,
|
||||
object,
|
||||
segment.prim,
|
||||
segment.type,
|
||||
ray_P,
|
||||
ray_D,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin, ray_tmax, result);
|
||||
ray_tmin,
|
||||
ray_tmax,
|
||||
result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
|
||||
BoundingBoxIntersectionResult
|
||||
__intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_origin [[origin]],
|
||||
const float3 ray_direction [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__intersection__curve_all_shadow(
|
||||
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_P [[origin]],
|
||||
const float3 ray_D [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
|
||||
@@ -600,31 +605,39 @@ __intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
|
||||
metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
|
||||
metalrt_intersection_curve_shadow(launch_params_metal,
|
||||
payload,
|
||||
object,
|
||||
segment.prim,
|
||||
segment.type,
|
||||
ray_P,
|
||||
ray_D,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin, ray_tmax, result);
|
||||
ray_tmin,
|
||||
ray_tmax,
|
||||
result);
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif /* __HAIR__ */
|
||||
|
||||
#ifdef __POINTCLOUD__
|
||||
ccl_device_inline
|
||||
void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const uint type,
|
||||
const float3 ray_origin,
|
||||
const float3 ray_direction,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
ccl_device_inline void metalrt_intersection_point(
|
||||
constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const uint type,
|
||||
const float3 ray_P,
|
||||
const float3 ray_D,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
{
|
||||
# ifdef __VISIBILITY_FLAG__
|
||||
const uint visibility = payload.visibility;
|
||||
@@ -633,25 +646,16 @@ void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal,
|
||||
}
|
||||
# endif
|
||||
|
||||
float3 P = ray_origin;
|
||||
float3 dir = ray_direction;
|
||||
|
||||
/* The direction is not normalized by default, but the point intersection routine expects that */
|
||||
float len;
|
||||
dir = normalize_len(dir, &len);
|
||||
|
||||
Intersection isect;
|
||||
isect.t = ray_tmax;
|
||||
/* Transform maximum distance into object space. */
|
||||
if (isect.t != FLT_MAX)
|
||||
isect.t *= len;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
if (context.point_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
if (context.point_intersect(
|
||||
NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
|
||||
launch_params_metal, payload, object, prim, isect.u);
|
||||
launch_params_metal, payload, object, prim, isect.u);
|
||||
if (result.accept) {
|
||||
result.distance = isect.t / len;
|
||||
result.distance = isect.t;
|
||||
payload.u = isect.u;
|
||||
payload.v = isect.v;
|
||||
payload.prim = prim;
|
||||
@@ -660,50 +664,78 @@ void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal,
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device_inline
|
||||
void metalrt_intersection_point_shadow(constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const uint type,
|
||||
const float3 ray_origin,
|
||||
const float3 ray_direction,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
ccl_device_inline void metalrt_intersection_point_shadow(
|
||||
constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const uint type,
|
||||
const float3 ray_P,
|
||||
const float3 ray_D,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
{
|
||||
const uint visibility = payload.visibility;
|
||||
|
||||
float3 P = ray_origin;
|
||||
float3 dir = ray_direction;
|
||||
|
||||
/* The direction is not normalized by default, but the point intersection routine expects that */
|
||||
float len;
|
||||
dir = normalize_len(dir, &len);
|
||||
|
||||
Intersection isect;
|
||||
isect.t = ray_tmax;
|
||||
/* Transform maximum distance into object space */
|
||||
if (isect.t != FLT_MAX)
|
||||
isect.t *= len;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
if (context.point_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
if (context.point_intersect(
|
||||
NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
|
||||
launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
|
||||
launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
|
||||
result.accept = !result.continue_search;
|
||||
|
||||
if (result.accept) {
|
||||
result.distance = isect.t / len;
|
||||
result.distance = isect.t;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
|
||||
BoundingBoxIntersectionResult
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_origin [[origin]],
|
||||
const float3 ray_direction [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
const int type = kernel_data_fetch(objects, object).primitive_type;
|
||||
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
|
||||
metalrt_intersection_point(launch_params_metal,
|
||||
payload,
|
||||
object,
|
||||
prim,
|
||||
type,
|
||||
ray_origin,
|
||||
ray_direction,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin,
|
||||
ray_tmax,
|
||||
result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload
|
||||
[[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_origin [[origin]],
|
||||
@@ -719,43 +751,21 @@ __intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
|
||||
metalrt_intersection_point(launch_params_metal, payload, object, prim, type, ray_origin, ray_direction,
|
||||
metalrt_intersection_point_shadow(launch_params_metal,
|
||||
payload,
|
||||
object,
|
||||
prim,
|
||||
type,
|
||||
ray_origin,
|
||||
ray_direction,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin, ray_tmax, result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
|
||||
BoundingBoxIntersectionResult
|
||||
__intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_origin [[origin]],
|
||||
const float3 ray_direction [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
const int type = kernel_data_fetch(objects, object).primitive_type;
|
||||
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
|
||||
metalrt_intersection_point_shadow(launch_params_metal, payload, object, prim, type, ray_origin, ray_direction,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin, ray_tmax, result);
|
||||
ray_tmin,
|
||||
ray_tmax,
|
||||
result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@@ -149,25 +149,13 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
|
||||
/* clang-format on */
|
||||
|
||||
/* Types */
|
||||
|
||||
/* It's not possible to use sycl types like sycl::float3, sycl::int3, etc
|
||||
* because these types have different interfaces from blender version */
|
||||
* because these types have different interfaces from blender version. */
|
||||
|
||||
using uchar = unsigned char;
|
||||
using sycl::half;
|
||||
|
||||
struct float3 {
|
||||
float x, y, z;
|
||||
};
|
||||
|
||||
ccl_always_inline float3 make_float3(float x, float y, float z)
|
||||
{
|
||||
return {x, y, z};
|
||||
}
|
||||
ccl_always_inline float3 make_float3(float x)
|
||||
{
|
||||
return {x, x, x};
|
||||
}
|
||||
|
||||
/* math functions */
|
||||
#define fabsf(x) sycl::fabs((x))
|
||||
#define copysignf(x, y) sycl::copysign((x), (y))
|
||||
|
@@ -6,7 +6,8 @@ DLL_INTERFACE_CALL(oneapi_device_capabilities, char *)
|
||||
DLL_INTERFACE_CALL(oneapi_free, void, void *)
|
||||
DLL_INTERFACE_CALL(oneapi_get_memcapacity, size_t, SyclQueue *queue)
|
||||
|
||||
DLL_INTERFACE_CALL(oneapi_get_compute_units_amount, size_t, SyclQueue *queue)
|
||||
DLL_INTERFACE_CALL(oneapi_get_num_multiprocessors, int, SyclQueue *queue)
|
||||
DLL_INTERFACE_CALL(oneapi_get_max_num_threads_per_multiprocessor, int, SyclQueue *queue)
|
||||
DLL_INTERFACE_CALL(oneapi_iterate_devices, void, OneAPIDeviceIteratorCallback cb, void *user_ptr)
|
||||
DLL_INTERFACE_CALL(oneapi_set_error_cb, void, OneAPIErrorCallback, void *user_ptr)
|
||||
|
||||
|
@@ -904,11 +904,26 @@ size_t oneapi_get_memcapacity(SyclQueue *queue)
|
||||
.get_info<sycl::info::device::global_mem_size>();
|
||||
}
|
||||
|
||||
size_t oneapi_get_compute_units_amount(SyclQueue *queue)
|
||||
int oneapi_get_num_multiprocessors(SyclQueue *queue)
|
||||
{
|
||||
return reinterpret_cast<sycl::queue *>(queue)
|
||||
->get_device()
|
||||
.get_info<sycl::info::device::max_compute_units>();
|
||||
const sycl::device &device = reinterpret_cast<sycl::queue *>(queue)->get_device();
|
||||
if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) {
|
||||
return device.get_info<sycl::info::device::ext_intel_gpu_eu_count>();
|
||||
}
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Query the device behind `queue` for its EU SIMD width and hardware threads per EU and return
 * their product. Returns 0 when the device does not report both aspects. */
int oneapi_get_max_num_threads_per_multiprocessor(SyclQueue *queue)
{
  const sycl::device &device = reinterpret_cast<sycl::queue *>(queue)->get_device();

  /* Both values are needed for the product, so bail out if either one is unavailable. */
  if (!device.has(sycl::aspect::ext_intel_gpu_eu_simd_width) ||
      !device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) {
    return 0;
  }

  return device.get_info<sycl::info::device::ext_intel_gpu_eu_simd_width>() *
         device.get_info<sycl::info::device::ext_intel_gpu_hw_threads_per_eu>();
}
|
||||
|
||||
#endif /* WITH_ONEAPI */
|
||||
|
646
intern/cycles/kernel/device/optix/bvh.h
Normal file
646
intern/cycles/kernel/device/optix/bvh.h
Normal file
@@ -0,0 +1,646 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2021-2022 Blender Foundation */
|
||||
|
||||
/* OptiX implementation of ray-scene intersection. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kernel/bvh/types.h"
|
||||
#include "kernel/bvh/util.h"
|
||||
|
||||
#define OPTIX_DEFINE_ABI_VERSION_ONLY
|
||||
#include <optix_function_table.h>
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Utilities. */
|
||||
|
||||
/* Rebuild a pointer to T from the two 32-bit words stored in payload registers 0 and 1. */
template<typename T> ccl_device_forceinline T *get_payload_ptr_0()
{
  const uint first_word = optixGetPayload_0();
  const uint second_word = optixGetPayload_1();
  return pointer_unpack_from_uint<T>(first_word, second_word);
}
|
||||
/* Rebuild a pointer to T from the two 32-bit words stored in payload registers 2 and 3. */
template<typename T> ccl_device_forceinline T *get_payload_ptr_2()
{
  const uint first_word = optixGetPayload_2();
  const uint second_word = optixGetPayload_3();
  return pointer_unpack_from_uint<T>(first_word, second_word);
}
|
||||
|
||||
/* Rebuild a pointer to T from payload register 6 (low 32 bits) and payload register 7
 * (high 32 bits). */
template<typename T> ccl_device_forceinline T *get_payload_ptr_6()
{
  const uint64_t upper_bits = (uint64_t)optixGetPayload_7() << 32;
  const uint64_t lower_bits = optixGetPayload_6();
  return (T *)(upper_bits | lower_bits);
}
|
||||
|
||||
/* Return the instance ID of the object that the current hit belongs to. */
ccl_device_forceinline int get_object_id()
{
#ifndef __OBJECT_MOTION__
  return optixGetInstanceId();
#else
  /* With object motion, always read the instance ID from the TLAS (first entry of the
   * transform list): a motion transform node may sit between TLAS and BLAS, and such a
   * node does not have an instance ID. */
  return optixGetInstanceIdFromHandle(optixGetTransformListHandle(0));
#endif
}
|
||||
|
||||
/* Hit/miss functions. */
|
||||
|
||||
/* Miss program: writes the ray's maximum distance into payload 0 and marks payload 5 as
 * PRIMITIVE_NONE. */
extern "C" __global__ void __miss__kernel_optix_miss()
{
  /* The distance has to be set even on a miss, because 'kernel_path_lamp_emission' checks
   * the intersection distance. */
  const float miss_distance = optixGetRayTmax();
  optixSetPayload_0(__float_as_uint(miss_distance));

  optixSetPayload_5(PRIMITIVE_NONE);
}
|
||||
|
||||
/* Any-hit program for local intersection queries.
 *
 * Payload usage as read by this function:
 * - 0/1: packed pointer to the LCG state (may be null).
 * - 2/3: packed pointer to the LocalIntersection that receives the hits.
 * - 4: the local object; hits on any other object are ignored.
 * - 5: maximum number of hits on input; set to true when max_hits is zero and a hit is found.
 * - 6/7: packed pointer to the Ray. */
extern "C" __global__ void __anyhit__kernel_optix_local_hit()
{
#if defined(__HAIR__) || defined(__POINTCLOUD__)
  /* Curves and points are never recorded, only triangles. */
  if (!optixIsTriangleHit()) {
    optixIgnoreIntersection();
    return;
  }
#endif

#ifdef __BVH_LOCAL__
  /* Only intersect with the matching (local) object. */
  const int object = get_object_id();
  if (object != optixGetPayload_4()) {
    optixIgnoreIntersection();
    return;
  }

  const int prim = optixGetPrimitiveIndex();
  ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
  if (intersection_skip_self_local(ray->self, prim)) {
    optixIgnoreIntersection();
    return;
  }

  const uint max_hits = optixGetPayload_5();
  if (max_hits == 0) {
    /* No hit information was requested: just report that something was hit and stop. */
    optixSetPayload_5(true);
    optixTerminateRay();
    return;
  }

  uint *const lcg_state = get_payload_ptr_0<uint>();
  LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>();

  /* Index of the slot in `local_isect->hits` that this hit is written to. */
  int hit = 0;

  if (!lcg_state) {
    /* Without an LCG state only the closest intersection is recorded.
     * The ray is not terminated here, since there is no guarantee about distance ordering
     * in any-hit. */
    if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) {
      optixIgnoreIntersection();
      return;
    }

    local_isect->num_hits = 1;
  }
  else {
    /* Skip a hit whose distance equals that of an already recorded hit. */
    for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
      if (optixGetRayTmax() == local_isect->hits[i].t) {
        optixIgnoreIntersection();
        return;
      }
    }

    hit = local_isect->num_hits++;

    /* Once more hits were found than can be stored, pick a random slot; if the slot falls
     * outside the stored range, the new hit is dropped. */
    if (local_isect->num_hits > max_hits) {
      hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
      if (hit >= max_hits) {
        optixIgnoreIntersection();
        return;
      }
    }
  }

  /* Fill in the intersection record. */
  Intersection *isect = &local_isect->hits[hit];
  isect->t = optixGetRayTmax();
  isect->prim = prim;
  isect->object = get_object_id();
  isect->type = kernel_data_fetch(objects, isect->object).primitive_type;

  const float2 barycentrics = optixGetTriangleBarycentrics();
  isect->u = barycentrics.x;
  isect->v = barycentrics.y;

  /* Record the geometric normal from the triangle's three vertices. */
  const uint first_vert = kernel_data_fetch(tri_vindex, prim).w;
  const float3 vert_a = kernel_data_fetch(tri_verts, first_vert + 0);
  const float3 vert_b = kernel_data_fetch(tri_verts, first_vert + 1);
  const float3 vert_c = kernel_data_fetch(tri_verts, first_vert + 2);
  const float3 edge_ab = vert_b - vert_a;
  const float3 edge_ac = vert_c - vert_a;
  local_isect->Ng[hit] = normalize(cross(edge_ab, edge_ac));

  /* Continue tracing (without this the trace call would return after the first hit). */
  optixIgnoreIntersection();
#endif
}
|
||||
|
||||
/* Any-hit program that records transparent shadow intersections.
 *
 * Payload usage as read/written by this function:
 * - 0: IntegratorShadowState that receives the recorded hits.
 * - 1: throughput as float bits (updated for curve hits).
 * - 2: two packed 16-bit counters, the number of recorded hits and the total number of hits.
 * - 3: maximum number of hits.
 * - 4: visibility mask.
 * - 5: set to true when the ray is found to be blocked.
 * - 6/7: packed pointer to the Ray. */
extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
{
#ifdef __SHADOW_RECORD_ALL__
  int prim = optixGetPrimitiveIndex();
  const uint object = get_object_id();
#  ifdef __VISIBILITY_FLAG__
  const uint visibility = optixGetPayload_4();
  if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
    optixIgnoreIntersection();
    return;
  }
#  endif

  ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
  if (intersection_skip_self_shadow(ray->self, object, prim)) {
    optixIgnoreIntersection();
    return;
  }

  /* Gather u/v and the primitive type depending on what kind of primitive was hit. */
  float u = 0.0f;
  float v = 0.0f;
  int type = 0;
  if (optixIsTriangleHit()) {
    const float2 barycentrics = optixGetTriangleBarycentrics();
    u = barycentrics.x;
    v = barycentrics.y;
    type = kernel_data_fetch(objects, object).primitive_type;
  }
#  ifdef __HAIR__
  else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
    u = __uint_as_float(optixGetAttribute_0());
    v = __uint_as_float(optixGetAttribute_1());

    /* For curves, type and primitive index come from the curve segment. */
    const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
    type = segment.type;
    prim = segment.prim;

#    if OPTIX_ABI_VERSION < 55
    /* Filter out curve end-caps. */
    if (u == 0.0f || u == 1.0f) {
      optixIgnoreIntersection();
      return;
    }
#    endif
  }
#  endif
  else {
    /* Remaining hit kinds: u and v keep their zero defaults. */
    type = kernel_data_fetch(objects, object).primitive_type;
  }

#  ifndef __TRANSPARENT_SHADOWS__
  /* No transparent shadows support compiled in, make opaque. */
  optixSetPayload_5(true);
  optixTerminateRay();
  return;
#  else
  const uint max_hits = optixGetPayload_3();
  const uint num_hits_packed = optixGetPayload_2();
  const uint num_recorded_hits = uint16_unpack_from_uint_0(num_hits_packed);
  const uint num_hits = uint16_unpack_from_uint_1(num_hits_packed);

  /* If no transparent shadows, all light is blocked and we can stop immediately. */
  if (num_hits >= max_hits ||
      !(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
    optixSetPayload_5(true);
    optixTerminateRay();
    return;
  }

  /* Always use baked shadow transparency for curves. */
  if (type & PRIMITIVE_CURVE) {
    const float throughput = __uint_as_float(optixGetPayload_1()) *
                             intersection_curve_shadow_transparency(nullptr, object, prim, u);
    optixSetPayload_1(__float_as_uint(throughput));
    /* Curve hits raise only the total hit counter, not the recorded one. */
    optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits, num_hits + 1));

    if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
      optixSetPayload_5(true);
      optixTerminateRay();
      return;
    }

    /* Continue tracing. */
    optixIgnoreIntersection();
    return;
  }

  /* Record transparent intersection. */
  optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits + 1, num_hits + 1));

  uint record_index = num_recorded_hits;

  const IntegratorShadowState state = optixGetPayload_0();

  const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
  if (record_index >= max_record_hits) {
    /* If maximum number of hits reached, find a hit to replace: the recorded hit with the
     * largest distance. */
    float farthest_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
    uint farthest_index = 0;

    for (int i = 1; i < max_record_hits; i++) {
      const float recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
      if (recorded_t > farthest_t) {
        farthest_t = recorded_t;
        farthest_index = i;
      }
    }

    if (optixGetRayTmax() >= farthest_t) {
      /* Accept hit, so that OptiX won't consider any more hits beyond the distance of the
       * current hit anymore. */
      return;
    }

    record_index = farthest_index;
  }

  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u;
  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v;
  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = optixGetRayTmax();
  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;

  /* Continue tracing. */
  optixIgnoreIntersection();
#  endif /* __TRANSPARENT_SHADOWS__ */
#endif   /* __SHADOW_RECORD_ALL__ */
}
|
||||
|
||||
/* Any-hit program for volume queries: a hit is ignored unless it is a triangle hit on a
 * visible object that has the SD_OBJECT_HAS_VOLUME flag and is not a self-intersection. */
extern "C" __global__ void __anyhit__kernel_optix_volume_test()
{
#if defined(__HAIR__) || defined(__POINTCLOUD__)
  /* Ignore everything that is not a triangle (curves and points). */
  if (!optixIsTriangleHit()) {
    optixIgnoreIntersection();
    return;
  }
#endif

  const uint object = get_object_id();
#ifdef __VISIBILITY_FLAG__
  /* Payload 4 carries the visibility mask of the ray. */
  const uint visibility = optixGetPayload_4();
  const bool is_visible = (kernel_data_fetch(objects, object).visibility & visibility) != 0;
  if (!is_visible) {
    optixIgnoreIntersection();
    return;
  }
#endif

  const bool has_volume = (kernel_data_fetch(object_flag, object) & SD_OBJECT_HAS_VOLUME) != 0;
  if (!has_volume) {
    optixIgnoreIntersection();
    return;
  }

  const int prim = optixGetPrimitiveIndex();
  ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
  if (intersection_skip_self(ray->self, object, prim)) {
    optixIgnoreIntersection();
  }
}
|
||||
|
||||
/* Any-hit program that filters hits by visibility and self-intersection. For rays carrying
 * PATH_RAY_SHADOW_OPAQUE, the first hit that is not filtered out terminates the ray. */
extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
{
#ifdef __HAIR__
#  if OPTIX_ABI_VERSION < 55
  if (optixGetPrimitiveType() == OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE) {
    /* Filter out curve end-caps. */
    const float curve_u = __uint_as_float(optixGetAttribute_0());
    if (curve_u == 0.0f || curve_u == 1.0f) {
      optixIgnoreIntersection();
      return;
    }
  }
#  endif
#endif

  const uint object = get_object_id();
  /* Payload 4 carries the visibility mask of the ray. */
  const uint visibility = optixGetPayload_4();
#ifdef __VISIBILITY_FLAG__
  if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
    optixIgnoreIntersection();
    return;
  }
#endif

  const int prim = optixGetPrimitiveIndex();
  ccl_private Ray *const ray = get_payload_ptr_6<Ray>();

  if (visibility & PATH_RAY_SHADOW_OPAQUE) {
    if (intersection_skip_self_shadow(ray->self, object, prim)) {
      optixIgnoreIntersection();
      return;
    }

    /* Shadow ray early termination. */
    optixTerminateRay();
    return;
  }

  if (intersection_skip_self(ray->self, object, prim)) {
    optixIgnoreIntersection();
  }
}
|
||||
|
||||
/* Closest-hit program: writes the hit into the payload registers.
 *
 * Payload written: 0 = distance (float bits), 1 = u, 2 = v, 3 = primitive index,
 * 4 = object, 5 = primitive type. */
extern "C" __global__ void __closesthit__kernel_optix_hit()
{
  const int object = get_object_id();
  const int prim = optixGetPrimitiveIndex();

  /* Values for payload registers 1, 2, 3 and 5; they depend on the kind of primitive hit. */
  uint hit_u = 0;
  uint hit_v = 0;
  uint hit_prim = 0;
  uint hit_type = 0;

  if (optixIsTriangleHit()) {
    const float2 barycentrics = optixGetTriangleBarycentrics();
    hit_u = __float_as_uint(barycentrics.x);
    hit_v = __float_as_uint(barycentrics.y);
    hit_prim = prim;
    hit_type = kernel_data_fetch(objects, object).primitive_type;
  }
  else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
    /* Curve hit: primitive index and type come from the curve segment. */
    const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
    hit_u = optixGetAttribute_0(); /* Same as 'optixGetCurveParameter()' */
    hit_v = optixGetAttribute_1();
    hit_prim = segment.prim;
    hit_type = segment.type;
  }
  else {
    /* Point hit: u and v stay zero. */
    hit_prim = prim;
    hit_type = kernel_data_fetch(objects, object).primitive_type;
  }

  optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */
  optixSetPayload_1(hit_u);
  optixSetPayload_2(hit_v);
  optixSetPayload_3(hit_prim);
  optixSetPayload_4(object);
  optixSetPayload_5(hit_type);
}
|
||||
|
||||
/* Custom primitive intersection functions. */
|
||||
|
||||
#ifdef __HAIR__
|
||||
/* Intersect the current object-space ray with curve primitive `prim` of the given `type` and
 * report the hit to OptiX, with u and v passed as attributes 0 and 1. */
ccl_device_inline void optix_intersection_curve(const int prim, const int type)
{
  const int object = get_object_id();

#  ifdef __VISIBILITY_FLAG__
  /* Payload 4 carries the visibility mask of the ray. */
  const uint visibility = optixGetPayload_4();
  if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
    return;
  }
#  endif

  const float3 origin = optixGetObjectRayOrigin();
  const float3 direction = optixGetObjectRayDirection();
  const float tmin = optixGetRayTmin();

#  ifdef __OBJECT_MOTION__
  const float time = optixGetRayTime();
#  else
  const float time = 0.0f;
#  endif

  /* The current maximum ray distance is passed as the upper bound of the search. */
  Intersection isect;
  isect.t = optixGetRayTmax();

  if (!curve_intersect(NULL, &isect, origin, direction, tmin, isect.t, object, prim, time, type)) {
    return;
  }

  static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
  optixReportIntersection(isect.t,
                          type & PRIMITIVE_ALL,
                          __float_as_int(isect.u),  /* Attribute_0 */
                          __float_as_int(isect.v)); /* Attribute_1 */
}
|
||||
|
||||
/* Intersection program for curve segments: looks up the segment for the current primitive
 * index and intersects it only when its type has PRIMITIVE_CURVE_RIBBON set. */
extern "C" __global__ void __intersection__curve_ribbon()
{
  const KernelCurveSegment segment = kernel_data_fetch(curve_segments, optixGetPrimitiveIndex());
  const int curve_type = segment.type;
  const int curve_prim = segment.prim;

  if ((curve_type & PRIMITIVE_CURVE_RIBBON) == 0) {
    return;
  }

  optix_intersection_curve(curve_prim, curve_type);
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __POINTCLOUD__
|
||||
/* Intersection program for point primitives: intersects the object-space ray with the
 * current primitive and reports the hit to OptiX. */
extern "C" __global__ void __intersection__point()
{
  const int prim = optixGetPrimitiveIndex();
  const int object = get_object_id();
  const int type = kernel_data_fetch(objects, object).primitive_type;

#  ifdef __VISIBILITY_FLAG__
  /* Payload 4 carries the visibility mask of the ray. */
  const uint visibility = optixGetPayload_4();
  if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
    return;
  }
#  endif

  const float3 origin = optixGetObjectRayOrigin();
  const float3 direction = optixGetObjectRayDirection();
  const float tmin = optixGetRayTmin();

#  ifdef __OBJECT_MOTION__
  const float time = optixGetRayTime();
#  else
  const float time = 0.0f;
#  endif

  /* The current maximum ray distance is passed as the upper bound of the search. */
  Intersection isect;
  isect.t = optixGetRayTmax();

  if (!point_intersect(NULL, &isect, origin, direction, tmin, isect.t, object, prim, time, type)) {
    return;
  }

  static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
  optixReportIntersection(isect.t, type & PRIMITIVE_ALL);
}
|
||||
#endif
|
||||
|
||||
/* Scene intersection. */
|
||||
|
||||
/* Trace `ray` against the scene and store the result in `isect`.
 *
 * Payload registers passed to optixTrace: 0 = distance, 1 = u, 2 = v, 3 = primitive index,
 * 4 = visibility on input and object on output, 5 = primitive type (PRIMITIVE_NONE if nothing
 * was hit), 6/7 = low/high 32 bits of the ray pointer.
 *
 * Returns true when the resulting primitive type is not PRIMITIVE_NONE. */
ccl_device_intersect bool scene_intersect(KernelGlobals kg,
                                          ccl_private const Ray *ray,
                                          const uint visibility,
                                          ccl_private Intersection *isect)
{
  uint hit_t = 0;
  uint hit_u = 0;
  uint hit_v = 0;
  uint hit_prim = 0;
  uint hit_object = visibility;
  uint hit_type = PRIMITIVE_NONE;

  /* Split the ray pointer into two 32-bit words for the any-hit programs. */
  const uint64_t ray_address = (uint64_t)ray;
  uint ray_ptr_low = ray_address & 0xFFFFFFFF;
  uint ray_ptr_high = (ray_address >> 32) & 0xFFFFFFFF;

  /* Only the low 8 bits of the visibility are used as the OptiX ray mask. */
  uint ray_mask = visibility & 0xFF;
  uint ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT;
  if (0 == ray_mask && (visibility & ~0xFF) != 0) {
    /* Visibility has bits set only above the low 8: use a full mask instead. */
    ray_mask = 0xFF;
  }
  else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
    ray_flags |= OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT;
  }

  /* An invalid ray is traced against a null handle. */
  optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0,
             ray->P,
             ray->D,
             ray->tmin,
             ray->tmax,
             ray->time,
             ray_mask,
             ray_flags,
             0, /* SBT offset for PG_HITD */
             0,
             0,
             hit_t,
             hit_u,
             hit_v,
             hit_prim,
             hit_object,
             hit_type,
             ray_ptr_low,
             ray_ptr_high);

  isect->t = __uint_as_float(hit_t);
  isect->u = __uint_as_float(hit_u);
  isect->v = __uint_as_float(hit_v);
  isect->prim = hit_prim;
  isect->object = hit_object;
  isect->type = hit_type;

  return hit_type != PRIMITIVE_NONE;
}
|
||||
|
||||
#ifdef __BVH_LOCAL__
|
||||
ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
ccl_private const Ray *ray,
|
||||
ccl_private LocalIntersection *local_isect,
|
||||
int local_object,
|
||||
ccl_private uint *lcg_state,
|
||||
int max_hits)
|
||||
{
|
||||
uint p0 = pointer_pack_to_uint_0(lcg_state);
|
||||
uint p1 = pointer_pack_to_uint_1(lcg_state);
|
||||
uint p2 = pointer_pack_to_uint_0(local_isect);
|
||||
uint p3 = pointer_pack_to_uint_1(local_isect);
|
||||
uint p4 = local_object;
|
||||
uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
|
||||
uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
|
||||
|
||||
/* Is set to zero on miss or if ray is aborted, so can be used as return value. */
|
||||
uint p5 = max_hits;
|
||||
|
||||
if (local_isect) {
|
||||
local_isect->num_hits = 0; /* Initialize hit count to zero. */
|
||||
}
|
||||
optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0,
|
||||
ray->P,
|
||||
ray->D,
|
||||
ray->tmin,
|
||||
ray->tmax,
|
||||
ray->time,
|
||||
0xFF,
|
||||
/* Need to always call into __anyhit__kernel_optix_local_hit. */
|
||||
OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
|
||||
2, /* SBT offset for PG_HITL */
|
||||
0,
|
||||
0,
|
||||
p0,
|
||||
p1,
|
||||
p2,
|
||||
p3,
|
||||
p4,
|
||||
p5,
|
||||
p6,
|
||||
p7);
|
||||
|
||||
return p5;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __SHADOW_RECORD_ALL__
|
||||
ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
||||
IntegratorShadowState state,
|
||||
ccl_private const Ray *ray,
|
||||
uint visibility,
|
||||
uint max_hits,
|
||||
ccl_private uint *num_recorded_hits,
|
||||
ccl_private float *throughput)
|
||||
{
|
||||
uint p0 = state;
|
||||
uint p1 = __float_as_uint(1.0f); /* Throughput. */
|
||||
uint p2 = 0; /* Number of hits. */
|
||||
uint p3 = max_hits;
|
||||
uint p4 = visibility;
|
||||
uint p5 = false;
|
||||
uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
|
||||
uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
|
||||
|
||||
uint ray_mask = visibility & 0xFF;
|
||||
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
|
||||
ray_mask = 0xFF;
|
||||
}
|
||||
|
||||
optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0,
|
||||
ray->P,
|
||||
ray->D,
|
||||
ray->tmin,
|
||||
ray->tmax,
|
||||
ray->time,
|
||||
ray_mask,
|
||||
/* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */
|
||||
OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
|
||||
1, /* SBT offset for PG_HITS */
|
||||
0,
|
||||
0,
|
||||
p0,
|
||||
p1,
|
||||
p2,
|
||||
p3,
|
||||
p4,
|
||||
p5,
|
||||
p6,
|
||||
p7);
|
||||
|
||||
*num_recorded_hits = uint16_unpack_from_uint_0(p2);
|
||||
*throughput = __uint_as_float(p1);
|
||||
|
||||
return p5;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __VOLUME__
|
||||
ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
|
||||
ccl_private const Ray *ray,
|
||||
ccl_private Intersection *isect,
|
||||
const uint visibility)
|
||||
{
|
||||
uint p0 = 0;
|
||||
uint p1 = 0;
|
||||
uint p2 = 0;
|
||||
uint p3 = 0;
|
||||
uint p4 = visibility;
|
||||
uint p5 = PRIMITIVE_NONE;
|
||||
uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
|
||||
uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
|
||||
|
||||
uint ray_mask = visibility & 0xFF;
|
||||
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
|
||||
ray_mask = 0xFF;
|
||||
}
|
||||
|
||||
optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0,
|
||||
ray->P,
|
||||
ray->D,
|
||||
ray->tmin,
|
||||
ray->tmax,
|
||||
ray->time,
|
||||
ray_mask,
|
||||
/* Need to always call into __anyhit__kernel_optix_volume_test. */
|
||||
OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
|
||||
3, /* SBT offset for PG_HITV */
|
||||
0,
|
||||
0,
|
||||
p0,
|
||||
p1,
|
||||
p2,
|
||||
p3,
|
||||
p4,
|
||||
p5,
|
||||
p6,
|
||||
p7);
|
||||
|
||||
isect->t = __uint_as_float(p0);
|
||||
isect->u = __uint_as_float(p1);
|
||||
isect->v = __uint_as_float(p2);
|
||||
isect->prim = p3;
|
||||
isect->object = p4;
|
||||
isect->type = p5;
|
||||
|
||||
return p5 != PRIMITIVE_NONE;
|
||||
}
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -8,7 +8,6 @@
|
||||
#include <optix.h>
|
||||
|
||||
#define __KERNEL_GPU__
|
||||
#define __KERNEL_GPU_RAYTRACING__
|
||||
#define __KERNEL_CUDA__ /* OptiX kernels are implicitly CUDA kernels too */
|
||||
#define __KERNEL_OPTIX__
|
||||
#define CCL_NAMESPACE_BEGIN
|
||||
|
@@ -20,34 +20,6 @@
|
||||
#include "kernel/integrator/intersect_volume_stack.h"
|
||||
// clang-format on
|
||||
|
||||
#define OPTIX_DEFINE_ABI_VERSION_ONLY
|
||||
#include <optix_function_table.h>
|
||||
|
||||
template<typename T> ccl_device_forceinline T *get_payload_ptr_0()
|
||||
{
|
||||
return pointer_unpack_from_uint<T>(optixGetPayload_0(), optixGetPayload_1());
|
||||
}
|
||||
template<typename T> ccl_device_forceinline T *get_payload_ptr_2()
|
||||
{
|
||||
return pointer_unpack_from_uint<T>(optixGetPayload_2(), optixGetPayload_3());
|
||||
}
|
||||
|
||||
template<typename T> ccl_device_forceinline T *get_payload_ptr_6()
|
||||
{
|
||||
return (T *)(((uint64_t)optixGetPayload_7() << 32) | optixGetPayload_6());
|
||||
}
|
||||
|
||||
ccl_device_forceinline int get_object_id()
|
||||
{
|
||||
#ifdef __OBJECT_MOTION__
|
||||
/* Always get the instance ID from the TLAS
|
||||
* There might be a motion transform node between TLAS and BLAS which does not have one. */
|
||||
return optixGetInstanceIdFromHandle(optixGetTransformListHandle(0));
|
||||
#else
|
||||
return optixGetInstanceId();
|
||||
#endif
|
||||
}
|
||||
|
||||
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest()
|
||||
{
|
||||
const int global_index = optixGetLaunchIndex().x;
|
||||
@@ -84,411 +56,3 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_volume_st
|
||||
integrator_intersect_volume_stack(nullptr, path_index);
|
||||
}
|
||||
|
||||
extern "C" __global__ void __miss__kernel_optix_miss()
|
||||
{
|
||||
/* 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss. */
|
||||
optixSetPayload_0(__float_as_uint(optixGetRayTmax()));
|
||||
optixSetPayload_5(PRIMITIVE_NONE);
|
||||
}
|
||||
|
||||
extern "C" __global__ void __anyhit__kernel_optix_local_hit()
|
||||
{
|
||||
#if defined(__HAIR__) || defined(__POINTCLOUD__)
|
||||
if (!optixIsTriangleHit()) {
|
||||
/* Ignore curves and points. */
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __BVH_LOCAL__
|
||||
const int object = get_object_id();
|
||||
if (object != optixGetPayload_4() /* local_object */) {
|
||||
/* Only intersect with matching object. */
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
|
||||
const int prim = optixGetPrimitiveIndex();
|
||||
ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
|
||||
if (intersection_skip_self_local(ray->self, prim)) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
|
||||
const uint max_hits = optixGetPayload_5();
|
||||
if (max_hits == 0) {
|
||||
/* Special case for when no hit information is requested, just report that something was hit */
|
||||
optixSetPayload_5(true);
|
||||
return optixTerminateRay();
|
||||
}
|
||||
|
||||
int hit = 0;
|
||||
uint *const lcg_state = get_payload_ptr_0<uint>();
|
||||
LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>();
|
||||
|
||||
if (lcg_state) {
|
||||
for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
|
||||
if (optixGetRayTmax() == local_isect->hits[i].t) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
}
|
||||
|
||||
hit = local_isect->num_hits++;
|
||||
|
||||
if (local_isect->num_hits > max_hits) {
|
||||
hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
|
||||
if (hit >= max_hits) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) {
|
||||
/* Record closest intersection only.
|
||||
* Do not terminate ray here, since there is no guarantee about distance ordering in any-hit.
|
||||
*/
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
|
||||
local_isect->num_hits = 1;
|
||||
}
|
||||
|
||||
Intersection *isect = &local_isect->hits[hit];
|
||||
isect->t = optixGetRayTmax();
|
||||
isect->prim = prim;
|
||||
isect->object = get_object_id();
|
||||
isect->type = kernel_data_fetch(objects, isect->object).primitive_type;
|
||||
|
||||
const float2 barycentrics = optixGetTriangleBarycentrics();
|
||||
isect->u = 1.0f - barycentrics.y - barycentrics.x;
|
||||
isect->v = barycentrics.x;
|
||||
|
||||
/* Record geometric normal. */
|
||||
const uint tri_vindex = kernel_data_fetch(tri_vindex, prim).w;
|
||||
const float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0);
|
||||
const float3 tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1);
|
||||
const float3 tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
|
||||
local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
|
||||
|
||||
/* Continue tracing (without this the trace call would return after the first hit). */
|
||||
optixIgnoreIntersection();
|
||||
#endif
|
||||
}
|
||||
|
||||
extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
|
||||
{
|
||||
#ifdef __SHADOW_RECORD_ALL__
|
||||
int prim = optixGetPrimitiveIndex();
|
||||
const uint object = get_object_id();
|
||||
# ifdef __VISIBILITY_FLAG__
|
||||
const uint visibility = optixGetPayload_4();
|
||||
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
# endif
|
||||
|
||||
ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
|
||||
if (intersection_skip_self_shadow(ray->self, object, prim)) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
|
||||
float u = 0.0f, v = 0.0f;
|
||||
int type = 0;
|
||||
if (optixIsTriangleHit()) {
|
||||
const float2 barycentrics = optixGetTriangleBarycentrics();
|
||||
u = 1.0f - barycentrics.y - barycentrics.x;
|
||||
v = barycentrics.x;
|
||||
type = kernel_data_fetch(objects, object).primitive_type;
|
||||
}
|
||||
# ifdef __HAIR__
|
||||
else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
|
||||
u = __uint_as_float(optixGetAttribute_0());
|
||||
v = __uint_as_float(optixGetAttribute_1());
|
||||
|
||||
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
|
||||
type = segment.type;
|
||||
prim = segment.prim;
|
||||
|
||||
# if OPTIX_ABI_VERSION < 55
|
||||
/* Filter out curve endcaps. */
|
||||
if (u == 0.0f || u == 1.0f) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
# endif
|
||||
}
|
||||
# endif
|
||||
else {
|
||||
type = kernel_data_fetch(objects, object).primitive_type;
|
||||
u = 0.0f;
|
||||
v = 0.0f;
|
||||
}
|
||||
|
||||
# ifndef __TRANSPARENT_SHADOWS__
|
||||
/* No transparent shadows support compiled in, make opaque. */
|
||||
optixSetPayload_5(true);
|
||||
return optixTerminateRay();
|
||||
# else
|
||||
const uint max_hits = optixGetPayload_3();
|
||||
const uint num_hits_packed = optixGetPayload_2();
|
||||
const uint num_recorded_hits = uint16_unpack_from_uint_0(num_hits_packed);
|
||||
const uint num_hits = uint16_unpack_from_uint_1(num_hits_packed);
|
||||
|
||||
/* If no transparent shadows, all light is blocked and we can stop immediately. */
|
||||
if (num_hits >= max_hits ||
|
||||
!(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
|
||||
optixSetPayload_5(true);
|
||||
return optixTerminateRay();
|
||||
}
|
||||
|
||||
/* Always use baked shadow transparency for curves. */
|
||||
if (type & PRIMITIVE_CURVE) {
|
||||
float throughput = __uint_as_float(optixGetPayload_1());
|
||||
throughput *= intersection_curve_shadow_transparency(nullptr, object, prim, u);
|
||||
optixSetPayload_1(__float_as_uint(throughput));
|
||||
optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits, num_hits + 1));
|
||||
|
||||
if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
|
||||
optixSetPayload_5(true);
|
||||
return optixTerminateRay();
|
||||
}
|
||||
else {
|
||||
/* Continue tracing. */
|
||||
optixIgnoreIntersection();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Record transparent intersection. */
|
||||
optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits + 1, num_hits + 1));
|
||||
|
||||
uint record_index = num_recorded_hits;
|
||||
|
||||
const IntegratorShadowState state = optixGetPayload_0();
|
||||
|
||||
const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
|
||||
if (record_index >= max_record_hits) {
|
||||
/* If maximum number of hits reached, find a hit to replace. */
|
||||
float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
|
||||
uint max_recorded_hit = 0;
|
||||
|
||||
for (int i = 1; i < max_record_hits; i++) {
|
||||
const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
|
||||
if (isect_t > max_recorded_t) {
|
||||
max_recorded_t = isect_t;
|
||||
max_recorded_hit = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (optixGetRayTmax() >= max_recorded_t) {
|
||||
/* Accept hit, so that OptiX won't consider any more hits beyond the distance of the
|
||||
* current hit anymore. */
|
||||
return;
|
||||
}
|
||||
|
||||
record_index = max_recorded_hit;
|
||||
}
|
||||
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = optixGetRayTmax();
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
|
||||
|
||||
/* Continue tracing. */
|
||||
optixIgnoreIntersection();
|
||||
# endif /* __TRANSPARENT_SHADOWS__ */
|
||||
#endif /* __SHADOW_RECORD_ALL__ */
|
||||
}
|
||||
|
||||
extern "C" __global__ void __anyhit__kernel_optix_volume_test()
|
||||
{
|
||||
#if defined(__HAIR__) || defined(__POINTCLOUD__)
|
||||
if (!optixIsTriangleHit()) {
|
||||
/* Ignore curves. */
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
#endif
|
||||
|
||||
const uint object = get_object_id();
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
const uint visibility = optixGetPayload_4();
|
||||
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
#endif
|
||||
|
||||
if ((kernel_data_fetch(object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
|
||||
const int prim = optixGetPrimitiveIndex();
|
||||
ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
|
||||
if (intersection_skip_self(ray->self, object, prim)) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
|
||||
{
|
||||
#ifdef __HAIR__
|
||||
# if OPTIX_ABI_VERSION < 55
|
||||
if (optixGetPrimitiveType() == OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE) {
|
||||
/* Filter out curve endcaps. */
|
||||
const float u = __uint_as_float(optixGetAttribute_0());
|
||||
if (u == 0.0f || u == 1.0f) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
|
||||
const uint object = get_object_id();
|
||||
const uint visibility = optixGetPayload_4();
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
#endif
|
||||
|
||||
const int prim = optixGetPrimitiveIndex();
|
||||
ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
|
||||
|
||||
if (visibility & PATH_RAY_SHADOW_OPAQUE) {
|
||||
if (intersection_skip_self_shadow(ray->self, object, prim)) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
else {
|
||||
/* Shadow ray early termination. */
|
||||
return optixTerminateRay();
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (intersection_skip_self(ray->self, object, prim)) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__ void __closesthit__kernel_optix_hit()
|
||||
{
|
||||
const int object = get_object_id();
|
||||
const int prim = optixGetPrimitiveIndex();
|
||||
|
||||
optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */
|
||||
optixSetPayload_4(object);
|
||||
|
||||
if (optixIsTriangleHit()) {
|
||||
const float2 barycentrics = optixGetTriangleBarycentrics();
|
||||
optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x));
|
||||
optixSetPayload_2(__float_as_uint(barycentrics.x));
|
||||
optixSetPayload_3(prim);
|
||||
optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type);
|
||||
}
|
||||
else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
|
||||
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
|
||||
optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */
|
||||
optixSetPayload_2(optixGetAttribute_1());
|
||||
optixSetPayload_3(segment.prim);
|
||||
optixSetPayload_5(segment.type);
|
||||
}
|
||||
else {
|
||||
optixSetPayload_1(0);
|
||||
optixSetPayload_2(0);
|
||||
optixSetPayload_3(prim);
|
||||
optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __HAIR__
|
||||
ccl_device_inline void optix_intersection_curve(const int prim, const int type)
|
||||
{
|
||||
const int object = get_object_id();
|
||||
|
||||
# ifdef __VISIBILITY_FLAG__
|
||||
const uint visibility = optixGetPayload_4();
|
||||
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
|
||||
return;
|
||||
}
|
||||
# endif
|
||||
|
||||
float3 P = optixGetObjectRayOrigin();
|
||||
float3 dir = optixGetObjectRayDirection();
|
||||
float tmin = optixGetRayTmin();
|
||||
|
||||
/* The direction is not normalized by default, but the curve intersection routine expects that */
|
||||
float len;
|
||||
dir = normalize_len(dir, &len);
|
||||
|
||||
# ifdef __OBJECT_MOTION__
|
||||
const float time = optixGetRayTime();
|
||||
# else
|
||||
const float time = 0.0f;
|
||||
# endif
|
||||
|
||||
Intersection isect;
|
||||
isect.t = optixGetRayTmax();
|
||||
/* Transform maximum distance into object space. */
|
||||
if (isect.t != FLT_MAX)
|
||||
isect.t *= len;
|
||||
|
||||
if (curve_intersect(NULL, &isect, P, dir, tmin, isect.t, object, prim, time, type)) {
|
||||
static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
|
||||
optixReportIntersection(isect.t / len,
|
||||
type & PRIMITIVE_ALL,
|
||||
__float_as_int(isect.u), /* Attribute_0 */
|
||||
__float_as_int(isect.v)); /* Attribute_1 */
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__ void __intersection__curve_ribbon()
|
||||
{
|
||||
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, optixGetPrimitiveIndex());
|
||||
const int prim = segment.prim;
|
||||
const int type = segment.type;
|
||||
if (type & PRIMITIVE_CURVE_RIBBON) {
|
||||
optix_intersection_curve(prim, type);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __POINTCLOUD__
|
||||
extern "C" __global__ void __intersection__point()
|
||||
{
|
||||
const int prim = optixGetPrimitiveIndex();
|
||||
const int object = get_object_id();
|
||||
const int type = kernel_data_fetch(objects, object).primitive_type;
|
||||
|
||||
# ifdef __VISIBILITY_FLAG__
|
||||
const uint visibility = optixGetPayload_4();
|
||||
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
|
||||
return;
|
||||
}
|
||||
# endif
|
||||
|
||||
float3 P = optixGetObjectRayOrigin();
|
||||
float3 dir = optixGetObjectRayDirection();
|
||||
float tmin = optixGetRayTmin();
|
||||
|
||||
/* The direction is not normalized by default, the point intersection routine expects that. */
|
||||
float len;
|
||||
dir = normalize_len(dir, &len);
|
||||
|
||||
# ifdef __OBJECT_MOTION__
|
||||
const float time = optixGetRayTime();
|
||||
# else
|
||||
const float time = 0.0f;
|
||||
# endif
|
||||
|
||||
Intersection isect;
|
||||
isect.t = optixGetRayTmax();
|
||||
/* Transform maximum distance into object space. */
|
||||
if (isect.t != FLT_MAX) {
|
||||
isect.t *= len;
|
||||
}
|
||||
|
||||
if (point_intersect(NULL, &isect, P, dir, tmin, isect.t, object, prim, time, type)) {
|
||||
static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
|
||||
optixReportIntersection(isect.t / len, type & PRIMITIVE_ALL);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@@ -72,7 +72,7 @@ ccl_device_inline float sqr_point_to_line_distance(const float3 PmQ0, const floa
|
||||
ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
|
||||
const float3 cylinder_end,
|
||||
const float cylinder_radius,
|
||||
const float3 ray_dir,
|
||||
const float3 ray_D,
|
||||
ccl_private float2 *t_o,
|
||||
ccl_private float *u0_o,
|
||||
ccl_private float3 *Ng0_o,
|
||||
@@ -82,7 +82,7 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
|
||||
/* Calculate quadratic equation to solve. */
|
||||
const float rl = 1.0f / len(cylinder_end - cylinder_start);
|
||||
const float3 P0 = cylinder_start, dP = (cylinder_end - cylinder_start) * rl;
|
||||
const float3 O = -P0, dO = ray_dir;
|
||||
const float3 O = -P0, dO = ray_D;
|
||||
|
||||
const float dOdO = dot(dO, dO);
|
||||
const float OdO = dot(dO, O);
|
||||
@@ -123,7 +123,7 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
|
||||
/* Calculates u and Ng for near hit. */
|
||||
{
|
||||
*u0_o = (t0 * dOz + Oz) * rl;
|
||||
const float3 Pr = t0 * ray_dir;
|
||||
const float3 Pr = t0 * ray_D;
|
||||
const float3 Pl = (*u0_o) * (cylinder_end - cylinder_start) + cylinder_start;
|
||||
*Ng0_o = Pr - Pl;
|
||||
}
|
||||
@@ -131,7 +131,7 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
|
||||
/* Calculates u and Ng for far hit. */
|
||||
{
|
||||
*u1_o = (t1 * dOz + Oz) * rl;
|
||||
const float3 Pr = t1 * ray_dir;
|
||||
const float3 Pr = t1 * ray_D;
|
||||
const float3 Pl = (*u1_o) * (cylinder_end - cylinder_start) + cylinder_start;
|
||||
*Ng1_o = Pr - Pl;
|
||||
}
|
||||
@@ -141,10 +141,10 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
|
||||
return true;
|
||||
}
|
||||
|
||||
ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, const float3 ray_dir)
|
||||
ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, const float3 ray_D)
|
||||
{
|
||||
const float3 O = -P;
|
||||
const float3 D = ray_dir;
|
||||
const float3 D = ray_D;
|
||||
const float ON = dot(O, N);
|
||||
const float DN = dot(D, N);
|
||||
const float min_rcp_input = 1e-18f;
|
||||
@@ -155,7 +155,7 @@ ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, co
|
||||
return make_float2(lower, upper);
|
||||
}
|
||||
|
||||
ccl_device bool curve_intersect_iterative(const float3 ray_dir,
|
||||
ccl_device bool curve_intersect_iterative(const float3 ray_D,
|
||||
const float ray_tmin,
|
||||
ccl_private float *ray_tmax,
|
||||
const float dt,
|
||||
@@ -165,7 +165,7 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
|
||||
const bool use_backfacing,
|
||||
ccl_private Intersection *isect)
|
||||
{
|
||||
const float length_ray_dir = len(ray_dir);
|
||||
const float length_ray_D = len(ray_D);
|
||||
|
||||
/* Error of curve evaluations is proportional to largest coordinate. */
|
||||
const float4 box_min = min(min(curve[0], curve[1]), min(curve[2], curve[3]));
|
||||
@@ -176,9 +176,9 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
|
||||
const float radius_max = box_max.w;
|
||||
|
||||
for (int i = 0; i < CURVE_NUM_JACOBIAN_ITERATIONS; i++) {
|
||||
const float3 Q = ray_dir * t;
|
||||
const float3 dQdt = ray_dir;
|
||||
const float Q_err = 16.0f * FLT_EPSILON * length_ray_dir * t;
|
||||
const float3 Q = ray_D * t;
|
||||
const float3 dQdt = ray_D;
|
||||
const float Q_err = 16.0f * FLT_EPSILON * length_ray_D * t;
|
||||
|
||||
const float4 P4 = catmull_rom_basis_eval(curve, u);
|
||||
const float4 dPdu4 = catmull_rom_basis_derivative(curve, u);
|
||||
@@ -233,7 +233,7 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
|
||||
const float3 U = dradiusdu * R + dPdu;
|
||||
const float3 V = cross(dPdu, R);
|
||||
const float3 Ng = cross(V, U);
|
||||
if (!use_backfacing && dot(ray_dir, Ng) > 0.0f) {
|
||||
if (!use_backfacing && dot(ray_D, Ng) > 0.0f) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -249,8 +249,8 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
|
||||
return false;
|
||||
}
|
||||
|
||||
ccl_device bool curve_intersect_recursive(const float3 ray_orig,
|
||||
const float3 ray_dir,
|
||||
ccl_device bool curve_intersect_recursive(const float3 ray_P,
|
||||
const float3 ray_D,
|
||||
const float ray_tmin,
|
||||
float ray_tmax,
|
||||
float4 curve[4],
|
||||
@@ -258,8 +258,8 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
|
||||
{
|
||||
/* Move ray closer to make intersection stable. */
|
||||
const float3 center = float4_to_float3(0.25f * (curve[0] + curve[1] + curve[2] + curve[3]));
|
||||
const float dt = dot(center - ray_orig, ray_dir) / dot(ray_dir, ray_dir);
|
||||
const float3 ref = ray_orig + ray_dir * dt;
|
||||
const float dt = dot(center - ray_P, ray_D) / dot(ray_D, ray_D);
|
||||
const float3 ref = ray_P + ray_D * dt;
|
||||
const float4 ref4 = make_float4(ref.x, ref.y, ref.z, 0.0f);
|
||||
curve[0] -= ref4;
|
||||
curve[1] -= ref4;
|
||||
@@ -322,7 +322,7 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
|
||||
valid = cylinder_intersect(float4_to_float3(P0),
|
||||
float4_to_float3(P3),
|
||||
r_outer,
|
||||
ray_dir,
|
||||
ray_D,
|
||||
&tc_outer,
|
||||
&u_outer0,
|
||||
&Ng_outer0,
|
||||
@@ -335,11 +335,10 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
|
||||
/* Intersect with cap-planes. */
|
||||
float2 tp = make_float2(ray_tmin - dt, ray_tmax - dt);
|
||||
tp = make_float2(max(tp.x, tc_outer.x), min(tp.y, tc_outer.y));
|
||||
const float2 h0 = half_plane_intersect(
|
||||
float4_to_float3(P0), float4_to_float3(dP0du), ray_dir);
|
||||
const float2 h0 = half_plane_intersect(float4_to_float3(P0), float4_to_float3(dP0du), ray_D);
|
||||
tp = make_float2(max(tp.x, h0.x), min(tp.y, h0.y));
|
||||
const float2 h1 = half_plane_intersect(
|
||||
float4_to_float3(P3), -float4_to_float3(dP3du), ray_dir);
|
||||
float4_to_float3(P3), -float4_to_float3(dP3du), ray_D);
|
||||
tp = make_float2(max(tp.x, h1.x), min(tp.y, h1.y));
|
||||
valid = tp.x <= tp.y;
|
||||
if (!valid) {
|
||||
@@ -359,7 +358,7 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
|
||||
const bool valid_inner = cylinder_intersect(float4_to_float3(P0),
|
||||
float4_to_float3(P3),
|
||||
r_inner,
|
||||
ray_dir,
|
||||
ray_D,
|
||||
&tc_inner,
|
||||
&u_inner0,
|
||||
&Ng_inner0,
|
||||
@@ -369,9 +368,9 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
|
||||
/* At the unstable area we subdivide deeper. */
|
||||
# if 0
|
||||
const bool unstable0 = (!valid_inner) |
|
||||
(fabsf(dot(normalize(ray_dir), normalize(Ng_inner0))) < 0.3f);
|
||||
(fabsf(dot(normalize(ray_D), normalize(Ng_inner0))) < 0.3f);
|
||||
const bool unstable1 = (!valid_inner) |
|
||||
(fabsf(dot(normalize(ray_dir), normalize(Ng_inner1))) < 0.3f);
|
||||
(fabsf(dot(normalize(ray_D), normalize(Ng_inner1))) < 0.3f);
|
||||
# else
|
||||
/* On the GPU appears to be a little faster if always enabled. */
|
||||
(void)valid_inner;
|
||||
@@ -396,7 +395,7 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
|
||||
CURVE_NUM_BEZIER_SUBDIVISIONS;
|
||||
if (depth >= termDepth) {
|
||||
found |= curve_intersect_iterative(
|
||||
ray_dir, ray_tmin, &ray_tmax, dt, curve, u_outer0, tp0.x, use_backfacing, isect);
|
||||
ray_D, ray_tmin, &ray_tmax, dt, curve, u_outer0, tp0.x, use_backfacing, isect);
|
||||
}
|
||||
else {
|
||||
recurse = true;
|
||||
@@ -409,7 +408,7 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
|
||||
CURVE_NUM_BEZIER_SUBDIVISIONS;
|
||||
if (depth >= termDepth) {
|
||||
found |= curve_intersect_iterative(
|
||||
ray_dir, ray_tmin, &ray_tmax, dt, curve, u_outer1, tp1.y, use_backfacing, isect);
|
||||
ray_D, ray_tmin, &ray_tmax, dt, curve, u_outer1, tp1.y, use_backfacing, isect);
|
||||
}
|
||||
else {
|
||||
recurse = true;
|
||||
@@ -519,13 +518,16 @@ ccl_device_inline bool ribbon_intersect_quad(const float ray_tmin,
|
||||
return true;
|
||||
}
|
||||
|
||||
ccl_device_inline void ribbon_ray_space(const float3 ray_dir, float3 ray_space[3])
|
||||
ccl_device_inline void ribbon_ray_space(const float3 ray_D,
|
||||
const float ray_D_invlen,
|
||||
float3 ray_space[3])
|
||||
{
|
||||
const float3 dx0 = make_float3(0, ray_dir.z, -ray_dir.y);
|
||||
const float3 dx1 = make_float3(-ray_dir.z, 0, ray_dir.x);
|
||||
const float3 D = ray_D * ray_D_invlen;
|
||||
const float3 dx0 = make_float3(0, D.z, -D.y);
|
||||
const float3 dx1 = make_float3(-D.z, 0, D.x);
|
||||
ray_space[0] = normalize(dot(dx0, dx0) > dot(dx1, dx1) ? dx0 : dx1);
|
||||
ray_space[1] = normalize(cross(ray_dir, ray_space[0]));
|
||||
ray_space[2] = ray_dir;
|
||||
ray_space[1] = normalize(cross(D, ray_space[0]));
|
||||
ray_space[2] = D * ray_D_invlen;
|
||||
}
|
||||
|
||||
ccl_device_inline float4 ribbon_to_ray_space(const float3 ray_space[3],
|
||||
@@ -537,7 +539,7 @@ ccl_device_inline float4 ribbon_to_ray_space(const float3 ray_space[3],
|
||||
}
|
||||
|
||||
ccl_device_inline bool ribbon_intersect(const float3 ray_org,
|
||||
const float3 ray_dir,
|
||||
const float3 ray_D,
|
||||
const float ray_tmin,
|
||||
float ray_tmax,
|
||||
const int N,
|
||||
@@ -545,8 +547,9 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org,
|
||||
ccl_private Intersection *isect)
|
||||
{
|
||||
/* Transform control points into ray space. */
|
||||
const float ray_D_invlen = 1.0f / len(ray_D);
|
||||
float3 ray_space[3];
|
||||
ribbon_ray_space(ray_dir, ray_space);
|
||||
ribbon_ray_space(ray_D, ray_D_invlen, ray_space);
|
||||
|
||||
curve[0] = ribbon_to_ray_space(ray_space, ray_org, curve[0]);
|
||||
curve[1] = ribbon_to_ray_space(ray_space, ray_org, curve[1]);
|
||||
@@ -594,7 +597,7 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org,
|
||||
const float avoidance_factor = 2.0f;
|
||||
if (avoidance_factor != 0.0f) {
|
||||
float r = mix(p0.w, p1.w, vu);
|
||||
valid0 = vt > avoidance_factor * r;
|
||||
valid0 = vt > avoidance_factor * r * ray_D_invlen;
|
||||
}
|
||||
|
||||
if (valid0) {
|
||||
@@ -619,8 +622,8 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org,
|
||||
|
||||
ccl_device_forceinline bool curve_intersect(KernelGlobals kg,
|
||||
ccl_private Intersection *isect,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float3 ray_P,
|
||||
const float3 ray_D,
|
||||
const float tmin,
|
||||
const float tmax,
|
||||
int object,
|
||||
@@ -651,7 +654,7 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals kg,
|
||||
if (type & PRIMITIVE_CURVE_RIBBON) {
|
||||
/* todo: adaptive number of subdivisions could help performance here. */
|
||||
const int subdivisions = kernel_data.bvh.curve_subdivisions;
|
||||
if (ribbon_intersect(P, dir, tmin, tmax, subdivisions, curve, isect)) {
|
||||
if (ribbon_intersect(ray_P, ray_D, tmin, tmax, subdivisions, curve, isect)) {
|
||||
isect->prim = prim;
|
||||
isect->object = object;
|
||||
isect->type = type;
|
||||
@@ -661,7 +664,7 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals kg,
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
if (curve_intersect_recursive(P, dir, tmin, tmax, curve, isect)) {
|
||||
if (curve_intersect_recursive(ray_P, ray_D, tmin, tmax, curve, isect)) {
|
||||
isect->prim = prim;
|
||||
isect->object = object;
|
||||
isect->type = type;
|
||||
|
@@ -27,8 +27,8 @@ ccl_device_inline float3 motion_triangle_point_from_uv(KernelGlobals kg,
|
||||
const float v,
|
||||
float3 verts[3])
|
||||
{
|
||||
float w = 1.0f - u - v;
|
||||
float3 P = u * verts[0] + v * verts[1] + w * verts[2];
|
||||
/* This appears to give slightly better precision than interpolating with w = (1 - u - v). */
|
||||
float3 P = verts[0] + u * (verts[1] - verts[0]) + v * (verts[2] - verts[0]);
|
||||
|
||||
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
const Transform tfm = object_get_transform(kg, sd);
|
||||
|
@@ -86,7 +86,7 @@ ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals kg,
|
||||
Transform tfm = object_fetch_transform_motion(kg, object, time);
|
||||
|
||||
if (itfm)
|
||||
*itfm = transform_quick_inverse(tfm);
|
||||
*itfm = transform_inverse(tfm);
|
||||
|
||||
return tfm;
|
||||
}
|
||||
@@ -488,127 +488,54 @@ ccl_device_inline float3 bvh_inverse_direction(float3 dir)
|
||||
|
||||
/* Transform ray into object space to enter static object in BVH */
|
||||
|
||||
ccl_device_inline float bvh_instance_push(KernelGlobals kg,
|
||||
int object,
|
||||
ccl_private const Ray *ray,
|
||||
ccl_private float3 *P,
|
||||
ccl_private float3 *dir,
|
||||
ccl_private float3 *idir)
|
||||
ccl_device_inline void bvh_instance_push(KernelGlobals kg,
|
||||
int object,
|
||||
ccl_private const Ray *ray,
|
||||
ccl_private float3 *P,
|
||||
ccl_private float3 *dir,
|
||||
ccl_private float3 *idir)
|
||||
{
|
||||
Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
|
||||
|
||||
*P = transform_point(&tfm, ray->P);
|
||||
|
||||
float len;
|
||||
*dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
|
||||
*idir = bvh_inverse_direction(*dir);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
/* Transform ray to exit static object in BVH. */
|
||||
|
||||
ccl_device_inline float bvh_instance_pop(KernelGlobals kg,
|
||||
int object,
|
||||
ccl_private const Ray *ray,
|
||||
ccl_private float3 *P,
|
||||
ccl_private float3 *dir,
|
||||
ccl_private float3 *idir,
|
||||
float t)
|
||||
{
|
||||
if (t != FLT_MAX) {
|
||||
Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
|
||||
t /= len(transform_direction(&tfm, ray->D));
|
||||
}
|
||||
|
||||
*P = ray->P;
|
||||
*dir = bvh_clamp_direction(ray->D);
|
||||
*idir = bvh_inverse_direction(*dir);
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
/* Same as above, but returns scale factor to apply to multiple intersection distances */
|
||||
|
||||
ccl_device_inline void bvh_instance_pop_factor(KernelGlobals kg,
|
||||
int object,
|
||||
ccl_private const Ray *ray,
|
||||
ccl_private float3 *P,
|
||||
ccl_private float3 *dir,
|
||||
ccl_private float3 *idir,
|
||||
ccl_private float *t_fac)
|
||||
{
|
||||
Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
|
||||
*t_fac = 1.0f / len(transform_direction(&tfm, ray->D));
|
||||
|
||||
*P = ray->P;
|
||||
*dir = bvh_clamp_direction(ray->D);
|
||||
*dir = bvh_clamp_direction(transform_direction(&tfm, ray->D));
|
||||
*idir = bvh_inverse_direction(*dir);
|
||||
}
|
||||
|
||||
#ifdef __OBJECT_MOTION__
|
||||
/* Transform ray into object space to enter motion blurred object in BVH */
|
||||
|
||||
ccl_device_inline float bvh_instance_motion_push(KernelGlobals kg,
|
||||
int object,
|
||||
ccl_private const Ray *ray,
|
||||
ccl_private float3 *P,
|
||||
ccl_private float3 *dir,
|
||||
ccl_private float3 *idir,
|
||||
ccl_private Transform *itfm)
|
||||
{
|
||||
object_fetch_transform_motion_test(kg, object, ray->time, itfm);
|
||||
|
||||
*P = transform_point(itfm, ray->P);
|
||||
|
||||
float len;
|
||||
*dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len));
|
||||
*idir = bvh_inverse_direction(*dir);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
/* Transform ray to exit motion blurred object in BVH. */
|
||||
|
||||
ccl_device_inline float bvh_instance_motion_pop(KernelGlobals kg,
|
||||
ccl_device_inline void bvh_instance_motion_push(KernelGlobals kg,
|
||||
int object,
|
||||
ccl_private const Ray *ray,
|
||||
ccl_private float3 *P,
|
||||
ccl_private float3 *dir,
|
||||
ccl_private float3 *idir,
|
||||
float t,
|
||||
ccl_private Transform *itfm)
|
||||
ccl_private float3 *idir)
|
||||
{
|
||||
if (t != FLT_MAX) {
|
||||
t /= len(transform_direction(itfm, ray->D));
|
||||
}
|
||||
Transform tfm;
|
||||
object_fetch_transform_motion_test(kg, object, ray->time, &tfm);
|
||||
|
||||
*P = ray->P;
|
||||
*dir = bvh_clamp_direction(ray->D);
|
||||
*idir = bvh_inverse_direction(*dir);
|
||||
*P = transform_point(&tfm, ray->P);
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
/* Same as above, but returns scale factor to apply to multiple intersection distances */
|
||||
|
||||
ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals kg,
|
||||
int object,
|
||||
ccl_private const Ray *ray,
|
||||
ccl_private float3 *P,
|
||||
ccl_private float3 *dir,
|
||||
ccl_private float3 *idir,
|
||||
ccl_private float *t_fac,
|
||||
ccl_private Transform *itfm)
|
||||
{
|
||||
*t_fac = 1.0f / len(transform_direction(itfm, ray->D));
|
||||
*P = ray->P;
|
||||
*dir = bvh_clamp_direction(ray->D);
|
||||
*dir = bvh_clamp_direction(transform_direction(&tfm, ray->D));
|
||||
*idir = bvh_inverse_direction(*dir);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Transform ray to exit static object in BVH. */
|
||||
|
||||
ccl_device_inline void bvh_instance_pop(ccl_private const Ray *ray,
|
||||
ccl_private float3 *P,
|
||||
ccl_private float3 *dir,
|
||||
ccl_private float3 *idir)
|
||||
{
|
||||
*P = ray->P;
|
||||
*dir = bvh_clamp_direction(ray->D);
|
||||
*idir = bvh_inverse_direction(*dir);
|
||||
}
|
||||
|
||||
/* TODO: This can be removed when we know if no devices will require explicit
|
||||
* address space qualifiers for this case. */
|
||||
|
||||
|
@@ -10,20 +10,20 @@ CCL_NAMESPACE_BEGIN
|
||||
#ifdef __POINTCLOUD__
|
||||
|
||||
ccl_device_forceinline bool point_intersect_test(const float4 point,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float tmin,
|
||||
const float tmax,
|
||||
const float3 ray_P,
|
||||
const float3 ray_D,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
ccl_private float *t)
|
||||
{
|
||||
const float3 center = float4_to_float3(point);
|
||||
const float radius = point.w;
|
||||
|
||||
const float rd2 = 1.0f / dot(dir, dir);
|
||||
const float rd2 = 1.0f / dot(ray_D, ray_D);
|
||||
|
||||
const float3 c0 = center - P;
|
||||
const float projC0 = dot(c0, dir) * rd2;
|
||||
const float3 perp = c0 - projC0 * dir;
|
||||
const float3 c0 = center - ray_P;
|
||||
const float projC0 = dot(c0, ray_D) * rd2;
|
||||
const float3 perp = c0 - projC0 * ray_D;
|
||||
const float l2 = dot(perp, perp);
|
||||
const float r2 = radius * radius;
|
||||
if (!(l2 <= r2)) {
|
||||
@@ -32,12 +32,12 @@ ccl_device_forceinline bool point_intersect_test(const float4 point,
|
||||
|
||||
const float td = sqrt((r2 - l2) * rd2);
|
||||
const float t_front = projC0 - td;
|
||||
const bool valid_front = (tmin <= t_front) & (t_front <= tmax);
|
||||
const bool valid_front = (ray_tmin <= t_front) & (t_front <= ray_tmax);
|
||||
|
||||
/* Always back-face culling for now. */
|
||||
# if 0
|
||||
const float t_back = projC0 + td;
|
||||
const bool valid_back = (tmin <= t_back) & (t_back <= tmax);
|
||||
const bool valid_back = (ray_tmin <= t_back) & (t_back <= ray_tmax);
|
||||
|
||||
/* check if there is a first hit */
|
||||
const bool valid_first = valid_front | valid_back;
|
||||
@@ -58,10 +58,10 @@ ccl_device_forceinline bool point_intersect_test(const float4 point,
|
||||
|
||||
ccl_device_forceinline bool point_intersect(KernelGlobals kg,
|
||||
ccl_private Intersection *isect,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float tmin,
|
||||
const float tmax,
|
||||
const float3 ray_P,
|
||||
const float3 ray_D,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
const int object,
|
||||
const int prim,
|
||||
const float time,
|
||||
@@ -70,7 +70,7 @@ ccl_device_forceinline bool point_intersect(KernelGlobals kg,
|
||||
const float4 point = (type & PRIMITIVE_MOTION) ? motion_point(kg, object, prim, time) :
|
||||
kernel_data_fetch(points, prim);
|
||||
|
||||
if (!point_intersect_test(point, P, dir, tmin, tmax, &isect->t)) {
|
||||
if (!point_intersect_test(point, ray_P, ray_D, ray_tmin, ray_tmax, &isect->t)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@@ -18,7 +18,7 @@ ccl_device void shader_setup_object_transforms(KernelGlobals kg,
|
||||
{
|
||||
if (sd->object_flag & SD_OBJECT_MOTION) {
|
||||
sd->ob_tfm_motion = object_fetch_transform_motion(kg, sd->object, time);
|
||||
sd->ob_itfm_motion = transform_quick_inverse(sd->ob_tfm_motion);
|
||||
sd->ob_itfm_motion = transform_inverse(sd->ob_tfm_motion);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@@ -94,11 +94,11 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals kg,
|
||||
float2 uv[3];
|
||||
subd_triangle_patch_uv(kg, sd, uv);
|
||||
|
||||
float2 dpdu = uv[0] - uv[2];
|
||||
float2 dpdv = uv[1] - uv[2];
|
||||
float2 dpdu = uv[1] - uv[0];
|
||||
float2 dpdv = uv[2] - uv[0];
|
||||
|
||||
/* p is [s, t] */
|
||||
float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
|
||||
float2 p = dpdu * sd->u + dpdv * sd->v + uv[0];
|
||||
|
||||
float a, dads, dadt;
|
||||
a = patch_eval_float(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
|
||||
@@ -165,12 +165,12 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals kg,
|
||||
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
if (dx)
|
||||
*dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
|
||||
*dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a;
|
||||
if (dy)
|
||||
*dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
|
||||
*dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a;
|
||||
#endif
|
||||
|
||||
return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
|
||||
return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a;
|
||||
}
|
||||
else if (desc.element == ATTR_ELEMENT_CORNER) {
|
||||
float2 uv[3];
|
||||
@@ -195,12 +195,12 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals kg,
|
||||
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
if (dx)
|
||||
*dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
|
||||
*dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a;
|
||||
if (dy)
|
||||
*dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
|
||||
*dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a;
|
||||
#endif
|
||||
|
||||
return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
|
||||
return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a;
|
||||
}
|
||||
else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
|
||||
if (dx)
|
||||
@@ -233,11 +233,11 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals kg,
|
||||
float2 uv[3];
|
||||
subd_triangle_patch_uv(kg, sd, uv);
|
||||
|
||||
float2 dpdu = uv[0] - uv[2];
|
||||
float2 dpdv = uv[1] - uv[2];
|
||||
float2 dpdu = uv[1] - uv[0];
|
||||
float2 dpdv = uv[2] - uv[0];
|
||||
|
||||
/* p is [s, t] */
|
||||
float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
|
||||
float2 p = dpdu * sd->u + dpdv * sd->v + uv[0];
|
||||
|
||||
float2 a, dads, dadt;
|
||||
|
||||
@@ -305,12 +305,12 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals kg,
|
||||
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
if (dx)
|
||||
*dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
|
||||
*dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a;
|
||||
if (dy)
|
||||
*dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
|
||||
*dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a;
|
||||
#endif
|
||||
|
||||
return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
|
||||
return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a;
|
||||
}
|
||||
else if (desc.element == ATTR_ELEMENT_CORNER) {
|
||||
float2 uv[3];
|
||||
@@ -337,12 +337,12 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals kg,
|
||||
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
if (dx)
|
||||
*dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
|
||||
*dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a;
|
||||
if (dy)
|
||||
*dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
|
||||
*dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a;
|
||||
#endif
|
||||
|
||||
return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
|
||||
return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a;
|
||||
}
|
||||
else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
|
||||
if (dx)
|
||||
@@ -375,11 +375,11 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg,
|
||||
float2 uv[3];
|
||||
subd_triangle_patch_uv(kg, sd, uv);
|
||||
|
||||
float2 dpdu = uv[0] - uv[2];
|
||||
float2 dpdv = uv[1] - uv[2];
|
||||
float2 dpdu = uv[1] - uv[0];
|
||||
float2 dpdv = uv[2] - uv[0];
|
||||
|
||||
/* p is [s, t] */
|
||||
float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
|
||||
float2 p = dpdu * sd->u + dpdv * sd->v + uv[0];
|
||||
|
||||
float3 a, dads, dadt;
|
||||
a = patch_eval_float3(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
|
||||
@@ -446,12 +446,12 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg,
|
||||
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
if (dx)
|
||||
*dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
|
||||
*dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a;
|
||||
if (dy)
|
||||
*dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
|
||||
*dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a;
|
||||
#endif
|
||||
|
||||
return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
|
||||
return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a;
|
||||
}
|
||||
else if (desc.element == ATTR_ELEMENT_CORNER) {
|
||||
float2 uv[3];
|
||||
@@ -478,12 +478,12 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg,
|
||||
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
if (dx)
|
||||
*dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
|
||||
*dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a;
|
||||
if (dy)
|
||||
*dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
|
||||
*dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a;
|
||||
#endif
|
||||
|
||||
return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
|
||||
return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a;
|
||||
}
|
||||
else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
|
||||
if (dx)
|
||||
@@ -516,11 +516,11 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg,
|
||||
float2 uv[3];
|
||||
subd_triangle_patch_uv(kg, sd, uv);
|
||||
|
||||
float2 dpdu = uv[0] - uv[2];
|
||||
float2 dpdv = uv[1] - uv[2];
|
||||
float2 dpdu = uv[1] - uv[0];
|
||||
float2 dpdv = uv[2] - uv[0];
|
||||
|
||||
/* p is [s, t] */
|
||||
float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
|
||||
float2 p = dpdu * sd->u + dpdv * sd->v + uv[0];
|
||||
|
||||
float4 a, dads, dadt;
|
||||
if (desc.type == NODE_ATTR_RGBA) {
|
||||
@@ -592,12 +592,12 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg,
|
||||
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
if (dx)
|
||||
*dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
|
||||
*dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a;
|
||||
if (dy)
|
||||
*dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
|
||||
*dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a;
|
||||
#endif
|
||||
|
||||
return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
|
||||
return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a;
|
||||
}
|
||||
else if (desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) {
|
||||
float2 uv[3];
|
||||
@@ -636,12 +636,12 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg,
|
||||
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
if (dx)
|
||||
*dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
|
||||
*dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a;
|
||||
if (dy)
|
||||
*dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
|
||||
*dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a;
|
||||
#endif
|
||||
|
||||
return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
|
||||
return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a;
|
||||
}
|
||||
else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
|
||||
if (dx)
|
||||
|
@@ -45,8 +45,8 @@ ccl_device_inline void triangle_point_normal(KernelGlobals kg,
|
||||
float3 v1 = kernel_data_fetch(tri_verts, tri_vindex.w + 1);
|
||||
float3 v2 = kernel_data_fetch(tri_verts, tri_vindex.w + 2);
|
||||
/* compute point */
|
||||
float t = 1.0f - u - v;
|
||||
*P = (u * v0 + v * v1 + t * v2);
|
||||
float w = 1.0f - u - v;
|
||||
*P = (w * v0 + u * v1 + v * v2);
|
||||
/* get object flags */
|
||||
int object_flag = kernel_data_fetch(object_flag, object);
|
||||
/* compute normal */
|
||||
@@ -97,7 +97,7 @@ triangle_smooth_normal(KernelGlobals kg, float3 Ng, int prim, float u, float v)
|
||||
float3 n1 = kernel_data_fetch(tri_vnormal, tri_vindex.y);
|
||||
float3 n2 = kernel_data_fetch(tri_vnormal, tri_vindex.z);
|
||||
|
||||
float3 N = safe_normalize((1.0f - u - v) * n2 + u * n0 + v * n1);
|
||||
float3 N = safe_normalize((1.0f - u - v) * n0 + u * n1 + v * n2);
|
||||
|
||||
return is_zero(N) ? Ng : N;
|
||||
}
|
||||
@@ -118,7 +118,7 @@ ccl_device_inline float3 triangle_smooth_normal_unnormalized(
|
||||
object_inverse_normal_transform(kg, sd, &n2);
|
||||
}
|
||||
|
||||
float3 N = (1.0f - u - v) * n2 + u * n0 + v * n1;
|
||||
float3 N = (1.0f - u - v) * n0 + u * n1 + v * n2;
|
||||
|
||||
return is_zero(N) ? Ng : N;
|
||||
}
|
||||
@@ -137,8 +137,8 @@ ccl_device_inline void triangle_dPdudv(KernelGlobals kg,
|
||||
const float3 p2 = kernel_data_fetch(tri_verts, tri_vindex.w + 2);
|
||||
|
||||
/* compute derivatives of P w.r.t. uv */
|
||||
*dPdu = (p0 - p2);
|
||||
*dPdv = (p1 - p2);
|
||||
*dPdu = (p1 - p0);
|
||||
*dPdv = (p2 - p0);
|
||||
}
|
||||
|
||||
/* Reading attributes on various triangle elements */
|
||||
@@ -167,12 +167,12 @@ ccl_device float triangle_attribute_float(KernelGlobals kg,
|
||||
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
if (dx)
|
||||
*dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
|
||||
*dx = sd->du.dx * f1 + sd->dv.dx * f2 - (sd->du.dx + sd->dv.dx) * f0;
|
||||
if (dy)
|
||||
*dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
|
||||
*dy = sd->du.dy * f1 + sd->dv.dy * f2 - (sd->du.dy + sd->dv.dy) * f0;
|
||||
#endif
|
||||
|
||||
return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
|
||||
return sd->u * f1 + sd->v * f2 + (1.0f - sd->u - sd->v) * f0;
|
||||
}
|
||||
else {
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
@@ -217,12 +217,12 @@ ccl_device float2 triangle_attribute_float2(KernelGlobals kg,
|
||||
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
if (dx)
|
||||
*dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
|
||||
*dx = sd->du.dx * f1 + sd->dv.dx * f2 - (sd->du.dx + sd->dv.dx) * f0;
|
||||
if (dy)
|
||||
*dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
|
||||
*dy = sd->du.dy * f1 + sd->dv.dy * f2 - (sd->du.dy + sd->dv.dy) * f0;
|
||||
#endif
|
||||
|
||||
return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
|
||||
return sd->u * f1 + sd->v * f2 + (1.0f - sd->u - sd->v) * f0;
|
||||
}
|
||||
else {
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
@@ -267,12 +267,12 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals kg,
|
||||
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
if (dx)
|
||||
*dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
|
||||
*dx = sd->du.dx * f1 + sd->dv.dx * f2 - (sd->du.dx + sd->dv.dx) * f0;
|
||||
if (dy)
|
||||
*dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
|
||||
*dy = sd->du.dy * f1 + sd->dv.dy * f2 - (sd->du.dy + sd->dv.dy) * f0;
|
||||
#endif
|
||||
|
||||
return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
|
||||
return sd->u * f1 + sd->v * f2 + (1.0f - sd->u - sd->v) * f0;
|
||||
}
|
||||
else {
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
@@ -328,12 +328,12 @@ ccl_device float4 triangle_attribute_float4(KernelGlobals kg,
|
||||
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
if (dx)
|
||||
*dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
|
||||
*dx = sd->du.dx * f1 + sd->dv.dx * f2 - (sd->du.dx + sd->dv.dx) * f0;
|
||||
if (dy)
|
||||
*dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
|
||||
*dy = sd->du.dy * f1 + sd->dv.dy * f2 - (sd->du.dy + sd->dv.dy) * f0;
|
||||
#endif
|
||||
|
||||
return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
|
||||
return sd->u * f1 + sd->v * f2 + (1.0f - sd->u - sd->v) * f0;
|
||||
}
|
||||
else {
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
|
@@ -145,9 +145,9 @@ ccl_device_inline float3 triangle_point_from_uv(KernelGlobals kg,
|
||||
const packed_float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0),
|
||||
tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1),
|
||||
tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
|
||||
float w = 1.0f - u - v;
|
||||
|
||||
float3 P = u * tri_a + v * tri_b + w * tri_c;
|
||||
/* This appears to give slightly better precision than interpolating with w = (1 - u - v). */
|
||||
float3 P = tri_a + u * (tri_b - tri_a) + v * (tri_c - tri_a);
|
||||
|
||||
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
const Transform tfm = object_get_transform(kg, sd);
|
||||
|
@@ -155,6 +155,11 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
|
||||
1.0f - u);
|
||||
}
|
||||
|
||||
/* Convert from Blender to Cycles/Embree/OptiX barycentric convention. */
|
||||
const float tmp = u;
|
||||
u = v;
|
||||
v = 1.0f - tmp - v;
|
||||
|
||||
/* Position and normal on triangle. */
|
||||
const int object = kernel_data.bake.object_index;
|
||||
float3 P, Ng;
|
||||
|
@@ -51,7 +51,7 @@ ccl_device_forceinline int integrate_shadow_max_transparent_hits(KernelGlobals k
|
||||
}
|
||||
|
||||
#ifdef __TRANSPARENT_SHADOWS__
|
||||
# if defined(__KERNEL_CPU__)
|
||||
# ifndef __KERNEL_GPU__
|
||||
ccl_device int shadow_intersections_compare(const void *a, const void *b)
|
||||
{
|
||||
const Intersection *isect_a = (const Intersection *)a;
|
||||
|
@@ -38,8 +38,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
|
||||
|
||||
#ifdef __VOLUME_RECORD_ALL__
|
||||
Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1];
|
||||
uint num_hits = scene_intersect_volume_all(
|
||||
kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
|
||||
uint num_hits = scene_intersect_volume(kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
|
||||
if (num_hits > 0) {
|
||||
Intersection *isect = hits;
|
||||
|
||||
@@ -108,8 +107,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s
|
||||
|
||||
#ifdef __VOLUME_RECORD_ALL__
|
||||
Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1];
|
||||
uint num_hits = scene_intersect_volume_all(
|
||||
kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
|
||||
uint num_hits = scene_intersect_volume(kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
|
||||
if (num_hits > 0) {
|
||||
int enclosed_volumes[MAX_VOLUME_STACK_SIZE];
|
||||
Intersection *isect = hits;
|
||||
|
@@ -186,7 +186,7 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg,
|
||||
triangle_vertices_and_normals(kg, sd_vtx->prim, verts, normals);
|
||||
|
||||
/* Compute refined position (same code as in triangle_point_from_uv). */
|
||||
sd_vtx->P = isect->u * verts[0] + isect->v * verts[1] + (1.f - isect->u - isect->v) * verts[2];
|
||||
sd_vtx->P = (1.f - isect->u - isect->v) * verts[0] + isect->u * verts[1] + isect->v * verts[2];
|
||||
if (!(sd_vtx->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
const Transform tfm = object_get_transform(kg, sd_vtx);
|
||||
sd_vtx->P = transform_point(&tfm, sd_vtx->P);
|
||||
@@ -213,8 +213,8 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg,
|
||||
}
|
||||
|
||||
/* Tangent space (position derivatives) WRT barycentric (u, v). */
|
||||
float3 dp_du = verts[0] - verts[2];
|
||||
float3 dp_dv = verts[1] - verts[2];
|
||||
float3 dp_du = verts[1] - verts[0];
|
||||
float3 dp_dv = verts[2] - verts[0];
|
||||
|
||||
/* Geometric normal. */
|
||||
vtx->ng = normalize(cross(dp_du, dp_dv));
|
||||
@@ -223,16 +223,16 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg,
|
||||
|
||||
/* Shading normals: Interpolate normals between vertices. */
|
||||
float n_len;
|
||||
vtx->n = normalize_len(normals[0] * sd_vtx->u + normals[1] * sd_vtx->v +
|
||||
normals[2] * (1.0f - sd_vtx->u - sd_vtx->v),
|
||||
vtx->n = normalize_len(normals[0] * (1.0f - sd_vtx->u - sd_vtx->v) + normals[1] * sd_vtx->u +
|
||||
normals[2] * sd_vtx->v,
|
||||
&n_len);
|
||||
|
||||
/* Shading normal derivatives WRT barycentric (u, v)
|
||||
* we calculate the derivative of n = |u*n0 + v*n1 + (1-u-v)*n2| using:
|
||||
* d/du [f(u)/|f(u)|] = [d/du f(u)]/|f(u)| - f(u)/|f(u)|^3 <f(u), d/du f(u)>. */
|
||||
const float inv_n_len = 1.f / n_len;
|
||||
float3 dn_du = inv_n_len * (normals[0] - normals[2]);
|
||||
float3 dn_dv = inv_n_len * (normals[1] - normals[2]);
|
||||
float3 dn_du = inv_n_len * (normals[1] - normals[0]);
|
||||
float3 dn_dv = inv_n_len * (normals[2] - normals[0]);
|
||||
dn_du -= vtx->n * dot(vtx->n, dn_du);
|
||||
dn_dv -= vtx->n * dot(vtx->n, dn_dv);
|
||||
|
||||
|
@@ -13,7 +13,7 @@ CCL_NAMESPACE_BEGIN
|
||||
ccl_device_inline void path_state_init_queues(IntegratorState state)
|
||||
{
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
|
||||
#ifdef __KERNEL_CPU__
|
||||
#ifndef __KERNEL_GPU__
|
||||
INTEGRATOR_STATE_WRITE(&state->shadow, shadow_path, queued_kernel) = 0;
|
||||
INTEGRATOR_STATE_WRITE(&state->ao, shadow_path, queued_kernel) = 0;
|
||||
#endif
|
||||
|
@@ -140,7 +140,7 @@ typedef struct IntegratorStateGPU {
|
||||
* happen from a kernel which operates on a "main" path. Attempt to use shadow catcher accessors
|
||||
* from a kernel which operates on a shadow catcher state will cause bad memory access. */
|
||||
|
||||
#ifdef __KERNEL_CPU__
|
||||
#ifndef __KERNEL_GPU__
|
||||
|
||||
/* Scalar access on CPU. */
|
||||
|
||||
@@ -159,7 +159,7 @@ typedef const IntegratorShadowStateCPU *ccl_restrict ConstIntegratorShadowState;
|
||||
# define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \
|
||||
((state)->nested_struct[array_index].member)
|
||||
|
||||
#else /* __KERNEL_CPU__ */
|
||||
#else /* !__KERNEL_GPU__ */
|
||||
|
||||
/* Array access on GPU with Structure-of-Arrays. */
|
||||
|
||||
@@ -180,6 +180,6 @@ typedef int ConstIntegratorShadowState;
|
||||
# define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \
|
||||
INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member)
|
||||
|
||||
#endif /* __KERNEL_CPU__ */
|
||||
#endif /* !__KERNEL_GPU__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -338,7 +338,7 @@ ccl_device_inline IntegratorState integrator_state_shadow_catcher_split(KernelGl
|
||||
return to_state;
|
||||
}
|
||||
|
||||
#ifdef __KERNEL_CPU__
|
||||
#ifndef __KERNEL_GPU__
|
||||
ccl_device_inline int integrator_state_bounce(ConstIntegratorState state, const int)
|
||||
{
|
||||
return INTEGRATOR_STATE(state, path, bounce);
|
||||
|
@@ -126,17 +126,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
|
||||
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
/* Transform normal to world space. */
|
||||
Transform itfm;
|
||||
Transform tfm = object_fetch_transform_motion_test(kg, object, time, &itfm);
|
||||
object_fetch_transform_motion_test(kg, object, time, &itfm);
|
||||
hit_Ng = normalize(transform_direction_transposed(&itfm, hit_Ng));
|
||||
|
||||
/* Transform t to world space, except for OptiX and MetalRT where it already is. */
|
||||
#ifdef __KERNEL_GPU_RAYTRACING__
|
||||
(void)tfm;
|
||||
#else
|
||||
float3 D = transform_direction(&itfm, ray.D);
|
||||
D = normalize(D) * ss_isect.hits[hit].t;
|
||||
ss_isect.hits[hit].t = len(transform_direction(&tfm, D));
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Quickly retrieve P and Ng without setting up ShaderData. */
|
||||
|
@@ -205,12 +205,6 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
|
||||
ray.self.light_object = OBJECT_NONE;
|
||||
ray.self.light_prim = PRIM_NONE;
|
||||
|
||||
#ifndef __KERNEL_GPU_RAYTRACING__
|
||||
/* Compute or fetch object transforms. */
|
||||
Transform ob_itfm ccl_optional_struct_init;
|
||||
Transform ob_tfm = object_fetch_transform_motion_test(kg, object, time, &ob_itfm);
|
||||
#endif
|
||||
|
||||
/* Convert subsurface to volume coefficients.
|
||||
* The single-scattering albedo is named alpha to avoid confusion with the surface albedo. */
|
||||
const float3 albedo = INTEGRATOR_STATE(state, subsurface, albedo);
|
||||
@@ -383,15 +377,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
|
||||
hit = (ss_isect.num_hits > 0);
|
||||
|
||||
if (hit) {
|
||||
#ifdef __KERNEL_GPU_RAYTRACING__
|
||||
/* t is always in world space with OptiX and MetalRT. */
|
||||
ray.tmax = ss_isect.hits[0].t;
|
||||
#else
|
||||
/* Compute world space distance to surface hit. */
|
||||
float3 D = transform_direction(&ob_itfm, ray.D);
|
||||
D = normalize(D) * ss_isect.hits[0].t;
|
||||
ray.tmax = len(transform_direction(&ob_tfm, D));
|
||||
#endif
|
||||
}
|
||||
|
||||
if (bounce == 0) {
|
||||
|
@@ -137,8 +137,9 @@ ccl_device_inline float3 shadow_ray_smooth_surface_offset(
|
||||
triangle_vertices_and_normals(kg, sd->prim, V, N);
|
||||
}
|
||||
|
||||
const float u = sd->u, v = sd->v;
|
||||
const float w = 1 - u - v;
|
||||
const float u = 1.0f - sd->u - sd->v;
|
||||
const float v = sd->u;
|
||||
const float w = sd->v;
|
||||
float3 P = V[0] * u + V[1] * v + V[2] * w; /* Local space */
|
||||
float3 n = N[0] * u + N[1] * v + N[2] * w; /* We get away without normalization */
|
||||
|
||||
|
@@ -20,7 +20,7 @@ shader node_geometry(normal NormalIn = N,
|
||||
Normal = NormalIn;
|
||||
TrueNormal = Ng;
|
||||
Incoming = I;
|
||||
Parametric = point(u, v, 0.0);
|
||||
Parametric = point(1.0 - u - v, u, 0.0);
|
||||
Backfacing = backfacing();
|
||||
|
||||
if (bump_offset == "dx") {
|
||||
|
@@ -34,7 +34,7 @@ ccl_device_noinline void svm_node_geometry(KernelGlobals kg,
|
||||
data = sd->Ng;
|
||||
break;
|
||||
case NODE_GEOM_uv:
|
||||
data = make_float3(sd->u, sd->v, 0.0f);
|
||||
data = make_float3(1.0f - sd->u - sd->v, sd->u, 0.0f);
|
||||
break;
|
||||
default:
|
||||
data = make_float3(0.0f, 0.0f, 0.0f);
|
||||
@@ -57,7 +57,7 @@ ccl_device_noinline void svm_node_geometry_bump_dx(KernelGlobals kg,
|
||||
data = sd->P + sd->dP.dx;
|
||||
break;
|
||||
case NODE_GEOM_uv:
|
||||
data = make_float3(sd->u + sd->du.dx, sd->v + sd->dv.dx, 0.0f);
|
||||
data = make_float3(1.0f - sd->u - sd->du.dx - sd->v - sd->dv.dx, sd->u + sd->du.dx, 0.0f);
|
||||
break;
|
||||
default:
|
||||
svm_node_geometry(kg, sd, stack, type, out_offset);
|
||||
@@ -84,7 +84,7 @@ ccl_device_noinline void svm_node_geometry_bump_dy(KernelGlobals kg,
|
||||
data = sd->P + sd->dP.dy;
|
||||
break;
|
||||
case NODE_GEOM_uv:
|
||||
data = make_float3(sd->u + sd->du.dy, sd->v + sd->dv.dy, 0.0f);
|
||||
data = make_float3(1.0f - sd->u - sd->du.dy - sd->v - sd->dv.dy, sd->u + sd->du.dy, 0.0f);
|
||||
break;
|
||||
default:
|
||||
svm_node_geometry(kg, sd, stack, type, out_offset);
|
||||
|
@@ -19,10 +19,6 @@
|
||||
|
||||
#include "kernel/svm/types.h"
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
# define __KERNEL_CPU__
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Constants */
|
||||
@@ -51,10 +47,10 @@ CCL_NAMESPACE_BEGIN
|
||||
#define INTEGRATOR_SHADOW_ISECT_SIZE_CPU 1024U
|
||||
#define INTEGRATOR_SHADOW_ISECT_SIZE_GPU 4U
|
||||
|
||||
#ifdef __KERNEL_CPU__
|
||||
# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU
|
||||
#else
|
||||
#ifdef __KERNEL_GPU__
|
||||
# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_GPU
|
||||
#else
|
||||
# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU
|
||||
#endif
|
||||
|
||||
/* Kernel features */
|
||||
@@ -83,7 +79,6 @@ CCL_NAMESPACE_BEGIN
|
||||
#define __LAMP_MIS__
|
||||
#define __CAMERA_MOTION__
|
||||
#define __OBJECT_MOTION__
|
||||
#define __BAKING__
|
||||
#define __PRINCIPLED__
|
||||
#define __SUBSURFACE__
|
||||
#define __VOLUME__
|
||||
@@ -92,16 +87,12 @@ CCL_NAMESPACE_BEGIN
|
||||
#define __BRANCHED_PATH__
|
||||
|
||||
/* Device specific features */
|
||||
#ifdef __KERNEL_CPU__
|
||||
#ifndef __KERNEL_GPU__
|
||||
# ifdef WITH_OSL
|
||||
# define __OSL__
|
||||
# endif
|
||||
# define __VOLUME_RECORD_ALL__
|
||||
#endif /* __KERNEL_CPU__ */
|
||||
|
||||
#ifdef __KERNEL_GPU_RAYTRACING__
|
||||
# undef __BAKING__
|
||||
#endif /* __KERNEL_GPU_RAYTRACING__ */
|
||||
#endif /* !__KERNEL_GPU__ */
|
||||
|
||||
/* MNEE currently causes "Compute function exceeds available temporary registers"
|
||||
* on Metal, disabled for now. */
|
||||
@@ -129,9 +120,6 @@ CCL_NAMESPACE_BEGIN
|
||||
# if !(__KERNEL_FEATURES & KERNEL_FEATURE_SUBSURFACE)
|
||||
# undef __SUBSURFACE__
|
||||
# endif
|
||||
# if !(__KERNEL_FEATURES & KERNEL_FEATURE_BAKING)
|
||||
# undef __BAKING__
|
||||
# endif
|
||||
# if !(__KERNEL_FEATURES & KERNEL_FEATURE_PATCH_EVALUATION)
|
||||
# undef __PATCH_EVAL__
|
||||
# endif
|
||||
@@ -730,7 +718,7 @@ typedef struct ccl_align(16) ShaderClosure
|
||||
{
|
||||
SHADER_CLOSURE_BASE;
|
||||
|
||||
#ifdef __KERNEL_CPU__
|
||||
#ifndef __KERNEL_GPU__
|
||||
float pad[2];
|
||||
#endif
|
||||
float data[10];
|
||||
@@ -1168,7 +1156,7 @@ typedef struct KernelData {
|
||||
uint max_shaders;
|
||||
uint volume_stack_size;
|
||||
|
||||
/* Always dynamic data mambers. */
|
||||
/* Always dynamic data members. */
|
||||
KernelCamera cam;
|
||||
KernelBake bake;
|
||||
KernelTables tables;
|
||||
@@ -1548,15 +1536,15 @@ enum KernelFeatureFlag : uint32_t {
|
||||
/* Must be constexpr on the CPU to avoid compile errors because the state types
|
||||
* are different depending on the main, shadow or null path. For GPU we don't have
|
||||
* C++17 everywhere so can't use it. */
|
||||
#ifdef __KERNEL_CPU__
|
||||
#ifdef __KERNEL_GPU__
|
||||
# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
|
||||
# define IF_KERNEL_NODES_FEATURE(feature) \
|
||||
if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
|
||||
#else
|
||||
# define IF_KERNEL_FEATURE(feature) \
|
||||
if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
|
||||
# define IF_KERNEL_NODES_FEATURE(feature) \
|
||||
if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
|
||||
#else
|
||||
# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
|
||||
# define IF_KERNEL_NODES_FEATURE(feature) \
|
||||
if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -3,13 +3,13 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef __KERNEL_CPU__
|
||||
#ifndef __KERNEL_GPU__
|
||||
# include "util/profiling.h"
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef __KERNEL_CPU__
|
||||
#ifndef __KERNEL_GPU__
|
||||
# define PROFILING_INIT(kg, event) \
|
||||
ProfilingHelper profiling_helper((ProfilingState *)&kg->profiler, event)
|
||||
# define PROFILING_EVENT(event) profiling_helper.set_event(event)
|
||||
@@ -22,6 +22,6 @@ CCL_NAMESPACE_BEGIN
|
||||
# define PROFILING_EVENT(event)
|
||||
# define PROFILING_INIT_FOR_SHADER(kg, event)
|
||||
# define PROFILING_SHADER(object, shader)
|
||||
#endif /* __KERNEL_CPU__ */
|
||||
#endif /* !__KERNEL_GPU__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -73,16 +73,16 @@ static int fill_shader_input(const Scene *scene,
|
||||
|
||||
switch (j) {
|
||||
case 0:
|
||||
u = 1.0f;
|
||||
u = 0.0f;
|
||||
v = 0.0f;
|
||||
break;
|
||||
case 1:
|
||||
u = 0.0f;
|
||||
v = 1.0f;
|
||||
u = 1.0f;
|
||||
v = 0.0f;
|
||||
break;
|
||||
default:
|
||||
u = 0.0f;
|
||||
v = 0.0f;
|
||||
v = 1.0f;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@@ -209,7 +209,7 @@ const BufferPass *BufferParams::get_actual_display_pass(const BufferPass *pass)
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (pass->type == PASS_COMBINED) {
|
||||
if (pass->type == PASS_COMBINED && pass->lightgroup.empty()) {
|
||||
const BufferPass *shadow_catcher_matte_pass = find_pass(PASS_SHADOW_CATCHER_MATTE, pass->mode);
|
||||
if (shadow_catcher_matte_pass) {
|
||||
pass = shadow_catcher_matte_pass;
|
||||
|
@@ -2,7 +2,6 @@
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#define __KERNEL_AVX2__
|
||||
#define __KERNEL_CPU__
|
||||
|
||||
#define TEST_CATEGORY_NAME util_avx2
|
||||
|
||||
|
@@ -2,7 +2,6 @@
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#define __KERNEL_AVX__
|
||||
#define __KERNEL_CPU__
|
||||
|
||||
#define TEST_CATEGORY_NAME util_avx
|
||||
|
||||
|
@@ -63,6 +63,7 @@ set(SRC_HEADERS
|
||||
math_float2.h
|
||||
math_float3.h
|
||||
math_float4.h
|
||||
math_float8.h
|
||||
math_int2.h
|
||||
math_int3.h
|
||||
math_int4.h
|
||||
@@ -128,8 +129,6 @@ set(SRC_HEADERS
|
||||
types_uint4.h
|
||||
types_uint4_impl.h
|
||||
types_ushort4.h
|
||||
types_vector3.h
|
||||
types_vector3_impl.h
|
||||
unique_ptr.h
|
||||
vector.h
|
||||
version.h
|
||||
|
@@ -81,7 +81,7 @@
|
||||
/* macros */
|
||||
|
||||
/* hints for branch prediction, only use in code that runs a _lot_ */
|
||||
#if defined(__GNUC__) && defined(__KERNEL_CPU__)
|
||||
#if defined(__GNUC__) && !defined(__KERNEL_GPU__)
|
||||
# define LIKELY(x) __builtin_expect(!!(x), 1)
|
||||
# define UNLIKELY(x) __builtin_expect(!!(x), 0)
|
||||
#else
|
||||
|
@@ -511,6 +511,11 @@ ccl_device_inline float4 float3_to_float4(const float3 a)
|
||||
return make_float4(a.x, a.y, a.z, 1.0f);
|
||||
}
|
||||
|
||||
ccl_device_inline float4 float3_to_float4(const float3 a, const float w)
|
||||
{
|
||||
return make_float4(a.x, a.y, a.z, w);
|
||||
}
|
||||
|
||||
ccl_device_inline float inverse_lerp(float a, float b, float x)
|
||||
{
|
||||
return (x - a) / (b - a);
|
||||
@@ -535,6 +540,7 @@ CCL_NAMESPACE_END
|
||||
#include "util/math_float2.h"
|
||||
#include "util/math_float3.h"
|
||||
#include "util/math_float4.h"
|
||||
#include "util/math_float8.h"
|
||||
|
||||
#include "util/rect.h"
|
||||
|
||||
@@ -947,7 +953,11 @@ ccl_device_inline uint prev_power_of_two(uint x)
|
||||
ccl_device_inline uint32_t reverse_integer_bits(uint32_t x)
|
||||
{
|
||||
/* Use a native instruction if it exists. */
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#if defined(__KERNEL_CUDA__)
|
||||
return __brev(x);
|
||||
#elif defined(__KERNEL_METAL__)
|
||||
return reverse_bits(x);
|
||||
#elif defined(__aarch64__) || defined(_M_ARM64)
|
||||
/* Assume the rbit is always available on 64bit ARM architecture. */
|
||||
__asm__("rbit %w0, %w1" : "=r"(x) : "r"(x));
|
||||
return x;
|
||||
@@ -956,10 +966,6 @@ ccl_device_inline uint32_t reverse_integer_bits(uint32_t x)
|
||||
* This 32-bit Thumb instruction is available in ARMv6T2 and above. */
|
||||
__asm__("rbit %0, %1" : "=r"(x) : "r"(x));
|
||||
return x;
|
||||
#elif defined(__KERNEL_CUDA__)
|
||||
return __brev(x);
|
||||
#elif defined(__KERNEL_METAL__)
|
||||
return reverse_bits(x);
|
||||
#elif __has_builtin(__builtin_bitreverse32)
|
||||
return __builtin_bitreverse32(x);
|
||||
#else
|
||||
|
@@ -420,7 +420,7 @@ ccl_device_inline float fast_expf(float x)
|
||||
return fast_exp2f(x / M_LN2_F);
|
||||
}
|
||||
|
||||
#if defined(__KERNEL_CPU__) && !defined(_MSC_VER)
|
||||
#if !defined(__KERNEL_GPU__) && !defined(_MSC_VER)
|
||||
/* MSVC seems to have a code-gen bug here in at least SSE41/AVX, see
|
||||
* T78047 and T78869 for details. Just disable for now, it only makes
|
||||
* a small difference in denoising performance. */
|
||||
|
@@ -147,8 +147,11 @@ ccl_device_inline float3 operator/(const float f, const float3 &a)
|
||||
|
||||
ccl_device_inline float3 operator/(const float3 &a, const float f)
|
||||
{
|
||||
float invf = 1.0f / f;
|
||||
return a * invf;
|
||||
# if defined(__KERNEL_SSE__)
|
||||
return float3(_mm_div_ps(a.m128, _mm_set1_ps(f)));
|
||||
# else
|
||||
return make_float3(a.x / f, a.y / f, a.z / f);
|
||||
# endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 operator/(const float3 &a, const float3 &b)
|
||||
@@ -284,8 +287,12 @@ ccl_device_inline float dot_xy(const float3 &a, const float3 &b)
|
||||
|
||||
ccl_device_inline float3 cross(const float3 &a, const float3 &b)
|
||||
{
|
||||
float3 r = make_float3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
|
||||
return r;
|
||||
# ifdef __KERNEL_SSE__
|
||||
return float3(shuffle<1, 2, 0, 3>(
|
||||
msub(ssef(a), shuffle<1, 2, 0, 3>(ssef(b)), shuffle<1, 2, 0, 3>(ssef(a)) * ssef(b))));
|
||||
# else
|
||||
return make_float3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
|
||||
# endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 normalize(const float3 &a)
|
||||
|
419
intern/cycles/util/math_float8.h
Normal file
419
intern/cycles/util/math_float8.h
Normal file
@@ -0,0 +1,419 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2022 Blender Foundation */
|
||||
|
||||
#ifndef __UTIL_MATH_FLOAT8_H__
|
||||
#define __UTIL_MATH_FLOAT8_H__
|
||||
|
||||
#ifndef __UTIL_MATH_H__
|
||||
# error "Do not include this file directly, include util/types.h instead."
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/*******************************************************************************
|
||||
* Declaration.
|
||||
*/
|
||||
|
||||
ccl_device_inline float8_t operator+(const float8_t a, const float8_t b);
|
||||
ccl_device_inline float8_t operator+(const float8_t a, const float f);
|
||||
ccl_device_inline float8_t operator+(const float f, const float8_t a);
|
||||
|
||||
ccl_device_inline float8_t operator-(const float8_t a);
|
||||
ccl_device_inline float8_t operator-(const float8_t a, const float8_t b);
|
||||
ccl_device_inline float8_t operator-(const float8_t a, const float f);
|
||||
ccl_device_inline float8_t operator-(const float f, const float8_t a);
|
||||
|
||||
ccl_device_inline float8_t operator*(const float8_t a, const float8_t b);
|
||||
ccl_device_inline float8_t operator*(const float8_t a, const float f);
|
||||
ccl_device_inline float8_t operator*(const float f, const float8_t a);
|
||||
|
||||
ccl_device_inline float8_t operator/(const float8_t a, const float8_t b);
|
||||
ccl_device_inline float8_t operator/(const float8_t a, float f);
|
||||
ccl_device_inline float8_t operator/(const float f, const float8_t a);
|
||||
|
||||
ccl_device_inline float8_t operator+=(float8_t a, const float8_t b);
|
||||
|
||||
ccl_device_inline float8_t operator*=(float8_t a, const float8_t b);
|
||||
ccl_device_inline float8_t operator*=(float8_t a, float f);
|
||||
|
||||
ccl_device_inline float8_t operator/=(float8_t a, float f);
|
||||
|
||||
ccl_device_inline bool operator==(const float8_t a, const float8_t b);
|
||||
|
||||
ccl_device_inline float8_t rcp(const float8_t a);
|
||||
ccl_device_inline float8_t sqrt(const float8_t a);
|
||||
ccl_device_inline float8_t sqr(const float8_t a);
|
||||
ccl_device_inline bool is_zero(const float8_t a);
|
||||
ccl_device_inline float average(const float8_t a);
|
||||
ccl_device_inline float8_t min(const float8_t a, const float8_t b);
|
||||
ccl_device_inline float8_t max(const float8_t a, const float8_t b);
|
||||
ccl_device_inline float8_t clamp(const float8_t a, const float8_t mn, const float8_t mx);
|
||||
ccl_device_inline float8_t fabs(const float8_t a);
|
||||
ccl_device_inline float8_t mix(const float8_t a, const float8_t b, float t);
|
||||
ccl_device_inline float8_t saturate(const float8_t a);
|
||||
|
||||
ccl_device_inline float8_t safe_divide(const float8_t a, const float b);
|
||||
ccl_device_inline float8_t safe_divide(const float8_t a, const float8_t b);
|
||||
|
||||
ccl_device_inline float reduce_min(const float8_t a);
|
||||
ccl_device_inline float reduce_max(const float8_t a);
|
||||
ccl_device_inline float reduce_add(const float8_t a);
|
||||
|
||||
ccl_device_inline bool isequal(const float8_t a, const float8_t b);
|
||||
|
||||
/*******************************************************************************
|
||||
* Definition.
|
||||
*/
|
||||
|
||||
ccl_device_inline float8_t zero_float8_t()
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8_t(_mm256_setzero_ps());
|
||||
#else
|
||||
return make_float8_t(0.0f);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t one_float8_t()
|
||||
{
|
||||
return make_float8_t(1.0f);
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t operator+(const float8_t a, const float8_t b)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8_t(_mm256_add_ps(a.m256, b.m256));
|
||||
#else
|
||||
return make_float8_t(
|
||||
a.a + b.a, a.b + b.b, a.c + b.c, a.d + b.d, a.e + b.e, a.f + b.f, a.g + b.g, a.h + b.h);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t operator+(const float8_t a, const float f)
|
||||
{
|
||||
return a + make_float8_t(f);
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t operator+(const float f, const float8_t a)
|
||||
{
|
||||
return make_float8_t(f) + a;
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t operator-(const float8_t a)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
__m256 mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000));
|
||||
return float8_t(_mm256_xor_ps(a.m256, mask));
|
||||
#else
|
||||
return make_float8_t(-a.a, -a.b, -a.c, -a.d, -a.e, -a.f, -a.g, -a.h);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t operator-(const float8_t a, const float8_t b)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8_t(_mm256_sub_ps(a.m256, b.m256));
|
||||
#else
|
||||
return make_float8_t(
|
||||
a.a - b.a, a.b - b.b, a.c - b.c, a.d - b.d, a.e - b.e, a.f - b.f, a.g - b.g, a.h - b.h);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t operator-(const float8_t a, const float f)
|
||||
{
|
||||
return a - make_float8_t(f);
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t operator-(const float f, const float8_t a)
|
||||
{
|
||||
return make_float8_t(f) - a;
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t operator*(const float8_t a, const float8_t b)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8_t(_mm256_mul_ps(a.m256, b.m256));
|
||||
#else
|
||||
return make_float8_t(
|
||||
a.a * b.a, a.b * b.b, a.c * b.c, a.d * b.d, a.e * b.e, a.f * b.f, a.g * b.g, a.h * b.h);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t operator*(const float8_t a, const float f)
|
||||
{
|
||||
return a * make_float8_t(f);
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t operator*(const float f, const float8_t a)
|
||||
{
|
||||
return make_float8_t(f) * a;
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t operator/(const float8_t a, const float8_t b)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8_t(_mm256_div_ps(a.m256, b.m256));
|
||||
#else
|
||||
return make_float8_t(
|
||||
a.a / b.a, a.b / b.b, a.c / b.c, a.d / b.d, a.e / b.e, a.f / b.f, a.g / b.g, a.h / b.h);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t operator/(const float8_t a, const float f)
|
||||
{
|
||||
return a / make_float8_t(f);
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t operator/(const float f, const float8_t a)
|
||||
{
|
||||
return make_float8_t(f) / a;
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t operator+=(float8_t a, const float8_t b)
|
||||
{
|
||||
return a = a + b;
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t operator-=(float8_t a, const float8_t b)
|
||||
{
|
||||
return a = a - b;
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t operator*=(float8_t a, const float8_t b)
|
||||
{
|
||||
return a = a * b;
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t operator*=(float8_t a, float f)
|
||||
{
|
||||
return a = a * f;
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t operator/=(float8_t a, float f)
|
||||
{
|
||||
return a = a / f;
|
||||
}
|
||||
|
||||
ccl_device_inline bool operator==(const float8_t a, const float8_t b)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return (_mm256_movemask_ps(_mm256_castsi256_ps(
|
||||
_mm256_cmpeq_epi32(_mm256_castps_si256(a.m256), _mm256_castps_si256(b.m256)))) &
|
||||
0b11111111) == 0b11111111;
|
||||
#else
|
||||
return (a.a == b.a && a.b == b.b && a.c == b.c && a.d == b.d && a.e == b.e && a.f == b.f &&
|
||||
a.g == b.g && a.h == b.h);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t rcp(const float8_t a)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8_t(_mm256_rcp_ps(a.m256));
|
||||
#else
|
||||
return make_float8_t(1.0f / a.a,
|
||||
1.0f / a.b,
|
||||
1.0f / a.c,
|
||||
1.0f / a.d,
|
||||
1.0f / a.e,
|
||||
1.0f / a.f,
|
||||
1.0f / a.g,
|
||||
1.0f / a.h);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t sqrt(const float8_t a)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8_t(_mm256_sqrt_ps(a.m256));
|
||||
#else
|
||||
return make_float8_t(sqrtf(a.a),
|
||||
sqrtf(a.b),
|
||||
sqrtf(a.c),
|
||||
sqrtf(a.d),
|
||||
sqrtf(a.e),
|
||||
sqrtf(a.f),
|
||||
sqrtf(a.g),
|
||||
sqrtf(a.h));
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t sqr(const float8_t a)
|
||||
{
|
||||
return a * a;
|
||||
}
|
||||
|
||||
ccl_device_inline bool is_zero(const float8_t a)
|
||||
{
|
||||
return a == make_float8_t(0.0f);
|
||||
}
|
||||
|
||||
ccl_device_inline float average(const float8_t a)
|
||||
{
|
||||
return reduce_add(a) / 8.0f;
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t min(const float8_t a, const float8_t b)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8_t(_mm256_min_ps(a.m256, b.m256));
|
||||
#else
|
||||
return make_float8_t(min(a.a, b.a),
|
||||
min(a.b, b.b),
|
||||
min(a.c, b.c),
|
||||
min(a.d, b.d),
|
||||
min(a.e, b.e),
|
||||
min(a.f, b.f),
|
||||
min(a.g, b.g),
|
||||
min(a.h, b.h));
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t max(const float8_t a, const float8_t b)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8_t(_mm256_max_ps(a.m256, b.m256));
|
||||
#else
|
||||
return make_float8_t(max(a.a, b.a),
|
||||
max(a.b, b.b),
|
||||
max(a.c, b.c),
|
||||
max(a.d, b.d),
|
||||
max(a.e, b.e),
|
||||
max(a.f, b.f),
|
||||
max(a.g, b.g),
|
||||
max(a.h, b.h));
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t clamp(const float8_t a, const float8_t mn, const float8_t mx)
|
||||
{
|
||||
return min(max(a, mn), mx);
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t fabs(const float8_t a)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8_t(_mm256_and_ps(a.m256, _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff))));
|
||||
#else
|
||||
return make_float8_t(fabsf(a.a),
|
||||
fabsf(a.b),
|
||||
fabsf(a.c),
|
||||
fabsf(a.d),
|
||||
fabsf(a.e),
|
||||
fabsf(a.f),
|
||||
fabsf(a.g),
|
||||
fabsf(a.h));
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t mix(const float8_t a, const float8_t b, float t)
|
||||
{
|
||||
return a + t * (b - a);
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t saturate(const float8_t a)
|
||||
{
|
||||
return clamp(a, make_float8_t(0.0f), make_float8_t(1.0f));
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t exp(float8_t v)
|
||||
{
|
||||
return make_float8_t(
|
||||
expf(v.a), expf(v.b), expf(v.c), expf(v.d), expf(v.e), expf(v.f), expf(v.g), expf(v.h));
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t log(float8_t v)
|
||||
{
|
||||
return make_float8_t(
|
||||
logf(v.a), logf(v.b), logf(v.c), logf(v.d), logf(v.e), logf(v.f), logf(v.g), logf(v.h));
|
||||
}
|
||||
|
||||
ccl_device_inline float dot(const float8_t a, const float8_t b)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
float8_t t(_mm256_dp_ps(a.m256, b.m256, 0xFF));
|
||||
return t[0] + t[4];
|
||||
#else
|
||||
return (a.a * b.a) + (a.b * b.b) + (a.c * b.c) + (a.d * b.d) + (a.e * b.e) + (a.f * b.f) +
|
||||
(a.g * b.g) + (a.h * b.h);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t pow(float8_t v, float e)
|
||||
{
|
||||
return make_float8_t(powf(v.a, e),
|
||||
powf(v.b, e),
|
||||
powf(v.c, e),
|
||||
powf(v.d, e),
|
||||
powf(v.e, e),
|
||||
powf(v.f, e),
|
||||
powf(v.g, e),
|
||||
powf(v.h, e));
|
||||
}
|
||||
|
||||
ccl_device_inline float reduce_min(const float8_t a)
|
||||
{
|
||||
return min(min(min(a.a, a.b), min(a.c, a.d)), min(min(a.e, a.f), min(a.g, a.h)));
|
||||
}
|
||||
|
||||
ccl_device_inline float reduce_max(const float8_t a)
|
||||
{
|
||||
return max(max(max(a.a, a.b), max(a.c, a.d)), max(max(a.e, a.f), max(a.g, a.h)));
|
||||
}
|
||||
|
||||
ccl_device_inline float reduce_add(const float8_t a)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
float8_t b(_mm256_hadd_ps(a.m256, a.m256));
|
||||
float8_t h(_mm256_hadd_ps(b.m256, b.m256));
|
||||
return h[0] + h[4];
|
||||
#else
|
||||
return a.a + a.b + a.c + a.d + a.e + a.f + a.g + a.h;
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline bool isequal(const float8_t a, const float8_t b)
|
||||
{
|
||||
return a == b;
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t safe_divide(const float8_t a, const float b)
|
||||
{
|
||||
return (b != 0.0f) ? a / b : make_float8_t(0.0f);
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t safe_divide(const float8_t a, const float8_t b)
|
||||
{
|
||||
return make_float8_t((b.a != 0.0f) ? a.a / b.a : 0.0f,
|
||||
(b.b != 0.0f) ? a.b / b.b : 0.0f,
|
||||
(b.c != 0.0f) ? a.c / b.c : 0.0f,
|
||||
(b.d != 0.0f) ? a.d / b.d : 0.0f,
|
||||
(b.e != 0.0f) ? a.e / b.e : 0.0f,
|
||||
(b.f != 0.0f) ? a.f / b.f : 0.0f,
|
||||
(b.g != 0.0f) ? a.g / b.g : 0.0f,
|
||||
(b.h != 0.0f) ? a.h / b.h : 0.0f);
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t ensure_finite(float8_t v)
|
||||
{
|
||||
v.a = ensure_finite(v.a);
|
||||
v.b = ensure_finite(v.b);
|
||||
v.c = ensure_finite(v.c);
|
||||
v.d = ensure_finite(v.d);
|
||||
v.e = ensure_finite(v.e);
|
||||
v.f = ensure_finite(v.f);
|
||||
v.g = ensure_finite(v.g);
|
||||
v.h = ensure_finite(v.h);
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
ccl_device_inline bool isfinite_safe(float8_t v)
|
||||
{
|
||||
return isfinite_safe(v.a) && isfinite_safe(v.b) && isfinite_safe(v.c) && isfinite_safe(v.d) &&
|
||||
isfinite_safe(v.e) && isfinite_safe(v.f) && isfinite_safe(v.g) && isfinite_safe(v.h);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __UTIL_MATH_FLOAT8_H__ */
|
@@ -105,10 +105,10 @@ ccl_device bool ray_disk_intersect(float3 ray_P,
|
||||
return false;
|
||||
}
|
||||
|
||||
ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
|
||||
float3 ray_dir,
|
||||
float ray_tmin,
|
||||
float ray_tmax,
|
||||
ccl_device_forceinline bool ray_triangle_intersect(const float3 ray_P,
|
||||
const float3 ray_D,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
const float3 tri_a,
|
||||
const float3 tri_b,
|
||||
const float3 tri_c,
|
||||
@@ -116,14 +116,13 @@ ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
|
||||
ccl_private float *isect_v,
|
||||
ccl_private float *isect_t)
|
||||
{
|
||||
#define dot3(a, b) dot(a, b)
|
||||
const float3 P = ray_P;
|
||||
const float3 dir = ray_dir;
|
||||
/* This implementation matches the Plücker coordinates triangle intersection
|
||||
* in Embree. */
|
||||
|
||||
/* Calculate vertices relative to ray origin. */
|
||||
const float3 v0 = tri_c - P;
|
||||
const float3 v1 = tri_a - P;
|
||||
const float3 v2 = tri_b - P;
|
||||
const float3 v0 = tri_a - ray_P;
|
||||
const float3 v1 = tri_b - ray_P;
|
||||
const float3 v2 = tri_c - ray_P;
|
||||
|
||||
/* Calculate triangle edges. */
|
||||
const float3 e0 = v2 - v0;
|
||||
@@ -131,40 +130,40 @@ ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
|
||||
const float3 e2 = v1 - v2;
|
||||
|
||||
/* Perform edge tests. */
|
||||
const float U = dot(cross(v2 + v0, e0), ray_dir);
|
||||
const float V = dot(cross(v0 + v1, e1), ray_dir);
|
||||
const float W = dot(cross(v1 + v2, e2), ray_dir);
|
||||
const float U = dot(cross(e0, v2 + v0), ray_D);
|
||||
const float V = dot(cross(e1, v0 + v1), ray_D);
|
||||
const float W = dot(cross(e2, v1 + v2), ray_D);
|
||||
|
||||
const float UVW = U + V + W;
|
||||
const float eps = FLT_EPSILON * fabsf(UVW);
|
||||
const float minUVW = min(U, min(V, W));
|
||||
const float maxUVW = max(U, max(V, W));
|
||||
|
||||
if (minUVW < 0.0f && maxUVW > 0.0f) {
|
||||
if (!(minUVW >= -eps || maxUVW <= eps)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Calculate geometry normal and denominator. */
|
||||
const float3 Ng1 = cross(e1, e0);
|
||||
// const Vec3vfM Ng1 = stable_triangle_normal(e2,e1,e0);
|
||||
const float3 Ng = Ng1 + Ng1;
|
||||
const float den = dot3(Ng, dir);
|
||||
const float den = dot(Ng, ray_D);
|
||||
/* Avoid division by 0. */
|
||||
if (UNLIKELY(den == 0.0f)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Perform depth test. */
|
||||
const float T = dot3(v0, Ng);
|
||||
const float T = dot(v0, Ng);
|
||||
const float t = T / den;
|
||||
if (!(t >= ray_tmin && t <= ray_tmax)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
*isect_u = U / den;
|
||||
*isect_v = V / den;
|
||||
const float rcp_UVW = (fabsf(UVW) < 1e-18f) ? 0.0f : 1.0f / UVW;
|
||||
*isect_u = min(U * rcp_UVW, 1.0f);
|
||||
*isect_v = min(V * rcp_UVW, 1.0f);
|
||||
*isect_t = t;
|
||||
return true;
|
||||
|
||||
#undef dot3
|
||||
}
|
||||
|
||||
/* Tests for an intersection between a ray and a quad defined by
|
||||
|
@@ -99,15 +99,7 @@ ProjectionTransform projection_inverse(const ProjectionTransform &tfm)
|
||||
memcpy(M, &tfm, sizeof(M));
|
||||
|
||||
if (UNLIKELY(!transform_matrix4_gj_inverse(R, M))) {
|
||||
/* matrix is degenerate (e.g. 0 scale on some axis), ideally we should
|
||||
* never be in this situation, but try to invert it anyway with tweak */
|
||||
M[0][0] += 1e-8f;
|
||||
M[1][1] += 1e-8f;
|
||||
M[2][2] += 1e-8f;
|
||||
|
||||
if (UNLIKELY(!transform_matrix4_gj_inverse(R, M))) {
|
||||
return projection_identity();
|
||||
}
|
||||
return projection_identity();
|
||||
}
|
||||
|
||||
memcpy(&tfmR, R, sizeof(R));
|
||||
@@ -115,16 +107,9 @@ ProjectionTransform projection_inverse(const ProjectionTransform &tfm)
|
||||
return tfmR;
|
||||
}
|
||||
|
||||
Transform transform_inverse(const Transform &tfm)
|
||||
{
|
||||
ProjectionTransform projection(tfm);
|
||||
return projection_to_transform(projection_inverse(projection));
|
||||
}
|
||||
|
||||
Transform transform_transposed_inverse(const Transform &tfm)
|
||||
{
|
||||
ProjectionTransform projection(tfm);
|
||||
ProjectionTransform iprojection = projection_inverse(projection);
|
||||
ProjectionTransform iprojection(transform_inverse(tfm));
|
||||
return projection_to_transform(projection_transpose(iprojection));
|
||||
}
|
||||
|
||||
|
@@ -63,10 +63,10 @@ ccl_device_inline float3 transform_point(ccl_private const Transform *t, const f
|
||||
|
||||
_MM_TRANSPOSE4_PS(x, y, z, w);
|
||||
|
||||
ssef tmp = shuffle<0>(aa) * x;
|
||||
tmp = madd(shuffle<1>(aa), y, tmp);
|
||||
ssef tmp = w;
|
||||
tmp = madd(shuffle<2>(aa), z, tmp);
|
||||
tmp += w;
|
||||
tmp = madd(shuffle<1>(aa), y, tmp);
|
||||
tmp = madd(shuffle<0>(aa), x, tmp);
|
||||
|
||||
return float3(tmp.m128);
|
||||
#elif defined(__KERNEL_METAL__)
|
||||
@@ -93,9 +93,9 @@ ccl_device_inline float3 transform_direction(ccl_private const Transform *t, con
|
||||
|
||||
_MM_TRANSPOSE4_PS(x, y, z, w);
|
||||
|
||||
ssef tmp = shuffle<0>(aa) * x;
|
||||
ssef tmp = shuffle<2>(aa) * z;
|
||||
tmp = madd(shuffle<1>(aa), y, tmp);
|
||||
tmp = madd(shuffle<2>(aa), z, tmp);
|
||||
tmp = madd(shuffle<0>(aa), x, tmp);
|
||||
|
||||
return float3(tmp.m128);
|
||||
#elif defined(__KERNEL_METAL__)
|
||||
@@ -312,7 +312,6 @@ ccl_device_inline void transform_set_column(Transform *t, int column, float3 val
|
||||
t->z[column] = value.z;
|
||||
}
|
||||
|
||||
Transform transform_inverse(const Transform &a);
|
||||
Transform transform_transposed_inverse(const Transform &a);
|
||||
|
||||
ccl_device_inline bool transform_uniform_scale(const Transform &tfm, float &scale)
|
||||
@@ -392,39 +391,47 @@ ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t)
|
||||
#endif /* defined(__KERNEL_GPU_RAYTRACING__) */
|
||||
}
|
||||
|
||||
ccl_device_inline Transform transform_quick_inverse(Transform M)
|
||||
ccl_device_inline Transform transform_inverse(const Transform tfm)
|
||||
{
|
||||
/* possible optimization: can we avoid doing this altogether and construct
|
||||
* the inverse matrix directly from negated translation, transposed rotation,
|
||||
* scale can be inverted but what about shearing? */
|
||||
Transform R;
|
||||
float det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) +
|
||||
M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z);
|
||||
/* This implementation matches the one in Embree exactly, to ensure consistent
|
||||
* results with the ray intersection of instances. */
|
||||
float3 x = make_float3(tfm.x.x, tfm.y.x, tfm.z.x);
|
||||
float3 y = make_float3(tfm.x.y, tfm.y.y, tfm.z.y);
|
||||
float3 z = make_float3(tfm.x.z, tfm.y.z, tfm.z.z);
|
||||
float3 w = make_float3(tfm.x.w, tfm.y.w, tfm.z.w);
|
||||
|
||||
/* Compute determinant. */
|
||||
float det = dot(x, cross(y, z));
|
||||
|
||||
if (det == 0.0f) {
|
||||
M.x.x += 1e-8f;
|
||||
M.y.y += 1e-8f;
|
||||
M.z.z += 1e-8f;
|
||||
det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) +
|
||||
M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z);
|
||||
/* Matrix is degenerate (e.g. 0 scale on some axis), ideally we should
|
||||
* never be in this situation, but try to invert it anyway with tweak.
|
||||
*
|
||||
* This logic does not match Embree which would just give an invalid
|
||||
* matrix. A better solution would be to remove this and ensure any object
|
||||
* matrix is valid. */
|
||||
x.x += 1e-8f;
|
||||
y.y += 1e-8f;
|
||||
z.z += 1e-8f;
|
||||
|
||||
det = dot(x, cross(y, z));
|
||||
if (det == 0.0f) {
|
||||
det = FLT_MAX;
|
||||
}
|
||||
}
|
||||
det = (det != 0.0f) ? 1.0f / det : 0.0f;
|
||||
|
||||
float3 Rx = det * make_float3(M.z.z * M.y.y - M.z.y * M.y.z,
|
||||
M.z.y * M.x.z - M.z.z * M.x.y,
|
||||
M.y.z * M.x.y - M.y.y * M.x.z);
|
||||
float3 Ry = det * make_float3(M.z.x * M.y.z - M.z.z * M.y.x,
|
||||
M.z.z * M.x.x - M.z.x * M.x.z,
|
||||
M.y.x * M.x.z - M.y.z * M.x.x);
|
||||
float3 Rz = det * make_float3(M.z.y * M.y.x - M.z.x * M.y.y,
|
||||
M.z.x * M.x.y - M.z.y * M.x.x,
|
||||
M.y.y * M.x.x - M.y.x * M.x.y);
|
||||
float3 T = -make_float3(M.x.w, M.y.w, M.z.w);
|
||||
/* Divide adjoint matrix by the determinant to compute inverse of 3x3 matrix. */
|
||||
const float3 inverse_x = cross(y, z) / det;
|
||||
const float3 inverse_y = cross(z, x) / det;
|
||||
const float3 inverse_z = cross(x, y) / det;
|
||||
|
||||
R.x = make_float4(Rx.x, Rx.y, Rx.z, dot(Rx, T));
|
||||
R.y = make_float4(Ry.x, Ry.y, Ry.z, dot(Ry, T));
|
||||
R.z = make_float4(Rz.x, Rz.y, Rz.z, dot(Rz, T));
|
||||
/* Compute translation and fill transform. */
|
||||
Transform itfm;
|
||||
itfm.x = float3_to_float4(inverse_x, -dot(inverse_x, w));
|
||||
itfm.y = float3_to_float4(inverse_y, -dot(inverse_y, w));
|
||||
itfm.z = float3_to_float4(inverse_z, -dot(inverse_z, w));
|
||||
|
||||
return R;
|
||||
return itfm;
|
||||
}
|
||||
|
||||
ccl_device_inline void transform_compose(ccl_private Transform *tfm,
|
||||
|
@@ -12,6 +12,7 @@
|
||||
|
||||
#if !defined(__KERNEL_GPU__)
|
||||
# include <stdint.h>
|
||||
# include <stdio.h>
|
||||
#endif
|
||||
|
||||
#include "util/defines.h"
|
||||
@@ -70,6 +71,12 @@ ccl_device_inline bool is_power_of_two(size_t x)
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
/* Most GPU APIs matching native vector types, so we only need to implement them for
|
||||
* CPU and oneAPI. */
|
||||
#if defined(__KERNEL_GPU__) && !defined(__KERNEL_ONEAPI__)
|
||||
# define __KERNEL_NATIVE_VECTOR_TYPES__
|
||||
#endif
|
||||
|
||||
/* Vectorized types declaration. */
|
||||
#include "util/types_uchar2.h"
|
||||
#include "util/types_uchar3.h"
|
||||
@@ -90,8 +97,6 @@ CCL_NAMESPACE_END
|
||||
#include "util/types_float4.h"
|
||||
#include "util/types_float8.h"
|
||||
|
||||
#include "util/types_vector3.h"
|
||||
|
||||
/* Vectorized types implementation. */
|
||||
#include "util/types_uchar2_impl.h"
|
||||
#include "util/types_uchar3_impl.h"
|
||||
@@ -110,8 +115,6 @@ CCL_NAMESPACE_END
|
||||
#include "util/types_float4_impl.h"
|
||||
#include "util/types_float8_impl.h"
|
||||
|
||||
#include "util/types_vector3_impl.h"
|
||||
|
||||
/* SSE types. */
|
||||
#ifndef __KERNEL_GPU__
|
||||
# include "util/sseb.h"
|
||||
|
@@ -1,8 +1,7 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#ifndef __UTIL_TYPES_FLOAT2_H__
|
||||
#define __UTIL_TYPES_FLOAT2_H__
|
||||
#pragma once
|
||||
|
||||
#ifndef __UTIL_TYPES_H__
|
||||
# error "Do not include this file directly, include util/types.h instead."
|
||||
@@ -10,18 +9,18 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__)
|
||||
#ifndef __KERNEL_NATIVE_VECTOR_TYPES__
|
||||
struct float2 {
|
||||
float x, y;
|
||||
|
||||
# ifndef __KERNEL_GPU__
|
||||
__forceinline float operator[](int i) const;
|
||||
__forceinline float &operator[](int i);
|
||||
# endif
|
||||
};
|
||||
|
||||
ccl_device_inline float2 make_float2(float x, float y);
|
||||
ccl_device_inline void print_float2(const char *label, const float2 &a);
|
||||
#endif /* !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__) */
|
||||
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __UTIL_TYPES_FLOAT2_H__ */
|
||||
|
@@ -1,20 +1,16 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#ifndef __UTIL_TYPES_FLOAT2_IMPL_H__
|
||||
#define __UTIL_TYPES_FLOAT2_IMPL_H__
|
||||
#pragma once
|
||||
|
||||
#ifndef __UTIL_TYPES_H__
|
||||
# error "Do not include this file directly, include util/types.h instead."
|
||||
#endif
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
# include <cstdio>
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__)
|
||||
#ifndef __KERNEL_NATIVE_VECTOR_TYPES__
|
||||
# ifndef __KERNEL_GPU__
|
||||
__forceinline float float2::operator[](int i) const
|
||||
{
|
||||
util_assert(i >= 0);
|
||||
@@ -28,6 +24,7 @@ __forceinline float &float2::operator[](int i)
|
||||
util_assert(i < 2);
|
||||
return *(&x + i);
|
||||
}
|
||||
# endif
|
||||
|
||||
ccl_device_inline float2 make_float2(float x, float y)
|
||||
{
|
||||
@@ -39,8 +36,6 @@ ccl_device_inline void print_float2(const char *label, const float2 &a)
|
||||
{
|
||||
printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y);
|
||||
}
|
||||
#endif /* !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__) */
|
||||
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __UTIL_TYPES_FLOAT2_IMPL_H__ */
|
||||
|
@@ -1,8 +1,7 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#ifndef __UTIL_TYPES_FLOAT3_H__
|
||||
#define __UTIL_TYPES_FLOAT3_H__
|
||||
#pragma once
|
||||
|
||||
#ifndef __UTIL_TYPES_H__
|
||||
# error "Do not include this file directly, include util/types.h instead."
|
||||
@@ -10,17 +9,28 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#if !defined(__KERNEL_GPU__)
|
||||
#ifndef __KERNEL_NATIVE_VECTOR_TYPES__
|
||||
struct ccl_try_align(16) float3
|
||||
{
|
||||
# ifdef __KERNEL_SSE__
|
||||
# ifdef __KERNEL_GPU__
|
||||
/* Compact structure for GPU. */
|
||||
float x, y, z;
|
||||
# else
|
||||
/* SIMD aligned structure for CPU. */
|
||||
# ifdef __KERNEL_SSE__
|
||||
union {
|
||||
__m128 m128;
|
||||
struct {
|
||||
float x, y, z, w;
|
||||
};
|
||||
};
|
||||
# else
|
||||
float x, y, z, w;
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# ifdef __KERNEL_SSE__
|
||||
/* Convenient constructors and operators for SIMD, otherwise default is enough. */
|
||||
__forceinline float3();
|
||||
__forceinline float3(const float3 &a);
|
||||
__forceinline explicit float3(const __m128 &a);
|
||||
@@ -29,18 +39,18 @@ struct ccl_try_align(16) float3
|
||||
__forceinline operator __m128 &();
|
||||
|
||||
__forceinline float3 &operator=(const float3 &a);
|
||||
# else /* __KERNEL_SSE__ */
|
||||
float x, y, z, w;
|
||||
# endif /* __KERNEL_SSE__ */
|
||||
# endif
|
||||
|
||||
# ifndef __KERNEL_GPU__
|
||||
__forceinline float operator[](int i) const;
|
||||
__forceinline float &operator[](int i);
|
||||
# endif
|
||||
};
|
||||
|
||||
ccl_device_inline float3 make_float3(float f);
|
||||
ccl_device_inline float3 make_float3(float x, float y, float z);
|
||||
ccl_device_inline void print_float3(const char *label, const float3 &a);
|
||||
#endif /* !defined(__KERNEL_GPU__) */
|
||||
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
|
||||
|
||||
/* Smaller float3 for storage. For math operations this must be converted to float3, so that on the
|
||||
* CPU SIMD instructions can be used. */
|
||||
@@ -78,5 +88,3 @@ struct packed_float3 {
|
||||
static_assert(sizeof(packed_float3) == 12, "packed_float3 expected to be exactly 12 bytes");
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __UTIL_TYPES_FLOAT3_H__ */
|
||||
|
@@ -1,20 +1,15 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#ifndef __UTIL_TYPES_FLOAT3_IMPL_H__
|
||||
#define __UTIL_TYPES_FLOAT3_IMPL_H__
|
||||
#pragma once
|
||||
|
||||
#ifndef __UTIL_TYPES_H__
|
||||
# error "Do not include this file directly, include util/types.h instead."
|
||||
#endif
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
# include <cstdio>
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#if !defined(__KERNEL_GPU__)
|
||||
#ifndef __KERNEL_NATIVE_VECTOR_TYPES__
|
||||
# ifdef __KERNEL_SSE__
|
||||
__forceinline float3::float3()
|
||||
{
|
||||
@@ -45,6 +40,7 @@ __forceinline float3 &float3::operator=(const float3 &a)
|
||||
}
|
||||
# endif /* __KERNEL_SSE__ */
|
||||
|
||||
# ifndef __KERNEL_GPU__
|
||||
__forceinline float float3::operator[](int i) const
|
||||
{
|
||||
util_assert(i >= 0);
|
||||
@@ -58,23 +54,32 @@ __forceinline float &float3::operator[](int i)
|
||||
util_assert(i < 3);
|
||||
return *(&x + i);
|
||||
}
|
||||
# endif
|
||||
|
||||
ccl_device_inline float3 make_float3(float f)
|
||||
{
|
||||
# ifdef __KERNEL_SSE__
|
||||
float3 a(_mm_set1_ps(f));
|
||||
# ifdef __KERNEL_GPU__
|
||||
float3 a = {f, f, f};
|
||||
# else
|
||||
# ifdef __KERNEL_SSE__
|
||||
float3 a(_mm_set1_ps(f));
|
||||
# else
|
||||
float3 a = {f, f, f, f};
|
||||
# endif
|
||||
# endif
|
||||
return a;
|
||||
}
|
||||
|
||||
ccl_device_inline float3 make_float3(float x, float y, float z)
|
||||
{
|
||||
# ifdef __KERNEL_SSE__
|
||||
float3 a(_mm_set_ps(0.0f, z, y, x));
|
||||
# ifdef __KERNEL_GPU__
|
||||
float3 a = {x, y, z};
|
||||
# else
|
||||
# ifdef __KERNEL_SSE__
|
||||
float3 a(_mm_set_ps(0.0f, z, y, x));
|
||||
# else
|
||||
float3 a = {x, y, z, 0.0f};
|
||||
# endif
|
||||
# endif
|
||||
return a;
|
||||
}
|
||||
@@ -83,8 +88,6 @@ ccl_device_inline void print_float3(const char *label, const float3 &a)
|
||||
{
|
||||
printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z);
|
||||
}
|
||||
#endif /* !defined(__KERNEL_GPU__) */
|
||||
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __UTIL_TYPES_FLOAT3_IMPL_H__ */
|
||||
|
@@ -1,8 +1,7 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#ifndef __UTIL_TYPES_FLOAT4_H__
|
||||
#define __UTIL_TYPES_FLOAT4_H__
|
||||
#pragma once
|
||||
|
||||
#ifndef __UTIL_TYPES_H__
|
||||
# error "Do not include this file directly, include util/types.h instead."
|
||||
@@ -10,7 +9,7 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__)
|
||||
#ifndef __KERNEL_NATIVE_VECTOR_TYPES__
|
||||
struct int4;
|
||||
|
||||
struct ccl_try_align(16) float4
|
||||
@@ -35,16 +34,16 @@ struct ccl_try_align(16) float4
|
||||
float x, y, z, w;
|
||||
# endif /* __KERNEL_SSE__ */
|
||||
|
||||
# ifndef __KERNEL_GPU__
|
||||
__forceinline float operator[](int i) const;
|
||||
__forceinline float &operator[](int i);
|
||||
# endif
|
||||
};
|
||||
|
||||
ccl_device_inline float4 make_float4(float f);
|
||||
ccl_device_inline float4 make_float4(float x, float y, float z, float w);
|
||||
ccl_device_inline float4 make_float4(const int4 &i);
|
||||
ccl_device_inline void print_float4(const char *label, const float4 &a);
|
||||
#endif /* !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__) */
|
||||
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __UTIL_TYPES_FLOAT4_H__ */
|
||||
|
@@ -1,20 +1,15 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#ifndef __UTIL_TYPES_FLOAT4_IMPL_H__
|
||||
#define __UTIL_TYPES_FLOAT4_IMPL_H__
|
||||
#pragma once
|
||||
|
||||
#ifndef __UTIL_TYPES_H__
|
||||
# error "Do not include this file directly, include util/types.h instead."
|
||||
#endif
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
# include <cstdio>
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__)
|
||||
#ifndef __KERNEL_NATIVE_VECTOR_TYPES__
|
||||
# ifdef __KERNEL_SSE__
|
||||
__forceinline float4::float4()
|
||||
{
|
||||
@@ -41,6 +36,7 @@ __forceinline float4 &float4::operator=(const float4 &a)
|
||||
}
|
||||
# endif /* __KERNEL_SSE__ */
|
||||
|
||||
# ifndef __KERNEL_GPU__
|
||||
__forceinline float float4::operator[](int i) const
|
||||
{
|
||||
util_assert(i >= 0);
|
||||
@@ -54,6 +50,7 @@ __forceinline float &float4::operator[](int i)
|
||||
util_assert(i < 4);
|
||||
return *(&x + i);
|
||||
}
|
||||
# endif
|
||||
|
||||
ccl_device_inline float4 make_float4(float f)
|
||||
{
|
||||
@@ -89,8 +86,6 @@ ccl_device_inline void print_float4(const char *label, const float4 &a)
|
||||
{
|
||||
printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z, (double)a.w);
|
||||
}
|
||||
#endif /* !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__) */
|
||||
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __UTIL_TYPES_FLOAT4_IMPL_H__ */
|
||||
|
@@ -2,8 +2,7 @@
|
||||
* Original code Copyright 2017, Intel Corporation
|
||||
* Modifications Copyright 2018-2022 Blender Foundation. */
|
||||
|
||||
#ifndef __UTIL_TYPES_FLOAT8_H__
|
||||
#define __UTIL_TYPES_FLOAT8_H__
|
||||
#pragma once
|
||||
|
||||
#ifndef __UTIL_TYPES_H__
|
||||
# error "Do not include this file directly, include util/types.h instead."
|
||||
@@ -11,11 +10,16 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__)
|
||||
/* float8 is a reserved type in Metal that has not been implemented. For
|
||||
* that reason this is named float8_t and not using native vector types. */
|
||||
|
||||
struct ccl_try_align(32) float8
|
||||
#ifdef __KERNEL_GPU__
|
||||
struct float8_t
|
||||
#else
|
||||
struct ccl_try_align(32) float8_t
|
||||
#endif
|
||||
{
|
||||
# ifdef __KERNEL_AVX2__
|
||||
#ifdef __KERNEL_AVX2__
|
||||
union {
|
||||
__m256 m256;
|
||||
struct {
|
||||
@@ -23,28 +27,27 @@ struct ccl_try_align(32) float8
|
||||
};
|
||||
};
|
||||
|
||||
__forceinline float8();
|
||||
__forceinline float8(const float8 &a);
|
||||
__forceinline explicit float8(const __m256 &a);
|
||||
__forceinline float8_t();
|
||||
__forceinline float8_t(const float8_t &a);
|
||||
__forceinline explicit float8_t(const __m256 &a);
|
||||
|
||||
__forceinline operator const __m256 &() const;
|
||||
__forceinline operator __m256 &();
|
||||
|
||||
__forceinline float8 &operator=(const float8 &a);
|
||||
__forceinline float8_t &operator=(const float8_t &a);
|
||||
|
||||
# else /* __KERNEL_AVX2__ */
|
||||
#else /* __KERNEL_AVX2__ */
|
||||
float a, b, c, d, e, f, g, h;
|
||||
# endif /* __KERNEL_AVX2__ */
|
||||
#endif /* __KERNEL_AVX2__ */
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
__forceinline float operator[](int i) const;
|
||||
__forceinline float &operator[](int i);
|
||||
#endif
|
||||
};
|
||||
|
||||
ccl_device_inline float8 make_float8(float f);
|
||||
ccl_device_inline float8
|
||||
make_float8(float a, float b, float c, float d, float e, float f, float g, float h);
|
||||
#endif /* !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__) */
|
||||
ccl_device_inline float8_t make_float8_t(float f);
|
||||
ccl_device_inline float8_t
|
||||
make_float8_t(float a, float b, float c, float d, float e, float f, float g, float h);
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __UTIL_TYPES_FLOAT8_H__ */
|
||||
|
@@ -2,87 +2,79 @@
|
||||
* Original code Copyright 2017, Intel Corporation
|
||||
* Modifications Copyright 2018-2022 Blender Foundation. */
|
||||
|
||||
#ifndef __UTIL_TYPES_FLOAT8_IMPL_H__
|
||||
#define __UTIL_TYPES_FLOAT8_IMPL_H__
|
||||
#pragma once
|
||||
|
||||
#ifndef __UTIL_TYPES_H__
|
||||
# error "Do not include this file directly, include util/types.h instead."
|
||||
#endif
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
# include <cstdio>
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__)
|
||||
# ifdef __KERNEL_AVX2__
|
||||
__forceinline float8::float8()
|
||||
#ifdef __KERNEL_AVX2__
|
||||
__forceinline float8_t::float8_t()
|
||||
{
|
||||
}
|
||||
|
||||
__forceinline float8::float8(const float8 &f) : m256(f.m256)
|
||||
__forceinline float8_t::float8_t(const float8_t &f) : m256(f.m256)
|
||||
{
|
||||
}
|
||||
|
||||
__forceinline float8::float8(const __m256 &f) : m256(f)
|
||||
__forceinline float8_t::float8_t(const __m256 &f) : m256(f)
|
||||
{
|
||||
}
|
||||
|
||||
__forceinline float8::operator const __m256 &() const
|
||||
__forceinline float8_t::operator const __m256 &() const
|
||||
{
|
||||
return m256;
|
||||
}
|
||||
|
||||
__forceinline float8::operator __m256 &()
|
||||
__forceinline float8_t::operator __m256 &()
|
||||
{
|
||||
return m256;
|
||||
}
|
||||
|
||||
__forceinline float8 &float8::operator=(const float8 &f)
|
||||
__forceinline float8_t &float8_t::operator=(const float8_t &f)
|
||||
{
|
||||
m256 = f.m256;
|
||||
return *this;
|
||||
}
|
||||
# endif /* __KERNEL_AVX2__ */
|
||||
#endif /* __KERNEL_AVX2__ */
|
||||
|
||||
__forceinline float float8::operator[](int i) const
|
||||
#ifndef __KERNEL_GPU__
|
||||
__forceinline float float8_t::operator[](int i) const
|
||||
{
|
||||
util_assert(i >= 0);
|
||||
util_assert(i < 8);
|
||||
return *(&a + i);
|
||||
}
|
||||
|
||||
__forceinline float &float8::operator[](int i)
|
||||
__forceinline float &float8_t::operator[](int i)
|
||||
{
|
||||
util_assert(i >= 0);
|
||||
util_assert(i < 8);
|
||||
return *(&a + i);
|
||||
}
|
||||
#endif
|
||||
|
||||
ccl_device_inline float8 make_float8(float f)
|
||||
ccl_device_inline float8_t make_float8_t(float f)
|
||||
{
|
||||
# ifdef __KERNEL_AVX2__
|
||||
float8 r(_mm256_set1_ps(f));
|
||||
# else
|
||||
float8 r = {f, f, f, f, f, f, f, f};
|
||||
# endif
|
||||
#ifdef __KERNEL_AVX2__
|
||||
float8_t r(_mm256_set1_ps(f));
|
||||
#else
|
||||
float8_t r = {f, f, f, f, f, f, f, f};
|
||||
#endif
|
||||
return r;
|
||||
}
|
||||
|
||||
ccl_device_inline float8
|
||||
make_float8(float a, float b, float c, float d, float e, float f, float g, float h)
|
||||
ccl_device_inline float8_t
|
||||
make_float8_t(float a, float b, float c, float d, float e, float f, float g, float h)
|
||||
{
|
||||
# ifdef __KERNEL_AVX2__
|
||||
float8 r(_mm256_set_ps(a, b, c, d, e, f, g, h));
|
||||
# else
|
||||
float8 r = {a, b, c, d, e, f, g, h};
|
||||
# endif
|
||||
#ifdef __KERNEL_AVX2__
|
||||
float8_t r(_mm256_setr_ps(a, b, c, d, e, f, g, h));
|
||||
#else
|
||||
float8_t r = {a, b, c, d, e, f, g, h};
|
||||
#endif
|
||||
return r;
|
||||
}
|
||||
|
||||
#endif /* !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__) */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __UTIL_TYPES_FLOAT8_IMPL_H__ */
|
||||
|
@@ -1,8 +1,7 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#ifndef __UTIL_TYPES_INT2_H__
|
||||
#define __UTIL_TYPES_INT2_H__
|
||||
#pragma once
|
||||
|
||||
#ifndef __UTIL_TYPES_H__
|
||||
# error "Do not include this file directly, include util/types.h instead."
|
||||
@@ -10,17 +9,17 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__)
|
||||
#ifndef __KERNEL_NATIVE_VECTOR_TYPES__
|
||||
struct int2 {
|
||||
int x, y;
|
||||
|
||||
# ifndef __KERNEL_GPU__
|
||||
__forceinline int operator[](int i) const;
|
||||
__forceinline int &operator[](int i);
|
||||
# endif
|
||||
};
|
||||
|
||||
ccl_device_inline int2 make_int2(int x, int y);
|
||||
#endif /* !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__) */
|
||||
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __UTIL_TYPES_INT2_H__ */
|
||||
|
@@ -1,8 +1,7 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#ifndef __UTIL_TYPES_INT2_IMPL_H__
|
||||
#define __UTIL_TYPES_INT2_IMPL_H__
|
||||
#pragma once
|
||||
|
||||
#ifndef __UTIL_TYPES_H__
|
||||
# error "Do not include this file directly, include util/types.h instead."
|
||||
@@ -10,7 +9,8 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__)
|
||||
#ifndef __KERNEL_NATIVE_VECTOR_TYPES__
|
||||
# ifndef __KERNEL_GPU__
|
||||
int int2::operator[](int i) const
|
||||
{
|
||||
util_assert(i >= 0);
|
||||
@@ -24,14 +24,13 @@ int &int2::operator[](int i)
|
||||
util_assert(i < 2);
|
||||
return *(&x + i);
|
||||
}
|
||||
# endif
|
||||
|
||||
ccl_device_inline int2 make_int2(int x, int y)
|
||||
{
|
||||
int2 a = {x, y};
|
||||
return a;
|
||||
}
|
||||
#endif /* !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__) */
|
||||
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __UTIL_TYPES_INT2_IMPL_H__ */
|
||||
|
@@ -1,8 +1,7 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#ifndef __UTIL_TYPES_INT3_H__
|
||||
#define __UTIL_TYPES_INT3_H__
|
||||
#pragma once
|
||||
|
||||
#ifndef __UTIL_TYPES_H__
|
||||
# error "Do not include this file directly, include util/types.h instead."
|
||||
@@ -10,10 +9,15 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__)
|
||||
#ifndef __KERNEL_NATIVE_VECTOR_TYPES__
|
||||
struct ccl_try_align(16) int3
|
||||
{
|
||||
# ifdef __KERNEL_SSE__
|
||||
# ifdef __KERNEL_GPU__
|
||||
/* Compact structure on the GPU. */
|
||||
int x, y, z;
|
||||
# else
|
||||
/* SIMD aligned structure for CPU. */
|
||||
# ifdef __KERNEL_SSE__
|
||||
union {
|
||||
__m128i m128;
|
||||
struct {
|
||||
@@ -29,19 +33,20 @@ struct ccl_try_align(16) int3
|
||||
__forceinline operator __m128i &();
|
||||
|
||||
__forceinline int3 &operator=(const int3 &a);
|
||||
# else /* __KERNEL_SSE__ */
|
||||
# else /* __KERNEL_SSE__ */
|
||||
int x, y, z, w;
|
||||
# endif /* __KERNEL_SSE__ */
|
||||
# endif /* __KERNEL_SSE__ */
|
||||
# endif
|
||||
|
||||
# ifndef __KERNEL_GPU__
|
||||
__forceinline int operator[](int i) const;
|
||||
__forceinline int &operator[](int i);
|
||||
# endif
|
||||
};
|
||||
|
||||
ccl_device_inline int3 make_int3(int i);
|
||||
ccl_device_inline int3 make_int3(int x, int y, int z);
|
||||
ccl_device_inline void print_int3(const char *label, const int3 &a);
|
||||
#endif /* !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__) */
|
||||
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __UTIL_TYPES_INT3_H__ */
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user