Compare commits

..

19 Commits

Author SHA1 Message Date
3ce788e015 Progress on input properties, currently crashes 2023-04-12 16:48:59 -04:00
8be481fbd8 Fix missing update tag 2023-04-12 16:18:46 -04:00
a385a00821 Merge branch 'main' into node-group-operators 2023-04-12 15:58:29 -04:00
677273eb41 Merge branch 'main' into node-group-operators 2023-04-07 17:45:51 -04:00
abe86a6a5d Merge branch 'main' into node-group-operators 2023-04-07 08:14:34 -04:00
9e01c4f69e List groups in menu, some crash fixes 2023-04-05 17:07:56 -04:00
7212f52457 Progress 2023-04-05 16:33:47 -04:00
51d48bdcdb Merge branch 'main' into node-group-operators 2023-04-05 14:14:42 -04:00
a10ae5cf05 Progress 2023-04-05 13:34:31 -04:00
19051b863e Merge branch 'main' into node-group-operators 2023-04-05 11:55:48 -04:00
1ad98ac65e Merge branch 'main' into node-group-operators 2023-04-04 18:02:37 -04:00
96e8f8a002 Some progress 2023-03-31 12:32:59 -04:00
4c19994c11 Merge branch 'main' into node-group-operators 2023-03-31 11:58:07 -04:00
7c67f8c719 Progress 2023-03-30 19:17:05 -04:00
cf77874cdb Merge branch 'main' into node-group-operators 2023-03-30 18:52:25 -04:00
7e748413f9 Add no-op operator base 2023-03-30 18:38:33 -04:00
41c5490137 Merge branch 'main' into node-group-operators 2023-03-30 16:34:46 -04:00
338db40df9 Merge branch 'main' into node-group-operators 2023-03-30 15:25:08 -04:00
39dfa015a0 Geometry Nodes: Add node editor option to edit "Operator" node groups
This functions as a way to avoid having the context affect which node
groups are displayed. This is necessary since there is no concept of an
active operator and most node groups could be used as an operator.

It was considered to make this a more general "No Context" option for
the node editor. That still might happen eventually, but we want to
encourage users to use the modifier context since it allows the
various inspection features to work properly.
2023-03-27 10:02:15 -04:00
1092 changed files with 18820 additions and 25511 deletions

View File

@@ -521,8 +521,7 @@ endif()
if(NOT APPLE)
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
# Radeon VII (gfx906) not currently working with HIP SDK, so left out of the list.
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
endif()
@@ -1581,8 +1580,6 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_MISSING_NORETURN -Wno-missing-noreturn)
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_UNUSED_BUT_SET_VARIABLE -Wno-unused-but-set-variable)
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_DEPRECATED_DECLARATIONS -Wno-deprecated-declarations)
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_STRICT_PROTOTYPES -Wno-strict-prototypes)
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_BITWISE_INSTEAD_OF_LOGICAL -Wno-bitwise-instead-of-logical)
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_UNUSED_PARAMETER -Wno-unused-parameter)
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_UNUSED_PRIVATE_FIELD -Wno-unused-private-field)
@@ -1596,7 +1593,6 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_UNDEFINED_VAR_TEMPLATE -Wno-undefined-var-template)
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_INSTANTIATION_AFTER_SPECIALIZATION -Wno-instantiation-after-specialization)
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_MISLEADING_INDENTATION -Wno-misleading-indentation)
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_BITWISE_INSTEAD_OF_LOGICAL -Wno-bitwise-instead-of-logical)
elseif(CMAKE_C_COMPILER_ID MATCHES "Intel")

View File

@@ -58,6 +58,9 @@ Static Source Code Checking
* check_cppcheck: Run blender source through cppcheck (C & C++).
* check_clang_array: Run blender source through clang array checking script (C & C++).
* check_deprecated: Check if there is any deprecated code to remove.
* check_splint: Run blenders source through splint (C only).
* check_sparse: Run blenders source through sparse (C only).
* check_smatch: Run blenders source through smatch (C only).
* check_descriptions: Check for duplicate/invalid descriptions.
* check_licenses: Check license headers follow the SPDX license specification,
using one of the accepted licenses in 'doc/license/SPDX-license-identifiers.txt'
@@ -471,6 +474,21 @@ check_clang_array: .FORCE
@cd "$(BUILD_DIR)" ; \
$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_clang_array.py"
check_splint: .FORCE
@$(CMAKE_CONFIG)
@cd "$(BUILD_DIR)" ; \
$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_splint.py"
check_sparse: .FORCE
@$(CMAKE_CONFIG)
@cd "$(BUILD_DIR)" ; \
$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_sparse.py"
check_smatch: .FORCE
@$(CMAKE_CONFIG)
@cd "$(BUILD_DIR)" ; \
$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_smatch.py"
check_mypy: .FORCE
@$(PYTHON) "$(BLENDER_DIR)/tools/check_source/check_mypy.py"

View File

@@ -90,26 +90,28 @@ include(cmake/haru.cmake)
# Boost needs to be included after `python.cmake` due to the PYTHON_BINARY variable being needed.
include(cmake/boost.cmake)
include(cmake/pugixml.cmake)
include(cmake/ispc.cmake)
include(cmake/openimagedenoise.cmake)
include(cmake/embree.cmake)
include(cmake/openpgl.cmake)
include(cmake/fmt.cmake)
include(cmake/robinmap.cmake)
include(cmake/xml2.cmake)
include(cmake/fribidi.cmake)
include(cmake/harfbuzz.cmake)
if(NOT APPLE)
include(cmake/xr_openxr.cmake)
include(cmake/dpcpp.cmake)
include(cmake/dpcpp_deps.cmake)
if(NOT WIN32 OR BUILD_MODE STREQUAL Release)
include(cmake/dpcpp.cmake)
include(cmake/dpcpp_deps.cmake)
endif()
if(NOT WIN32)
include(cmake/igc.cmake)
include(cmake/gmmlib.cmake)
include(cmake/ocloc.cmake)
endif()
endif()
include(cmake/ispc.cmake)
include(cmake/openimagedenoise.cmake)
# Embree needs to be included after dpcpp as it uses it for compiling with GPU support
include(cmake/embree.cmake)
include(cmake/openpgl.cmake)
include(cmake/fmt.cmake)
include(cmake/robinmap.cmake)
include(cmake/xml2.cmake)
# OpenColorIO and dependencies.
include(cmake/expat.cmake)

View File

@@ -156,7 +156,6 @@ download_source(OPENCLHEADERS)
download_source(ICDLOADER)
download_source(MP11)
download_source(SPIRV_HEADERS)
download_source(UNIFIED_RUNTIME)
download_source(IGC)
download_source(IGC_LLVM)
download_source(IGC_OPENCL_CLANG)

View File

@@ -5,9 +5,6 @@
# for now.
string(REPLACE "-DCMAKE_CXX_STANDARD=17" " " DPCPP_CMAKE_FLAGS "${DEFAULT_CMAKE_FLAGS}")
# DPCPP already generates debug libs, there isn't much point in compiling it in debug mode itself.
string(REPLACE "-DCMAKE_BUILD_TYPE=Debug" "-DCMAKE_BUILD_TYPE=Release" DPCPP_CMAKE_FLAGS "${DPCPP_CMAKE_FLAGS}")
if(WIN32)
set(LLVM_GENERATOR "Ninja")
else()
@@ -41,18 +38,17 @@ set(DPCPP_EXTRA_ARGS
-DLEVEL_ZERO_LIBRARY=${LIBDIR}/level-zero/lib/${LIBPREFIX}ze_loader${SHAREDLIBEXT}
-DLEVEL_ZERO_INCLUDE_DIR=${LIBDIR}/level-zero/include
-DLLVM_EXTERNAL_SPIRV_HEADERS_SOURCE_DIR=${BUILD_DIR}/spirvheaders/src/external_spirvheaders/
-DUNIFIED_RUNTIME_SOURCE_DIR=${BUILD_DIR}/unifiedruntime/src/external_unifiedruntime/
# Below here is copied from an invocation of buildbot/config.py
-DLLVM_ENABLE_ASSERTIONS=ON
-DLLVM_TARGETS_TO_BUILD=X86
-DLLVM_EXTERNAL_PROJECTS=sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw^^lld
-DLLVM_EXTERNAL_PROJECTS=sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
-DLLVM_EXTERNAL_SYCL_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/sycl
-DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/llvm-spirv
-DLLVM_EXTERNAL_XPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
-DXPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
-DLLVM_EXTERNAL_XPTIFW_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xptifw
-DLLVM_EXTERNAL_LIBDEVICE_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/libdevice
-DLLVM_ENABLE_PROJECTS=clang^^sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw^^lld
-DLLVM_ENABLE_PROJECTS=clang^^sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
-DLIBCLC_TARGETS_TO_BUILD=
-DLIBCLC_GENERATE_REMANGLED_VARIANTS=OFF
-DSYCL_BUILD_PI_HIP_PLATFORM=AMD
@@ -108,19 +104,13 @@ add_dependencies(
external_mp11
external_level-zero
external_spirvheaders
external_unifiedruntime
)
if(BUILD_MODE STREQUAL Release AND WIN32)
ExternalProject_Add_Step(external_dpcpp after_install
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cl.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cpp.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang.exe
COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/dpcpp ${HARVEST_TARGET}/dpcpp
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/clang-cl.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/clang-cpp.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/clang.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/ld.lld.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/ld64.lld.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/lld.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/lld-link.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/wasm-ld.exe
)
endif()

View File

@@ -59,13 +59,3 @@ ExternalProject_Add(external_spirvheaders
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
)
ExternalProject_Add(external_unifiedruntime
URL file://${PACKAGE_DIR}/${UNIFIED_RUNTIME_FILE}
URL_HASH ${UNIFIED_RUNTIME_HASH_TYPE}=${UNIFIED_RUNTIME_HASH}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
PREFIX ${BUILD_DIR}/unifiedruntime
CONFIGURE_COMMAND echo .
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
)

View File

@@ -3,8 +3,6 @@
# Note the utility apps may use png/tiff/gif system libraries, but the
# library itself does not depend on them, so should give no problems.
set(EMBREE_CMAKE_FLAGS ${DEFAULT_CMAKE_FLAGS})
set(EMBREE_EXTRA_ARGS
-DEMBREE_ISPC_SUPPORT=OFF
-DEMBREE_TUTORIALS=OFF
@@ -33,43 +31,6 @@ if(NOT BLENDER_PLATFORM_ARM)
)
endif()
if(NOT APPLE)
if(WIN32)
# Levels below -O2 don't work well for Embree+SYCL.
string(REGEX REPLACE "-O[A-Za-z0-9]" "" EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG ${BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG})
string(APPEND EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG " -O2")
string(REGEX REPLACE "-O[A-Za-z0-9]" "" EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG ${BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG})
string(APPEND EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG " -O2")
set(EMBREE_CMAKE_FLAGS
-DCMAKE_BUILD_TYPE=${BUILD_MODE}
-DCMAKE_CXX_FLAGS_RELEASE=${BLENDER_CLANG_CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_MINSIZEREL=${BLENDER_CLANG_CMAKE_CXX_FLAGS_MINSIZEREL}
-DCMAKE_CXX_FLAGS_RELWITHDEBINFO=${BLENDER_CLANG_CMAKE_CXX_FLAGS_RELWITHDEBINFO}
-DCMAKE_CXX_FLAGS_DEBUG=${EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${BLENDER_CLANG_CMAKE_C_FLAGS_RELEASE}
-DCMAKE_C_FLAGS_MINSIZEREL=${BLENDER_CLANG_CMAKE_C_FLAGS_MINSIZEREL}
-DCMAKE_C_FLAGS_RELWITHDEBINFO=${BLENDER_CLANG_CMAKE_C_FLAGS_RELWITHDEBINFO}
-DCMAKE_C_FLAGS_DEBUG=${EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG}
-DCMAKE_CXX_STANDARD=17
)
set(EMBREE_EXTRA_ARGS
-DCMAKE_CXX_COMPILER=${LIBDIR}/dpcpp/bin/clang++.exe
-DCMAKE_C_COMPILER=${LIBDIR}/dpcpp/bin/clang.exe
-DCMAKE_SHARED_LINKER_FLAGS=-L"${LIBDIR}/dpcpp/lib"
-DEMBREE_SYCL_SUPPORT=ON
${EMBREE_EXTRA_ARGS}
)
else()
set(EMBREE_EXTRA_ARGS
-DCMAKE_CXX_COMPILER=${LIBDIR}/dpcpp/bin/clang++
-DCMAKE_C_COMPILER=${LIBDIR}/dpcpp/bin/clang
-DCMAKE_SHARED_LINKER_FLAGS=-L"${LIBDIR}/dpcpp/lib"
-DEMBREE_SYCL_SUPPORT=ON
${EMBREE_EXTRA_ARGS}
)
endif()
endif()
if(TBB_STATIC_LIBRARY)
set(EMBREE_EXTRA_ARGS
${EMBREE_EXTRA_ARGS}
@@ -81,25 +42,16 @@ ExternalProject_Add(external_embree
URL file://${PACKAGE_DIR}/${EMBREE_FILE}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
URL_HASH ${EMBREE_HASH_TYPE}=${EMBREE_HASH}
CMAKE_GENERATOR ${PLATFORM_ALT_GENERATOR}
PREFIX ${BUILD_DIR}/embree
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/embree/src/external_embree < ${PATCH_DIR}/embree.diff
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/embree ${EMBREE_CMAKE_FLAGS} ${EMBREE_EXTRA_ARGS}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/embree ${DEFAULT_CMAKE_FLAGS} ${EMBREE_EXTRA_ARGS}
INSTALL_DIR ${LIBDIR}/embree
)
if(NOT APPLE)
add_dependencies(
external_embree
external_tbb
external_dpcpp
)
else()
add_dependencies(
external_embree
external_tbb
)
endif()
add_dependencies(
external_embree
external_tbb
)
if(WIN32)
if(BUILD_MODE STREQUAL Release)
@@ -114,7 +66,6 @@ if(WIN32)
ExternalProject_Add_Step(external_embree after_install
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/bin/embree4_d.dll ${HARVEST_TARGET}/embree/bin/embree4_d.dll
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree4_d.lib ${HARVEST_TARGET}/embree/lib/embree4_d.lib
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree4_sycl_d.lib ${HARVEST_TARGET}/embree/lib/embree4_sycl_d.lib
DEPENDEES install
)
endif()

View File

@@ -46,7 +46,7 @@ ${temp_LIBDIR}/vpx/lib/pkgconfig:\
${temp_LIBDIR}/theora/lib/pkgconfig:\
${temp_LIBDIR}/openjpeg/lib/pkgconfig:\
${temp_LIBDIR}/opus/lib/pkgconfig:\
${temp_LIBDIR}/aom/lib/pkgconfig:"
${temp_LIBDIR}/aom/lib/pkgconfig"
)
unset(temp_LIBDIR)

View File

@@ -2,45 +2,35 @@
set(FFTW_EXTRA_ARGS)
macro(fftw_build FFTW_POSTFIX)
if(WIN32)
set(FFTW3_PATCH_COMMAND ${PATCH_CMD} --verbose -p 0 -N -d ${BUILD_DIR}/fftw3/src/external_fftw3_${FFTW_POSTFIX} < ${PATCH_DIR}/fftw3.diff)
set(FFTW_EXTRA_ARGS --disable-static --enable-shared)
set(FFTW_INSTALL install-strip)
else()
set(FFTW_EXTRA_ARGS --enable-static)
set(FFTW_INSTALL install)
endif()
ExternalProject_Add(external_fftw3_${FFTW_POSTFIX}
URL file://${PACKAGE_DIR}/${FFTW_FILE}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
URL_HASH ${FFTW_HASH_TYPE}=${FFTW_HASH}
PREFIX ${BUILD_DIR}/fftw3
CONFIGURE_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/fftw3/src/external_fftw3_${FFTW_POSTFIX}/ && ${CONFIGURE_COMMAND} ${FFTW_EXTRA_ARGS} ${ARGN} --prefix=${mingw_LIBDIR}/fftw3
PATCH_COMMAND ${FFTW3_PATCH_COMMAND}
BUILD_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/fftw3/src/external_fftw3_${FFTW_POSTFIX}/ && make -j${MAKE_THREADS}
INSTALL_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/fftw3/src/external_fftw3_${FFTW_POSTFIX}/ && make ${FFTW_INSTALL}
INSTALL_DIR ${LIBDIR}/fftw3
)
endmacro()
if(WIN32)
set(FFTW3_PATCH_COMMAND ${PATCH_CMD} --verbose -p 0 -N -d ${BUILD_DIR}/fftw3/src/external_fftw3 < ${PATCH_DIR}/fftw3.diff)
set(FFTW_EXTRA_ARGS --disable-static --enable-shared)
set(FFTW_INSTALL install-strip)
else()
set(FFTW_EXTRA_ARGS --enable-static)
set(FFTW_INSTALL install)
endif()
fftw_build(double)
fftw_build(float --enable-float)
ExternalProject_Add(external_fftw3
URL file://${PACKAGE_DIR}/${FFTW_FILE}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
URL_HASH ${FFTW_HASH_TYPE}=${FFTW_HASH}
PREFIX ${BUILD_DIR}/fftw3
CONFIGURE_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/fftw3/src/external_fftw3/ && ${CONFIGURE_COMMAND} ${FFTW_EXTRA_ARGS} --prefix=${mingw_LIBDIR}/fftw3
PATCH_COMMAND ${FFTW3_PATCH_COMMAND}
BUILD_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/fftw3/src/external_fftw3/ && make -j${MAKE_THREADS}
INSTALL_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/fftw3/src/external_fftw3/ && make ${FFTW_INSTALL}
INSTALL_DIR ${LIBDIR}/fftw3
)
if(MSVC)
set_target_properties(external_fftw3_double PROPERTIES FOLDER Mingw)
set_target_properties(external_fftw3 PROPERTIES FOLDER Mingw)
if(BUILD_MODE STREQUAL Release)
ExternalProject_Add_Step(external_fftw3_double after_install
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/fftw3/lib/libfftw3.dll.a ${HARVEST_TARGET}/fftw3/lib/libfftw3-3.lib
ExternalProject_Add_Step(external_fftw3 after_install
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/fftw3/lib/libfftw3.dll.a ${HARVEST_TARGET}/fftw3/lib/libfftw.lib
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/fftw3/bin/libfftw3-3.dll ${HARVEST_TARGET}/fftw3/lib/libfftw3-3.dll
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/fftw3/include/fftw3.h ${HARVEST_TARGET}/fftw3/include/fftw3.h
DEPENDEES install
)
ExternalProject_Add_Step(external_fftw3_float after_install
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/fftw3/lib/libfftw3f.dll.a ${HARVEST_TARGET}/fftw3/lib/libfftw3f.lib
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/fftw3/bin/libfftw3f-3.dll ${HARVEST_TARGET}/fftw3/lib/libfftw3f-3.dll
DEPENDEES install
)
endif()
endif()

View File

@@ -2,7 +2,7 @@
set(MATERIALX_EXTRA_ARGS
-DMATERIALX_BUILD_PYTHON=ON
-DMATERIALX_BUILD_RENDER=ON
-DMATERIALX_BUILD_RENDER=OFF
-DMATERIALX_INSTALL_PYTHON=OFF
-DMATERIALX_PYTHON_EXECUTABLE=${PYTHON_BINARY}
-DMATERIALX_PYTHON_VERSION=${PYTHON_SHORT_VERSION}

View File

@@ -74,27 +74,6 @@ if(WIN32)
set(BLENDER_CMAKE_CXX_FLAGS_RELEASE "/MD ${COMMON_MSVC_FLAGS} /D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS /O2 /Ob2 /D NDEBUG /D PLATFORM_WINDOWS /DPSAPI_VERSION=2 /DTINYFORMAT_ALLOW_WCHAR_STRINGS")
set(BLENDER_CMAKE_CXX_FLAGS_RELWITHDEBINFO "/MD ${COMMON_MSVC_FLAGS} /D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS /Zi /O2 /Ob1 /D NDEBUG /D PLATFORM_WINDOWS /DPSAPI_VERSION=2 /DTINYFORMAT_ALLOW_WCHAR_STRINGS")
# Set similar flags for CLANG compilation.
set(COMMON_CLANG_FLAGS "-D_DLL -D_MT") # Equivalent to MSVC /MD
if(WITH_OPTIMIZED_DEBUG)
set(BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -O2 -D_DEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
else()
set(BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -g -D_DEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
endif()
set(BLENDER_CLANG_CMAKE_C_FLAGS_MINSIZEREL "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -Os -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
set(BLENDER_CLANG_CMAKE_C_FLAGS_RELEASE "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -O2 -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
set(BLENDER_CLANG_CMAKE_C_FLAGS_RELWITHDEBINFO "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -g -O2 -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
if(WITH_OPTIMIZED_DEBUG)
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -D_DEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS -DBOOST_DEBUG_PYTHON -DBOOST_ALL_NO_LIB")
else()
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_DEBUG "${COMMON_CLANG_FLAG} -Xclang --dependent-lib=msvcrtd -D_DEBUG -DPLATFORM_WINDOWS -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -g -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS -DBOOST_DEBUG_PYTHON -DBOOST_ALL_NO_LIB")
endif()
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_MINSIZEREL "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_RELEASE "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_RELWITHDEBINFO "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -g -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
set(PLATFORM_FLAGS)
set(PLATFORM_CXX_FLAGS)
set(PLATFORM_CMAKE_FLAGS)

View File

@@ -35,7 +35,7 @@ if(WIN32)
# regardless of the version actually in there.
PATCH_COMMAND mkdir ${PYTHON_EXTERNALS_FOLDER_DOS} &&
mklink /J ${PYTHON_EXTERNALS_FOLDER_DOS}\\zlib-1.2.13 ${ZLIB_SOURCE_FOLDER_DOS} &&
mklink /J ${PYTHON_EXTERNALS_FOLDER_DOS}\\openssl-1.1.1t ${SSL_SOURCE_FOLDER_DOS} &&
mklink /J ${PYTHON_EXTERNALS_FOLDER_DOS}\\openssl-1.1.1q ${SSL_SOURCE_FOLDER_DOS} &&
${CMAKE_COMMAND} -E copy ${ZLIB_SOURCE_FOLDER}/../external_zlib-build/zconf.h ${PYTHON_EXTERNALS_FOLDER}/zlib-1.2.13/zconf.h &&
${PATCH_CMD} --verbose -p1 -d ${BUILD_DIR}/python/src/external_python < ${PATCH_DIR}/python_windows.diff
CONFIGURE_COMMAND echo "."

View File

@@ -15,7 +15,8 @@ if(WIN32)
-D_PXR_CXX_DEFINITIONS=/DBOOST_ALL_NO_LIB
-DCMAKE_SHARED_LINKER_FLAGS_INIT=/LIBPATH:${LIBDIR}/tbb/lib
-DPython_FIND_REGISTRY=NEVER
-DPython3_EXECUTABLE=${PYTHON_BINARY}
-DPYTHON_INCLUDE_DIRS=${LIBDIR}/python/include
-DPYTHON_LIBRARY=${LIBDIR}/python/libs/python${PYTHON_SHORT_VERSION_NO_DOTS}${PYTHON_POSTFIX}${LIBEXT}
)
if(BUILD_MODE STREQUAL Debug)
list(APPEND USD_PLATFORM_FLAGS -DPXR_USE_DEBUG_PYTHON=ON)
@@ -43,7 +44,6 @@ set(USD_EXTRA_ARGS
${USD_PLATFORM_FLAGS}
-DOPENSUBDIV_ROOT_DIR=${LIBDIR}/opensubdiv
-DOpenImageIO_ROOT=${LIBDIR}/openimageio
-DMaterialX_ROOT=${LIBDIR}/materialx
-DOPENEXR_LIBRARIES=${LIBDIR}/imath/lib/${LIBPREFIX}Imath${OPENEXR_VERSION_POSTFIX}${SHAREDLIBEXT}
-DOPENEXR_INCLUDE_DIR=${LIBDIR}/imath/include
-DImath_DIR=${LIBDIR}/imath
@@ -56,7 +56,7 @@ set(USD_EXTRA_ARGS
-DPXR_BUILD_TUTORIALS=OFF
-DPXR_BUILD_USDVIEW=OFF
-DPXR_ENABLE_HDF5_SUPPORT=OFF
-DPXR_ENABLE_MATERIALX_SUPPORT=ON
-DPXR_ENABLE_MATERIALX_SUPPORT=OFF
-DPXR_ENABLE_OPENVDB_SUPPORT=ON
-DPYTHON_EXECUTABLE=${PYTHON_BINARY}
-DPXR_BUILD_MONOLITHIC=ON
@@ -107,7 +107,6 @@ add_dependencies(
external_opensubdiv
external_python
external_openimageio
external_materialx
openvdb
)

View File

@@ -88,9 +88,9 @@ else()
set(OPENEXR_VERSION_POSTFIX)
endif()
set(FREETYPE_VERSION 2.13.0)
set(FREETYPE_VERSION 2.12.1)
set(FREETYPE_URI http://prdownloads.sourceforge.net/freetype/freetype-${FREETYPE_VERSION}.tar.gz)
set(FREETYPE_HASH 98bc3cf234fe88ef3cf24569251fe0a4)
set(FREETYPE_HASH 8bc5c9c9df7ac12c504f8918552a7cf2)
set(FREETYPE_HASH_TYPE MD5)
set(FREETYPE_FILE freetype-${FREETYPE_VERSION}.tar.gz)
SET(FREETYPE_CPE "cpe:2.3:a:freetype:freetype:${FREETYPE_VERSION}:*:*:*:*:*:*:*")
@@ -112,6 +112,7 @@ set(ALEMBIC_URI https://github.com/alembic/alembic/archive/${ALEMBIC_VERSION}.ta
set(ALEMBIC_HASH 2cd8d6e5a3ac4a014e24a4b04f4fadf9)
set(ALEMBIC_HASH_TYPE MD5)
set(ALEMBIC_FILE alembic-${ALEMBIC_VERSION}.tar.gz)
SET(FREETYPE_CPE "cpe:2.3:a:freetype:freetype:${FREETYPE_VERSION}:*:*:*:*:*:*:*")
set(OPENSUBDIV_VERSION v3_5_0)
set(OPENSUBDIV_URI https://github.com/PixarAnimationStudios/OpenSubdiv/archive/${OPENSUBDIV_VERSION}.tar.gz)
@@ -170,11 +171,11 @@ set(OPENIMAGEIO_HASH 7da92a7d6029921a8599a977ff1efa2a)
set(OPENIMAGEIO_HASH_TYPE MD5)
set(OPENIMAGEIO_FILE OpenImageIO-${OPENIMAGEIO_VERSION}.tar.gz)
# 9.1.0 is currently oiio's preferred version although never versions may be available.
# 8.0.0 is currently oiio's preferred version although never versions may be available.
# the preferred version can be found in oiio's externalpackages.cmake
set(FMT_VERSION 9.1.0)
set(FMT_VERSION 8.0.0)
set(FMT_URI https://github.com/fmtlib/fmt/archive/refs/tags/${FMT_VERSION}.tar.gz)
set(FMT_HASH 5dea48d1fcddc3ec571ce2058e13910a0d4a6bab4cc09a809d8b1dd1c88ae6f2)
set(FMT_HASH 7bce0e9e022e586b178b150002e7c2339994e3c2bbe44027e9abb0d60f9cce83)
set(FMT_HASH_TYPE SHA256)
set(FMT_FILE fmt-${FMT_VERSION}.tar.gz)
set(FMT_CPE "cpe:2.3:a:fmt:fmt:${FMT_VERSION}:*:*:*:*:*:*:*")
@@ -208,11 +209,11 @@ set(OSL_FILE OpenShadingLanguage-${OSL_VERSION}.tar.gz)
# BZIP2, FFI, SQLITE and change the versions in this file as well. For compliance
# reasons there can be no exceptions to this.
set(PYTHON_VERSION 3.10.11)
set(PYTHON_VERSION 3.10.9)
set(PYTHON_SHORT_VERSION 3.10)
set(PYTHON_SHORT_VERSION_NO_DOTS 310)
set(PYTHON_URI https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tar.xz)
set(PYTHON_HASH 1bf8481a683e0881e14d52e0f23633a6)
set(PYTHON_HASH dc8c0f274b28ee9e95923d20cfc364c9)
set(PYTHON_HASH_TYPE MD5)
set(PYTHON_FILE Python-${PYTHON_VERSION}.tar.xz)
set(PYTHON_CPE "cpe:2.3:a:python:python:${PYTHON_VERSION}:-:*:*:*:*:*:*")
@@ -295,9 +296,9 @@ set(THEORA_HASH b6ae1ee2fa3d42ac489287d3ec34c5885730b1296f0801ae577a35193d3affbc
set(THEORA_HASH_TYPE SHA256)
set(THEORA_FILE libtheora-${THEORA_VERSION}.tar.bz2)
set(FLAC_VERSION 1.4.2)
set(FLAC_VERSION 1.3.4)
set(FLAC_URI http://downloads.xiph.org/releases/flac/flac-${FLAC_VERSION}.tar.xz)
set(FLAC_HASH e322d58a1f48d23d9dd38f432672865f6f79e73a6f9cc5a5f57fcaa83eb5a8e4 )
set(FLAC_HASH 8ff0607e75a322dd7cd6ec48f4f225471404ae2730d0ea945127b1355155e737 )
set(FLAC_HASH_TYPE SHA256)
set(FLAC_FILE flac-${FLAC_VERSION}.tar.xz)
set(FLAC_CPE "cpe:2.3:a:flac_project:flac:${FLAC_VERSION}:*:*:*:*:*:*:*")
@@ -335,9 +336,9 @@ set(OPENJPEG_HASH_TYPE SHA256)
set(OPENJPEG_FILE openjpeg-v${OPENJPEG_VERSION}.tar.gz)
set(OPENJPEG_CPE "cpe:2.3:a:uclouvain:openjpeg:${OPENJPEG_VERSION}:*:*:*:*:*:*:*")
set(FFMPEG_VERSION 6.0)
set(FFMPEG_VERSION 5.1.2)
set(FFMPEG_URI http://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.bz2)
set(FFMPEG_HASH 47d062731c9f66a78380e35a19aac77cebceccd1c7cc309b9c82343ffc430c3d)
set(FFMPEG_HASH 39a0bcc8d98549f16c570624678246a6ac736c066cebdb409f9502e915b22f2b)
set(FFMPEG_HASH_TYPE SHA256)
set(FFMPEG_FILE ffmpeg-${FFMPEG_VERSION}.tar.bz2)
set(FFMPEG_CPE "cpe:2.3:a:ffmpeg:ffmpeg:${FFMPEG_VERSION}:*:*:*:*:*:*:*")
@@ -459,9 +460,9 @@ set(LZMA_HASH_TYPE SHA256)
set(LZMA_FILE xz-${LZMA_VERSION}.tar.bz2)
# NOTE: Python's build has been modified to use our ssl version.
set(SSL_VERSION 1.1.1t)
set(SSL_VERSION 1.1.1q)
set(SSL_URI https://www.openssl.org/source/openssl-${SSL_VERSION}.tar.gz)
set(SSL_HASH 8dee9b24bdb1dcbf0c3d1e9b02fb8f6bf22165e807f45adeb7c9677536859d3b)
set(SSL_HASH d7939ce614029cdff0b6c20f0e2e5703158a489a72b2507b8bd51bf8c8fd10ca)
set(SSL_HASH_TYPE SHA256)
set(SSL_FILE openssl-${SSL_VERSION}.tar.gz)
set(SSL_CPE "cpe:2.3:a:openssl:openssl:${SSL_VERSION}:*:*:*:*:*:*:*")
@@ -469,10 +470,10 @@ set(SSL_CPE "cpe:2.3:a:openssl:openssl:${SSL_VERSION}:*:*:*:*:*:*:*")
# Note: This will *HAVE* to match the version python ships on windows which
# is hardcoded in pythons PCbuild/get_externals.bat for compliance reasons there
# can be no exceptions to this.
set(SQLITE_VERSION 3.40.1)
set(SQLLITE_LONG_VERSION 3400100)
set(SQLITE_VERSION 3.39.4)
set(SQLLITE_LONG_VERSION 3390400)
set(SQLITE_URI https://www.sqlite.org/2022/sqlite-autoconf-${SQLLITE_LONG_VERSION}.tar.gz)
set(SQLITE_HASH b8c2d4bc0094f5c0ce985dc0e237dfcbaa1f6275)
set(SQLITE_HASH c4c5c39269d1b9bb1487cff580c1f583608229b2)
set(SQLITE_HASH_TYPE SHA1)
set(SQLITE_FILE sqlite-autoconf-${SQLLITE_LONG_VERSION}.tar.gz)
set(SQLITE_CPE "cpe:2.3:a:sqlite:sqlite:${SQLITE_VERSION}:*:*:*:*:*:*:*")
@@ -483,9 +484,9 @@ set(EMBREE_HASH dd26617719a587e126b341d1b32f7fd0)
set(EMBREE_HASH_TYPE MD5)
set(EMBREE_FILE embree-v${EMBREE_VERSION}.zip)
set(USD_VERSION 23.02)
set(USD_VERSION 22.11)
set(USD_URI https://github.com/PixarAnimationStudios/USD/archive/v${USD_VERSION}.tar.gz)
set(USD_HASH d0c5dfaf1c2f0ba2b4b6976d9a956eef)
set(USD_HASH 8c89459e48a2ef0e7ae9e7e490377507)
set(USD_HASH_TYPE MD5)
set(USD_FILE usd-v${USD_VERSION}.tar.gz)
@@ -598,15 +599,15 @@ set(OPENPGL_HASH db63f5dac5cfa8c110ede241f0c413f00db0c4748697381c4fa23e0f9e82a75
set(OPENPGL_HASH_TYPE SHA256)
set(OPENPGL_FILE openpgl-${OPENPGL_VERSION}.tar.gz)
set(LEVEL_ZERO_VERSION v1.8.8)
set(LEVEL_ZERO_VERSION v1.8.5)
set(LEVEL_ZERO_URI https://github.com/oneapi-src/level-zero/archive/refs/tags/${LEVEL_ZERO_VERSION}.tar.gz)
set(LEVEL_ZERO_HASH 3553ae8fa0d2d69c4210a8f3428bd6612bd8bb8a627faf52c3658a01851e66d2)
set(LEVEL_ZERO_HASH b6e9663bbcc53c148d32376998298bec6f7c434ef2218c61fa708963e3a09394)
set(LEVEL_ZERO_HASH_TYPE SHA256)
set(LEVEL_ZERO_FILE level-zero-${LEVEL_ZERO_VERSION}.tar.gz)
set(DPCPP_VERSION 2022-12)
set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/${DPCPP_VERSION}.tar.gz)
set(DPCPP_HASH 13151d5ae79f7c9c4a9b072a0c486ae7b3c4993e301bb1268c92214451025790)
set(DPCPP_VERSION 20221019)
set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/sycl-nightly/${DPCPP_VERSION}.tar.gz)
set(DPCPP_HASH 2f533946e91ce3829431758ea17b0b834b960c1a796e9e4563c86e03eb9603a2)
set(DPCPP_HASH_TYPE SHA256)
set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)
@@ -619,9 +620,9 @@ set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)
# will take care of building them, unpack is being done in dpcpp_deps.cmake
# Source llvm/lib/SYCLLowerIR/CMakeLists.txt
set(VCINTRINSICS_VERSION 782fbf7301dc73acaa049a4324c976ad94f587f7)
set(VCINTRINSICS_VERSION abce9184b7a3a7fe1b02289b9285610d9dc45465)
set(VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/${VCINTRINSICS_VERSION}.tar.gz)
set(VCINTRINSICS_HASH f4c0ccad8c1f77760364c551c65e8e1cf194d058889fa46d3b1b2d19ec4dc33f)
set(VCINTRINSICS_HASH 3e9fd471246b87633b26f7e15e17ab7733d357458c53d5c5881c03929d6c551f)
set(VCINTRINSICS_HASH_TYPE SHA256)
set(VCINTRINSICS_FILE vc-intrinsics-${VCINTRINSICS_VERSION}.tar.gz)
@@ -656,13 +657,6 @@ set(SPIRV_HEADERS_HASH ec8ecb471a62672697846c436501638ab25447ae9d4a6761e0bfe8a9a
set(SPIRV_HEADERS_HASH_TYPE SHA256)
set(SPIRV_HEADERS_FILE SPIR-V-Headers-${SPIRV_HEADERS_VERSION}.tar.gz)
# Source llvm/sycl/plugins/unified_runtime/CMakeLists.txt
set(UNIFIED_RUNTIME_VERSION fd711c920acc4434cb52ff18b078c082d9d7f44d)
set(UNIFIED_RUNTIME_URI https://github.com/oneapi-src/unified-runtime/archive/${UNIFIED_RUNTIME_VERSION}.tar.gz)
set(UNIFIED_RUNTIME_HASH 535ca2ee78f68c5e7e62b10f1bbabd909179488885566e6d9b1fc50e8a1be65f)
set(UNIFIED_RUNTIME_HASH_TYPE SHA256)
set(UNIFIED_RUNTIME_FILE unified-runtime-${UNIFIED_RUNTIME_VERSION}.tar.gz)
######################
### DPCPP DEPS END ###
######################
@@ -736,9 +730,9 @@ set(GMMLIB_HASH c1f33e1519edfc527127baeb0436b783430dfd256c643130169a3a71dc86aff9
set(GMMLIB_HASH_TYPE SHA256)
set(GMMLIB_FILE ${GMMLIB_VERSION}.tar.gz)
set(OCLOC_VERSION 23.05.25593.18)
set(OCLOC_VERSION 22.49.25018.21)
set(OCLOC_URI https://github.com/intel/compute-runtime/archive/refs/tags/${OCLOC_VERSION}.tar.gz)
set(OCLOC_HASH 122415028e631922ae999c996954dfd98ce9a32decd564d5484c31476ec9306e)
set(OCLOC_HASH 92362dae08b503a34e5d3820ed284198c452bcd5e7504d90eb69887b20492c06)
set(OCLOC_HASH_TYPE SHA256)
set(OCLOC_FILE ocloc-${OCLOC_VERSION}.tar.gz)

View File

@@ -14,7 +14,6 @@ graph[autosize = false, size = "25.7,8.3!", resolution = 300];
external_dpcpp -- external_mp11;
external_dpcpp -- external_level_zero;
external_dpcpp -- external_spirvheaders;
external_dpcpp -- external_unifiedruntime;
external_embree -- external_tbb;
external_ffmpeg -- external_zlib;
external_ffmpeg -- external_openjpeg;

View File

@@ -34,156 +34,3 @@ diff -Naur llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice
libsycldevice-obj
libsycldevice-spv)
diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp
index 17eeaafae194..09e6d2217aaa 100644
--- a/sycl/source/detail/program_manager/program_manager.cpp
+++ b/sycl/source/detail/program_manager/program_manager.cpp
@@ -1647,46 +1647,120 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState(
}
assert(BinImages.size() > 0 && "Expected to find at least one device image");
+ // Ignore images with incompatible state. Image is considered compatible
+ // with a target state if an image is already in the target state or can
+ // be brought to target state by compiling/linking/building.
+ //
+ // Example: an image in "executable" state is not compatible with
+ // "input" target state - there is no operation to convert the image it
+ // to "input" state. An image in "input" state is compatible with
+ // "executable" target state because it can be built to get into
+ // "executable" state.
+ for (auto It = BinImages.begin(); It != BinImages.end();) {
+ if (getBinImageState(*It) > TargetState)
+ It = BinImages.erase(It);
+ else
+ ++It;
+ }
+
std::vector<device_image_plain> SYCLDeviceImages;
- for (RTDeviceBinaryImage *BinImage : BinImages) {
- const bundle_state ImgState = getBinImageState(BinImage);
-
- // Ignore images with incompatible state. Image is considered compatible
- // with a target state if an image is already in the target state or can
- // be brought to target state by compiling/linking/building.
- //
- // Example: an image in "executable" state is not compatible with
- // "input" target state - there is no operation to convert the image it
- // to "input" state. An image in "input" state is compatible with
- // "executable" target state because it can be built to get into
- // "executable" state.
- if (ImgState > TargetState)
- continue;
- for (const sycl::device &Dev : Devs) {
+ // If a non-input state is requested, we can filter out some compatible
+ // images and return only those with the highest compatible state for each
+ // device-kernel pair. This map tracks how many kernel-device pairs need each
+ // image, so that any unneeded ones are skipped.
+ // TODO this has no effect if the requested state is input, consider having
+ // a separate branch for that case to avoid unnecessary tracking work.
+ struct DeviceBinaryImageInfo {
+ std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
+ bundle_state State = bundle_state::input;
+ int RequirementCounter = 0;
+ };
+ std::unordered_map<RTDeviceBinaryImage *, DeviceBinaryImageInfo> ImageInfoMap;
+
+ for (const sycl::device &Dev : Devs) {
+ // Track the highest image state for each requested kernel.
+ using StateImagesPairT =
+ std::pair<bundle_state, std::vector<RTDeviceBinaryImage *>>;
+ using KernelImageMapT =
+ std::map<kernel_id, StateImagesPairT, LessByNameComp>;
+ KernelImageMapT KernelImageMap;
+ if (!KernelIDs.empty())
+ for (const kernel_id &KernelID : KernelIDs)
+ KernelImageMap.insert({KernelID, {}});
+
+ for (RTDeviceBinaryImage *BinImage : BinImages) {
if (!compatibleWithDevice(BinImage, Dev) ||
!doesDevSupportImgAspects(Dev, *BinImage))
continue;
- std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
- // Collect kernel names for the image
- {
- std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
- KernelIDs = m_BinImg2KernelIDs[BinImage];
- // If the image does not contain any non-service kernels we can skip it.
- if (!KernelIDs || KernelIDs->empty())
- continue;
+ auto InsertRes = ImageInfoMap.insert({BinImage, {}});
+ DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second;
+ if (InsertRes.second) {
+ ImgInfo.State = getBinImageState(BinImage);
+ // Collect kernel names for the image
+ {
+ std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
+ ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
+ }
}
+ const bundle_state ImgState = ImgInfo.State;
+ const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
+ ImgInfo.KernelIDs;
+ int &ImgRequirementCounter = ImgInfo.RequirementCounter;
- DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
- BinImage, Ctx, Devs, ImgState, KernelIDs, /*PIProgram=*/nullptr);
+ // If the image does not contain any non-service kernels we can skip it.
+ if (!ImageKernelIDs || ImageKernelIDs->empty())
+ continue;
- SYCLDeviceImages.push_back(
- createSyclObjFromImpl<device_image_plain>(Impl));
- break;
+ // Update tracked information.
+ for (kernel_id &KernelID : *ImageKernelIDs) {
+ StateImagesPairT *StateImagesPair;
+ // If only specific kernels are requested, ignore the rest.
+ if (!KernelIDs.empty()) {
+ auto It = KernelImageMap.find(KernelID);
+ if (It == KernelImageMap.end())
+ continue;
+ StateImagesPair = &It->second;
+ } else
+ StateImagesPair = &KernelImageMap[KernelID];
+
+ auto &[KernelImagesState, KernelImages] = *StateImagesPair;
+
+ if (KernelImages.empty()) {
+ KernelImagesState = ImgState;
+ KernelImages.push_back(BinImage);
+ ++ImgRequirementCounter;
+ } else if (KernelImagesState < ImgState) {
+ for (RTDeviceBinaryImage *Img : KernelImages) {
+ auto It = ImageInfoMap.find(Img);
+ assert(It != ImageInfoMap.end());
+ assert(It->second.RequirementCounter > 0);
+ --(It->second.RequirementCounter);
+ }
+ KernelImages.clear();
+ KernelImages.push_back(BinImage);
+ KernelImagesState = ImgState;
+ ++ImgRequirementCounter;
+ } else if (KernelImagesState == ImgState) {
+ KernelImages.push_back(BinImage);
+ ++ImgRequirementCounter;
+ }
+ }
}
}
+ for (const auto &ImgInfoPair : ImageInfoMap) {
+ if (ImgInfoPair.second.RequirementCounter == 0)
+ continue;
+
+ DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
+ ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State,
+ ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr);
+
+ SYCLDeviceImages.push_back(createSyclObjFromImpl<device_image_plain>(Impl));
+ }
+
return SYCLDeviceImages;
}

View File

@@ -149,19 +149,3 @@ index 074f910a2..30f490818 100644
return is_hit_first | is_hit_second;
}
};
diff -ruN a/kernels/sycl/rthwif_embree_builder.cpp b/kernels/sycl/rthwif_embree_builder.cpp
--- a/kernels/sycl/rthwif_embree_builder.cpp 2023-03-28 17:23:06.429190200 +0200
+++ b/kernels/sycl/rthwif_embree_builder.cpp 2023-03-28 17:35:01.291938600 +0200
@@ -540,7 +540,12 @@
assert(offset <= geomDescrData.size());
}
+ /* Force running BVH building sequentially from the calling thread if using TBB < 2021, as it otherwise leads to runtime issues. */
+#if TBB_VERSION_MAJOR<2021
+ RTHWIF_PARALLEL_OPERATION parallelOperation = nullptr;
+#else
RTHWIF_PARALLEL_OPERATION parallelOperation = rthwifNewParallelOperation();
+#endif
/* estimate static accel size */
BBox1f time_range(0,1);

View File

@@ -9,3 +9,77 @@
enabled libopenmpt && require_pkg_config libopenmpt "libopenmpt >= 0.2.6557" libopenmpt/libopenmpt.h openmpt_module_create -lstdc++ && append libopenmpt_extralibs "-lstdc++"
enabled libopus && {
enabled libopus_decoder && {
--- a/libavcodec/cfhddata.c
+++ b/libavcodec/cfhddata.c
@@ -276,10 +276,10 @@
av_cold int ff_cfhd_init_vlcs(CFHDContext *s)
{
int i, j, ret = 0;
- uint32_t new_cfhd_vlc_bits[NB_VLC_TABLE_18 * 2];
- uint8_t new_cfhd_vlc_len[NB_VLC_TABLE_18 * 2];
- uint16_t new_cfhd_vlc_run[NB_VLC_TABLE_18 * 2];
- int16_t new_cfhd_vlc_level[NB_VLC_TABLE_18 * 2];
+ uint32_t *new_cfhd_vlc_bits = av_calloc(sizeof(uint32_t), NB_VLC_TABLE_18 * 2);
+ uint8_t *new_cfhd_vlc_len = av_calloc(sizeof(uint8_t), NB_VLC_TABLE_18 * 2);
+ uint16_t *new_cfhd_vlc_run = av_calloc(sizeof(uint16_t), NB_VLC_TABLE_18 * 2);
+ int16_t *new_cfhd_vlc_level = av_calloc(sizeof(int16_t), NB_VLC_TABLE_18 * 2);
/** Similar to dv.c, generate signed VLC tables **/
@@ -305,8 +305,13 @@
ret = init_vlc(&s->vlc_9, VLC_BITS, j, new_cfhd_vlc_len,
1, 1, new_cfhd_vlc_bits, 4, 4, 0);
- if (ret < 0)
+ if (ret < 0) {
+ av_free(new_cfhd_vlc_bits);
+ av_free(new_cfhd_vlc_len);
+ av_free(new_cfhd_vlc_run);
+ av_free(new_cfhd_vlc_level);
return ret;
+ }
for (i = 0; i < s->vlc_9.table_size; i++) {
int code = s->vlc_9.table[i][0];
int len = s->vlc_9.table[i][1];
@@ -346,8 +351,14 @@
ret = init_vlc(&s->vlc_18, VLC_BITS, j, new_cfhd_vlc_len,
1, 1, new_cfhd_vlc_bits, 4, 4, 0);
- if (ret < 0)
+ if (ret < 0) {
+ av_free(new_cfhd_vlc_bits);
+ av_free(new_cfhd_vlc_len);
+ av_free(new_cfhd_vlc_run);
+ av_free(new_cfhd_vlc_level);
return ret;
+ }
+
av_assert0(s->vlc_18.table_size == 4572);
for (i = 0; i < s->vlc_18.table_size; i++) {
@@ -367,5 +378,10 @@
s->table_18_rl_vlc[i].run = run;
}
+ av_free(new_cfhd_vlc_bits);
+ av_free(new_cfhd_vlc_len);
+ av_free(new_cfhd_vlc_run);
+ av_free(new_cfhd_vlc_level);
+
return ret;
}
diff --git a/libavcodec/x86/simple_idct.asm b/libavcodec/x86/simple_idct.asm
index dcf0da6df121..982b2f0bbba1 100644
--- a/libavcodec/x86/simple_idct.asm
+++ b/libavcodec/x86/simple_idct.asm
@@ -25,9 +25,9 @@
%include "libavutil/x86/x86util.asm"
-%if ARCH_X86_32
SECTION_RODATA
+%if ARCH_X86_32
cextern pb_80
wm1010: dw 0, 0xffff, 0, 0xffff

View File

@@ -37,24 +37,18 @@ elseif(HIP_HIPCC_EXECUTABLE)
set(HIP_VERSION_MINOR 0)
set(HIP_VERSION_PATCH 0)
if(WIN32)
set(_hipcc_executable ${HIP_HIPCC_EXECUTABLE}.bat)
else()
set(_hipcc_executable ${HIP_HIPCC_EXECUTABLE})
endif()
# Get version from the output.
execute_process(COMMAND ${_hipcc_executable} --version
OUTPUT_VARIABLE _hip_version_raw
execute_process(COMMAND ${HIP_HIPCC_EXECUTABLE} --version
OUTPUT_VARIABLE HIP_VERSION_RAW
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE)
# Parse parts.
if(_hip_version_raw MATCHES "HIP version: .*")
if(HIP_VERSION_RAW MATCHES "HIP version: .*")
# Strip the HIP prefix and get list of individual version components.
string(REGEX REPLACE
".*HIP version: ([.0-9]+).*" "\\1"
HIP_SEMANTIC_VERSION "${_hip_version_raw}")
HIP_SEMANTIC_VERSION "${HIP_VERSION_RAW}")
string(REPLACE "." ";" HIP_VERSION_PARTS "${HIP_SEMANTIC_VERSION}")
list(LENGTH HIP_VERSION_PARTS NUM_HIP_VERSION_PARTS)
@@ -77,13 +71,7 @@ elseif(HIP_HIPCC_EXECUTABLE)
# Construct full semantic version.
set(HIP_VERSION "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_PATCH}")
unset(_hip_version_raw)
unset(_hipcc_executable)
unset(HIP_VERSION_RAW)
else()
set(HIP_FOUND FALSE)
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(HIP
REQUIRED_VARS HIP_HIPCC_EXECUTABLE
VERSION_VAR HIP_VERSION)

View File

@@ -108,11 +108,7 @@ FIND_PACKAGE_HANDLE_STANDARD_ARGS(SYCL
IF(SYCL_FOUND)
SET(SYCL_INCLUDE_DIR ${SYCL_INCLUDE_DIR} ${SYCL_INCLUDE_DIR}/sycl)
IF(WIN32 AND SYCL_LIBRARY_DEBUG)
SET(SYCL_LIBRARIES optimized ${SYCL_LIBRARY} debug ${SYCL_LIBRARY_DEBUG})
ELSE()
SET(SYCL_LIBRARIES ${SYCL_LIBRARY})
ENDIF()
SET(SYCL_LIBRARIES ${SYCL_LIBRARY})
ELSE()
SET(SYCL_SYCL_FOUND FALSE)
ENDIF()

View File

@@ -0,0 +1,58 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-or-later
CHECKER_IGNORE_PREFIX = [
"extern",
"intern/moto",
]
CHECKER_BIN = "smatch"
CHECKER_ARGS = [
"--full-path",
"--two-passes",
]
import project_source_info
import subprocess
import sys
import os
USE_QUIET = (os.environ.get("QUIET", None) is not None)
def main():
source_info = project_source_info.build_info(use_cxx=False, ignore_prefix_list=CHECKER_IGNORE_PREFIX)
source_defines = project_source_info.build_defines_as_args()
check_commands = []
for c, inc_dirs, defs in source_info:
cmd = ([CHECKER_BIN] +
CHECKER_ARGS +
[c] +
[("-I%s" % i) for i in inc_dirs] +
[("-D%s" % d) for d in defs] +
source_defines
)
check_commands.append((c, cmd))
def my_process(i, c, cmd):
if not USE_QUIET:
percent = 100.0 * (i / len(check_commands))
percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"
sys.stdout.flush()
sys.stdout.write("%s %s\n" % (percent_str, c))
return subprocess.Popen(cmd)
process_functions = []
for i, (c, cmd) in enumerate(check_commands):
process_functions.append((my_process, (i, c, cmd)))
project_source_info.queue_processes(process_functions)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-or-later
CHECKER_IGNORE_PREFIX = [
"extern",
"intern/moto",
]
CHECKER_BIN = "sparse"
CHECKER_ARGS = [
]
import project_source_info
import subprocess
import sys
import os
USE_QUIET = (os.environ.get("QUIET", None) is not None)
def main():
source_info = project_source_info.build_info(use_cxx=False, ignore_prefix_list=CHECKER_IGNORE_PREFIX)
source_defines = project_source_info.build_defines_as_args()
check_commands = []
for c, inc_dirs, defs in source_info:
cmd = ([CHECKER_BIN] +
CHECKER_ARGS +
[c] +
[("-I%s" % i) for i in inc_dirs] +
[("-D%s" % d) for d in defs] +
source_defines
)
check_commands.append((c, cmd))
def my_process(i, c, cmd):
if not USE_QUIET:
percent = 100.0 * (i / len(check_commands))
percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"
sys.stdout.flush()
sys.stdout.write("%s %s\n" % (percent_str, c))
return subprocess.Popen(cmd)
process_functions = []
for i, (c, cmd) in enumerate(check_commands):
process_functions.append((my_process, (i, c, cmd)))
project_source_info.queue_processes(process_functions)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,86 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-or-later
CHECKER_IGNORE_PREFIX = [
"extern",
"intern/moto",
]
CHECKER_BIN = "splint"
CHECKER_ARGS = [
"-weak",
"-posix-lib",
"-linelen", "10000",
"+ignorequals",
"+relaxtypes",
"-retvalother",
"+matchanyintegral",
"+longintegral",
"+ignoresigns",
"-nestcomment",
"-predboolothers",
"-ifempty",
"-unrecogcomments",
# we may want to remove these later
"-type",
"-fixedformalarray",
"-fullinitblock",
"-fcnuse",
"-initallelements",
"-castfcnptr",
# -forcehints,
"-bufferoverflowhigh", # warns a lot about sprintf()
# re-definitions, rna causes most of these
"-redef",
"-syntax",
# dummy, witjout this splint complains with:
# /usr/include/bits/confname.h:31:27: *** Internal Bug at cscannerHelp.c:2428: Unexpanded macro not function or constant: int _PC_MAX_CANON
"-D_PC_MAX_CANON=0",
]
import project_source_info
import subprocess
import sys
import os
USE_QUIET = (os.environ.get("QUIET", None) is not None)
def main():
source_info = project_source_info.build_info(use_cxx=False, ignore_prefix_list=CHECKER_IGNORE_PREFIX)
check_commands = []
for c, inc_dirs, defs in source_info:
cmd = ([CHECKER_BIN] +
CHECKER_ARGS +
[c] +
[("-I%s" % i) for i in inc_dirs] +
[("-D%s" % d) for d in defs]
)
check_commands.append((c, cmd))
def my_process(i, c, cmd):
if not USE_QUIET:
percent = 100.0 * (i / len(check_commands))
percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"
sys.stdout.write("%s %s\n" % (percent_str, c))
sys.stdout.flush()
return subprocess.Popen(cmd)
process_functions = []
for i, (c, cmd) in enumerate(check_commands):
process_functions.append((my_process, (i, c, cmd)))
project_source_info.queue_processes(process_functions)
if __name__ == "__main__":
main()

View File

@@ -82,7 +82,7 @@ if(NOT APPLE)
set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_DEVICE_ONEAPI ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_ONEAPI_BINARIES ON CACHE BOOL "" FORCE)
endif()

View File

@@ -1384,3 +1384,4 @@ macro(windows_process_platform_bundled_libraries library_deps)
endforeach()
endif()
endmacro()

View File

@@ -174,7 +174,7 @@ if(SYSTEMSTUBS_LIBRARY)
list(APPEND PLATFORM_LINKLIBS SystemStubs)
endif()
string(APPEND PLATFORM_CFLAGS " -pipe -funsigned-char -fno-strict-aliasing -ffp-contract=off")
string(APPEND PLATFORM_CFLAGS " -pipe -funsigned-char -fno-strict-aliasing")
set(PLATFORM_LINKFLAGS
"-fexceptions -framework CoreServices -framework Foundation -framework IOKit -framework AppKit -framework Cocoa -framework Carbon -framework AudioUnit -framework AudioToolbox -framework CoreAudio -framework Metal -framework QuartzCore"
)

View File

@@ -803,7 +803,8 @@ if(CMAKE_COMPILER_IS_GNUCC)
# Automatically turned on when building with "-march=native". This is
# explicitly turned off here as it will make floating point math give a bit
# different results. This will lead to automated test failures. So disable
# this until we support it.
# this until we support it. Seems to default to off in clang and the intel
# compiler.
set(PLATFORM_CFLAGS "-pipe -fPIC -funsigned-char -fno-strict-aliasing -ffp-contract=off")
# `maybe-uninitialized` is unreliable in release builds, but fine in debug builds.
@@ -814,49 +815,64 @@ if(CMAKE_COMPILER_IS_GNUCC)
string(PREPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO "${GCC_EXTRA_FLAGS_RELEASE} ")
unset(GCC_EXTRA_FLAGS_RELEASE)
# NOTE(@campbellbarton): Eventually mold will be able to use `-fuse-ld=mold`,
# however at the moment this only works for GCC 12.1+ (unreleased at time of writing).
# So a workaround is used here "-B" which points to another path to find system commands
# such as `ld`.
if(WITH_LINKER_MOLD AND _IS_LINKER_DEFAULT)
find_program(MOLD_BIN "mold")
mark_as_advanced(MOLD_BIN)
if(NOT MOLD_BIN)
message(STATUS "The \"mold\" binary could not be found, using system linker.")
set(WITH_LINKER_MOLD OFF)
elseif(CMAKE_C_COMPILER_VERSION VERSION_LESS 12.1)
message(STATUS "GCC 12.1 or newer is required for th MOLD linker.")
set(WITH_LINKER_MOLD OFF)
else()
get_filename_component(MOLD_BIN_DIR "${MOLD_BIN}" DIRECTORY)
# Check if the `-B` argument is required.
# This will happen when `MOLD_BIN` points to a non-standard location.
# Keep this option as mold is not yet a standard system component and
# users may have it installed in some unexpected place.
set(_mold_args "-fuse-ld=mold")
execute_process(
COMMAND ${CMAKE_C_COMPILER} -B ${MOLD_BIN_DIR} ${_mold_args} -Wl,--version
ERROR_QUIET OUTPUT_VARIABLE LD_VERSION_WITH_DIR
# By default mold installs the binary to:
# - `{PREFIX}/bin/mold` as well as a symbolic-link in...
# - `{PREFIX}/lib/mold/ld`.
# (where `PREFIX` is typically `/usr/`).
#
# This block of code finds `{PREFIX}/lib/mold` from the `mold` binary.
# Other methods of searching for the path could also be made to work,
# we could even make our own directory and symbolic-link, however it's more
# convenient to use the one provided by mold.
#
# Use the binary path to "mold", to find the common prefix which contains "lib/mold".
# The parent directory: e.g. `/usr/bin/mold` -> `/usr/bin/`.
get_filename_component(MOLD_PREFIX "${MOLD_BIN}" DIRECTORY)
# The common prefix path: e.g. `/usr/bin/` -> `/usr/` to use as a hint.
get_filename_component(MOLD_PREFIX "${MOLD_PREFIX}" DIRECTORY)
# Find `{PREFIX}/lib/mold/ld`, store the directory component (without the `ld`).
# Then pass `-B {PREFIX}/lib/mold` to GCC so the `ld` located there overrides the default.
find_path(
MOLD_BIN_DIR "ld"
HINTS "${MOLD_PREFIX}"
# The default path is `libexec`, Arch Linux for e.g.
# replaces this with `lib` so check both.
PATH_SUFFIXES "libexec/mold" "lib/mold" "lib64/mold"
NO_DEFAULT_PATH
NO_CACHE
)
execute_process(
COMMAND ${CMAKE_C_COMPILER} ${_mold_args} -Wl,--version
ERROR_QUIET OUTPUT_VARIABLE LD_VERSION
)
if(NOT (LD_VERSION STREQUAL LD_VERSION_WITH_DIR))
string(PREPEND _mold_args "-B \"${MOLD_BIN_DIR}\" ")
set(LD_VERSION "${LD_VERSION_WITH_DIR}")
if(NOT MOLD_BIN_DIR)
message(STATUS
"The mold linker could not find the directory containing the linker command "
"(typically "
"\"${MOLD_PREFIX}/libexec/mold/ld\") or "
"\"${MOLD_PREFIX}/lib/mold/ld\") using system linker."
)
set(WITH_LINKER_MOLD OFF)
endif()
unset(MOLD_PREFIX)
endif()
if("${LD_VERSION}" MATCHES "mold ")
string(APPEND CMAKE_EXE_LINKER_FLAGS " ${_mold_args}")
string(APPEND CMAKE_SHARED_LINKER_FLAGS " ${_mold_args}")
string(APPEND CMAKE_MODULE_LINKER_FLAGS " ${_mold_args}")
set(_IS_LINKER_DEFAULT OFF)
else()
message(STATUS "GNU mold linker isn't available, using the default system linker.")
endif()
unset(_mold_args)
unset(MOLD_BIN_DIR)
unset(LD_VERSION)
if(WITH_LINKER_MOLD)
# GCC will search for `ld` in this directory first.
string(APPEND CMAKE_EXE_LINKER_FLAGS " -B \"${MOLD_BIN_DIR}\"")
string(APPEND CMAKE_SHARED_LINKER_FLAGS " -B \"${MOLD_BIN_DIR}\"")
string(APPEND CMAKE_MODULE_LINKER_FLAGS " -B \"${MOLD_BIN_DIR}\"")
set(_IS_LINKER_DEFAULT OFF)
endif()
unset(MOLD_BIN)
unset(MOLD_BIN_DIR)
endif()
if(WITH_LINKER_GOLD AND _IS_LINKER_DEFAULT)
@@ -891,7 +907,7 @@ if(CMAKE_COMPILER_IS_GNUCC)
# CLang is the same as GCC for now.
elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
set(PLATFORM_CFLAGS "-pipe -fPIC -funsigned-char -fno-strict-aliasing -ffp-contract=off")
set(PLATFORM_CFLAGS "-pipe -fPIC -funsigned-char -fno-strict-aliasing")
if(WITH_LINKER_MOLD AND _IS_LINKER_DEFAULT)
find_program(MOLD_BIN "mold")

View File

@@ -9,7 +9,7 @@ buildbot:
cuda11:
version: '11.4.1'
hip:
version: '5.5.30571'
version: '5.3.22480'
optix:
version: '7.3.0'
ocloc:

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-or-later
"""
'''
This script generates the blender.1 man page, embedding the help text
from the Blender executable itself. Invoke it as follows:
@@ -9,7 +9,7 @@ from the Blender executable itself. Invoke it as follows:
where <path-to-blender> is the path to the Blender executable,
and <output-filename> is where to write the generated man page.
"""
'''
import argparse
import os
@@ -87,29 +87,29 @@ def man_page_from_blender_help(fh: TextIO, blender_bin: str, verbose: bool) -> N
(blender_info["date"], blender_info["version"].replace(".", "\\&."))
)
fh.write(r"""
fh.write(r'''
.SH NAME
blender \- a full-featured 3D application""")
blender \- a full-featured 3D application''')
fh.write(r"""
fh.write(r'''
.SH SYNOPSIS
.B blender [args ...] [file] [args ...]""")
.B blender [args ...] [file] [args ...]''')
fh.write(r"""
fh.write(r'''
.br
.SH DESCRIPTION
.PP
.B blender
is a full-featured 3D application. It supports the entirety of the 3D pipeline - """
"""modeling, rigging, animation, simulation, rendering, compositing, motion tracking, and video editing.
is a full-featured 3D application. It supports the entirety of the 3D pipeline - '''
'''modeling, rigging, animation, simulation, rendering, compositing, motion tracking, and video editing.
Use Blender to create 3D images and animations, films and commercials, content for games, """
r"""architectural and industrial visualizations, and scientific visualizations.
Use Blender to create 3D images and animations, films and commercials, content for games, '''
r'''architectural and industrial visualizations, and scientific visualizations.
https://www.blender.org""")
https://www.blender.org''')
fh.write(r"""
.SH OPTIONS""")
fh.write(r'''
.SH OPTIONS''')
fh.write("\n\n")
@@ -152,7 +152,7 @@ https://www.blender.org""")
# Footer Content.
fh.write(r"""
fh.write(r'''
.br
.SH SEE ALSO
.B luxrender(1)
@@ -162,7 +162,7 @@ https://www.blender.org""")
This manpage was written for a Debian GNU/Linux system by Daniel Mester
<mester@uni-bremen.de> and updated by Cyril Brulebois
<cyril.brulebois@enst-bretagne.fr> and Dan Eicher <dan@trollwerks.org>.
""")
''')
def create_argparse() -> argparse.ArgumentParser:

View File

@@ -865,40 +865,29 @@ Unfortunate Corner Cases
Besides all expected cases listed above, there are a few others that should not be
an issue but, due to internal implementation details, currently are:
Collection Objects
^^^^^^^^^^^^^^^^^^
Changing: ``Object.hide_viewport``, ``Object.hide_select`` or ``Object.hide_render``
will trigger a rebuild of Collection caches, thus breaking any current iteration over ``Collection.all_objects``.
.. rubric:: Do not:
.. code-block:: python
# `all_objects` is an iterator. Using it directly while performing operations on its members that will update
# the memory accessed by the `all_objects` iterator will lead to invalid memory accesses and crashes.
for object in bpy.data.collections["Collection"].all_objects:
object.hide_viewport = True
- ``Object.hide_viewport``, ``Object.hide_select`` and ``Object.hide_render``:
Setting any of those Booleans will trigger a rebuild of Collection caches,
thus breaking any current iteration over ``Collection.all_objects``.
.. rubric:: Do:
.. rubric:: Do not:
.. code-block:: python
.. code-block:: python
# `all_objects[:]` is an independent list generated from the iterator. As long as no objects are deleted,
# its content will remain valid even if the data accessed by the `all_objects` iterator is modified.
for object in bpy.data.collections["Collection"].all_objects[:]:
object.hide_viewport = True
# `all_objects` is an iterator. Using it directly while performing operations on its members that will update
# the memory accessed by the `all_objects` iterator will lead to invalid memory accesses and crashes.
for object in bpy.data.collections["Collection"].all_objects:
object.hide_viewport = True
Data-Blocks Renaming During Iteration
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. rubric:: Do:
Data-blocks accessed from ``bpy.data`` are sorted when their name is set.
Any loop that iterates of a data such as ``bpy.data.objects`` for example,
and sets the objects ``name`` must get all items from the iterator first (typically by converting to a list or tuple)
to avoid missing some objects and iterating over others multiple times.
.. code-block:: python
# `all_objects[:]` is an independent list generated from the iterator. As long as no objects are deleted,
# its content will remain valid even if the data accessed by the `all_objects` iterator is modified.
for object in bpy.data.collections["Collection"].all_objects[:]:
object.hide_viewport = True
sys.exit

View File

@@ -572,7 +572,7 @@ template<class T> inline bool cmpMinMax(T &minv, T &maxv, const T &val)
}
template<> inline bool cmpMinMax<Vec3>(Vec3 &minv, Vec3 &maxv, const Vec3 &val)
{
return (cmpMinMax(minv.x, maxv.x, val.x) || cmpMinMax(minv.y, maxv.y, val.y) ||
return (cmpMinMax(minv.x, maxv.x, val.x) | cmpMinMax(minv.y, maxv.y, val.y) |
cmpMinMax(minv.z, maxv.z, val.z));
}

View File

@@ -281,9 +281,6 @@ endif()
if(WITH_CYCLES_EMBREE)
add_definitions(-DWITH_EMBREE)
if(WITH_CYCLES_DEVICE_ONEAPI AND EMBREE_SYCL_SUPPORT)
add_definitions(-DWITH_EMBREE_GPU)
endif()
add_definitions(-DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION})
include_directories(
SYSTEM

View File

@@ -106,7 +106,7 @@ class CyclesRender(bpy.types.RenderEngine):
from . import osl
osl.update_script_node(node, self.report)
else:
self.report({'ERROR'}, "OSL support disabled in this build")
self.report({'ERROR'}, "OSL support disabled in this build.")
def update_render_passes(self, scene, srl):
engine.register_passes(self, scene, srl)

View File

@@ -172,8 +172,6 @@ def system_info():
def list_render_passes(scene, srl):
import _cycles
crl = srl.cycles
# Combined pass.
@@ -252,12 +250,6 @@ def list_render_passes(scene, srl):
for lightgroup in srl.lightgroups:
yield ("Combined_%s" % lightgroup.name, "RGB", 'COLOR')
# Path guiding debug passes.
if _cycles.with_debug:
yield ("Guiding Color", "RGB", 'COLOR')
yield ("Guiding Probability", "X", 'VALUE')
yield ("Guiding Average Roughness", "X", 'VALUE')
def register_passes(engine, scene, view_layer):
for name, channelids, channeltype in list_render_passes(scene, view_layer):

View File

@@ -1544,13 +1544,6 @@ class CyclesPreferences(bpy.types.AddonPreferences):
default=False,
)
use_oneapirt: BoolProperty(
name="Embree on GPU (Experimental)",
description="Embree GPU execution will allow to use hardware ray tracing on Intel GPUs, which will provide better performance. "
"However this support is experimental and some scenes may render incorrectly",
default=False,
)
kernel_optimization_level: EnumProperty(
name="Kernel Optimization",
description="Kernels can be optimized based on scene content. Optimized kernels are requested at the start of a render. "
@@ -1683,16 +1676,16 @@ class CyclesPreferences(bpy.types.AddonPreferences):
col.label(text=iface_("and NVIDIA driver version %s or newer") % driver_version,
icon='BLANK1', translate=False)
elif device_type == 'HIP':
import sys
if sys.platform[:3] == "win":
driver_version = "21.Q4"
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text=iface_("and AMD Radeon Pro %s driver or newer") % driver_version,
icon='BLANK1', translate=False)
elif sys.platform.startswith("linux"):
if True:
col.label(text="HIP temporarily disabled due to compiler bugs", icon='BLANK1')
else:
if True:
col.label(text="HIP temporarily disabled due to compiler bugs", icon='BLANK1')
else:
import sys
if sys.platform[:3] == "win":
driver_version = "21.Q4"
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text=iface_("and AMD Radeon Pro %s driver or newer") % driver_version,
icon='BLANK1', translate=False)
elif sys.platform.startswith("linux"):
driver_version = "22.10"
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text=iface_("and AMD driver version %s or newer") % driver_version, icon='BLANK1',
@@ -1770,11 +1763,6 @@ class CyclesPreferences(bpy.types.AddonPreferences):
col.prop(self, "kernel_optimization_level")
col.prop(self, "use_metalrt")
if compute_device_type == 'ONEAPI' and _cycles.with_embree_gpu:
row = layout.row()
row.use_property_split = True
row.prop(self, "use_oneapirt")
def draw(self, context):
self.draw_impl(self.layout, context)

View File

@@ -803,16 +803,6 @@ static void attr_create_generic(Scene *scene,
num_curves, num_keys, data, element, [&](int i) { return float(src[i]); });
break;
}
case BL::Attribute::data_type_INT32_2D: {
BL::Int2Attribute b_int2_attribute{b_attribute};
const int2 *src = static_cast<const int2 *>(b_int2_attribute.data[0].ptr.data);
Attribute *attr = attributes.add(name, TypeFloat2, element);
float2 *data = attr->data_float2();
fill_generic_attribute(num_curves, num_keys, data, element, [&](int i) {
return make_float2(float(src[i][0]), float(src[i][1]));
});
break;
}
case BL::Attribute::data_type_FLOAT_VECTOR: {
BL::FloatVectorAttribute b_vector_attribute{b_attribute};
const float(*src)[3] = static_cast<const float(*)[3]>(b_vector_attribute.data[0].ptr.data);

View File

@@ -112,26 +112,9 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences,
device.has_peer_memory = false;
}
bool accumulated_use_hardware_raytracing = false;
foreach (
DeviceInfo &info,
(device.multi_devices.size() != 0 ? device.multi_devices : vector<DeviceInfo>({device}))) {
if (info.type == DEVICE_METAL && !get_boolean(cpreferences, "use_metalrt")) {
info.use_hardware_raytracing = false;
}
if (info.type == DEVICE_ONEAPI && !get_boolean(cpreferences, "use_oneapirt")) {
info.use_hardware_raytracing = false;
}
/* There is an accumulative logic here, because Multi-devices are support only for
* the same backend + CPU in Blender right now, and both oneAPI and Metal have a
* global boolean backend setting (see above) for enabling/disabling HW RT,
* so all sub-devices in the multi-device should enable (or disable) HW RT
* simultaneously (and CPU device are expected to ignore `use_hardware_raytracing` setting). */
accumulated_use_hardware_raytracing |= info.use_hardware_raytracing;
if (get_boolean(cpreferences, "use_metalrt")) {
device.use_metalrt = true;
}
device.use_hardware_raytracing = accumulated_use_hardware_raytracing;
if (preview) {
/* Disable specialization for preview renders. */

View File

@@ -280,7 +280,7 @@ static void fill_generic_attribute(BL::Mesh &b_mesh,
assert(0);
}
else {
const int2 *edges = static_cast<const int2 *>(b_mesh.edges[0].ptr.data);
const MEdge *edges = static_cast<const MEdge *>(b_mesh.edges[0].ptr.data);
const size_t verts_num = b_mesh.vertices.length();
vector<int> count(verts_num, 0);
@@ -288,11 +288,11 @@ static void fill_generic_attribute(BL::Mesh &b_mesh,
for (int i = 0; i < edges_num; i++) {
TypeInCycles value = get_value_at_index(i);
const int2 &b_edge = edges[i];
data[b_edge[0]] += value;
data[b_edge[1]] += value;
count[b_edge[0]]++;
count[b_edge[1]]++;
const MEdge &b_edge = edges[i];
data[b_edge.v1] += value;
data[b_edge.v2] += value;
count[b_edge.v1]++;
count[b_edge.v2]++;
}
for (size_t i = 0; i < verts_num; i++) {
@@ -528,19 +528,6 @@ static void attr_create_generic(Scene *scene,
});
break;
}
case BL::Attribute::data_type_INT32_2D: {
BL::Int2Attribute b_int2_attribute{b_attribute};
if (b_int2_attribute.data.length() == 0) {
continue;
}
const int2 *src = static_cast<const int2 *>(b_int2_attribute.data[0].ptr.data);
Attribute *attr = attributes.add(name, TypeFloat2, element);
float2 *data = attr->data_float2();
fill_generic_attribute(b_mesh, data, b_domain, subdivision, [&](int i) {
return make_float2(float(src[i][0]), float(src[i][1]));
});
break;
}
default:
/* Not supported. */
break;
@@ -796,13 +783,13 @@ static void attr_create_pointiness(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, b
EdgeMap visited_edges;
memset(&counter[0], 0, sizeof(int) * counter.size());
const int2 *edges = static_cast<int2 *>(b_mesh.edges[0].ptr.data);
const MEdge *edges = static_cast<MEdge *>(b_mesh.edges[0].ptr.data);
const int edges_num = b_mesh.edges.length();
for (int i = 0; i < edges_num; i++) {
const int2 &b_edge = edges[i];
const int v0 = vert_orig_index[b_edge[0]];
const int v1 = vert_orig_index[b_edge[1]];
const MEdge &b_edge = edges[i];
const int v0 = vert_orig_index[b_edge.v1];
const int v1 = vert_orig_index[b_edge.v2];
if (visited_edges.exists(v0, v1)) {
continue;
}
@@ -838,9 +825,9 @@ static void attr_create_pointiness(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, b
memset(&counter[0], 0, sizeof(int) * counter.size());
visited_edges.clear();
for (int i = 0; i < edges_num; i++) {
const int2 &b_edge = edges[i];
const int v0 = vert_orig_index[b_edge[0]];
const int v1 = vert_orig_index[b_edge[1]];
const MEdge &b_edge = edges[i];
const int v0 = vert_orig_index[b_edge.v1];
const int v1 = vert_orig_index[b_edge.v2];
if (visited_edges.exists(v0, v1)) {
continue;
}
@@ -907,12 +894,12 @@ static void attr_create_random_per_island(Scene *scene,
DisjointSet vertices_sets(number_of_vertices);
const int2 *edges = static_cast<int2 *>(b_mesh.edges[0].ptr.data);
const MEdge *edges = static_cast<MEdge *>(b_mesh.edges[0].ptr.data);
const int edges_num = b_mesh.edges.length();
const int *corner_verts = find_corner_vert_attribute(b_mesh);
for (int i = 0; i < edges_num; i++) {
vertices_sets.join(edges[i][0], edges[i][1]);
vertices_sets.join(edges[i].v1, edges[i].v2);
}
AttributeSet &attributes = (subdivision) ? mesh->subd_attributes : mesh->attributes;
@@ -1234,12 +1221,12 @@ static void create_subd_mesh(Scene *scene,
mesh->reserve_subd_creases(num_creases);
const int2 *edges = static_cast<int2 *>(b_mesh.edges[0].ptr.data);
const MEdge *edges = static_cast<MEdge *>(b_mesh.edges[0].ptr.data);
for (int i = 0; i < edges_num; i++) {
const float crease = creases[i];
if (crease != 0.0f) {
const int2 &b_edge = edges[i];
mesh->add_edge_crease(b_edge[0], b_edge[1], crease);
const MEdge &b_edge = edges[i];
mesh->add_edge_crease(b_edge.v1, b_edge.v2, crease);
}
}
}

View File

@@ -102,16 +102,6 @@ static void copy_attributes(PointCloud *pointcloud,
}
break;
}
case BL::Attribute::data_type_INT32_2D: {
BL::Int2Attribute b_int2_attribute{b_attribute};
const int2 *src = static_cast<const int2 *>(b_int2_attribute.data[0].ptr.data);
Attribute *attr = attributes.add(name, TypeFloat2, element);
float2 *data = attr->data_float2();
for (int i = 0; i < num_points; i++) {
data[i] = make_float2(float(src[i][0]), float(src[i][1]));
}
break;
}
case BL::Attribute::data_type_FLOAT_VECTOR: {
BL::FloatVectorAttribute b_vector_attribute{b_attribute};
const float(*src)[3] = static_cast<const float(*)[3]>(b_vector_attribute.data[0].ptr.data);

View File

@@ -1034,14 +1034,6 @@ void *CCL_python_module_init()
Py_INCREF(Py_False);
#endif /* WITH_EMBREE */
#ifdef WITH_EMBREE_GPU
PyModule_AddObject(mod, "with_embree_gpu", Py_True);
Py_INCREF(Py_True);
#else /* WITH_EMBREE_GPU */
PyModule_AddObject(mod, "with_embree_gpu", Py_False);
Py_INCREF(Py_False);
#endif /* WITH_EMBREE_GPU */
if (ccl::openimagedenoise_supported()) {
PyModule_AddObject(mod, "with_openimagedenoise", Py_True);
Py_INCREF(Py_True);

View File

@@ -1061,7 +1061,7 @@ void BlenderSession::ensure_display_driver_if_needed()
unique_ptr<BlenderDisplayDriver> display_driver = make_unique<BlenderDisplayDriver>(
b_engine, b_scene, background);
display_driver_ = display_driver.get();
session->set_display_driver(std::move(display_driver));
session->set_display_driver(move(display_driver));
}
CCL_NAMESPACE_END

View File

@@ -981,8 +981,22 @@ static ShaderNode *add_node(Scene *scene,
sky->set_sun_disc(b_sky_node.sun_disc());
sky->set_sun_size(b_sky_node.sun_size());
sky->set_sun_intensity(b_sky_node.sun_intensity());
sky->set_sun_elevation(b_sky_node.sun_elevation());
sky->set_sun_rotation(b_sky_node.sun_rotation());
/* Patch sun position to be able to animate daylight cycle while keeping the shading code
* simple. */
float sun_rotation = b_sky_node.sun_rotation();
/* Wrap into [-2PI..2PI] range. */
float sun_elevation = fmodf(b_sky_node.sun_elevation(), M_2PI_F);
/* Wrap into [-PI..PI] range. */
if (fabsf(sun_elevation) >= M_PI_F) {
sun_elevation -= copysignf(2.0f, sun_elevation) * M_PI_F;
}
/* Wrap into [-PI/2..PI/2] range while keeping the same absolute position. */
if (sun_elevation >= M_PI_2_F || sun_elevation <= -M_PI_2_F) {
sun_elevation = copysignf(M_PI_F, sun_elevation) - sun_elevation;
sun_rotation += M_PI_F;
}
sky->set_sun_elevation(sun_elevation);
sky->set_sun_rotation(sun_rotation);
sky->set_altitude(b_sky_node.altitude());
sky->set_air_density(b_sky_node.air_density());
sky->set_dust_density(b_sky_node.dust_density());

View File

@@ -634,10 +634,6 @@ static bool get_known_pass_type(BL::RenderPass &b_pass, PassType &type, PassMode
MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER, false);
MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT, false);
MAP_PASS("Guiding Color", PASS_GUIDING_COLOR, false);
MAP_PASS("Guiding Probability", PASS_GUIDING_PROBABILITY, false);
MAP_PASS("Guiding Average Roughness", PASS_GUIDING_AVG_ROUGHNESS, false);
if (string_startswith(name, cryptomatte_prefix)) {
type = PASS_CRYPTOMATTE;
mode = PassMode::DENOISED;
@@ -688,6 +684,18 @@ void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_v
}
scene->film->set_cryptomatte_passes(cryptomatte_passes);
/* Path guiding debug passes. */
#ifdef WITH_CYCLES_DEBUG
b_engine.add_pass("Guiding Color", 3, "RGB", b_view_layer.name().c_str());
pass_add(scene, PASS_GUIDING_COLOR, "Guiding Color", PassMode::NOISY);
b_engine.add_pass("Guiding Probability", 1, "X", b_view_layer.name().c_str());
pass_add(scene, PASS_GUIDING_PROBABILITY, "Guiding Probability", PassMode::NOISY);
b_engine.add_pass("Guiding Average Roughness", 1, "X", b_view_layer.name().c_str());
pass_add(scene, PASS_GUIDING_AVG_ROUGHNESS, "Guiding Average Roughness", PassMode::NOISY);
#endif
unordered_set<string> expected_passes;
/* Custom AOV passes. */

View File

@@ -527,7 +527,7 @@ BVHNode *BVHBuild::run()
if (progress.get_cancel()) {
rootnode->deleteSubtree();
rootnode = NULL;
VLOG_WORK << "BVH build canceled.";
VLOG_WORK << "BVH build cancelled.";
}
else {
/*rotate(rootnode, 4, 5);*/

View File

@@ -606,7 +606,7 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
int4 *bvh_nodes = &bvh->pack.nodes[0];
size_t bvh_nodes_size = bvh->pack.nodes.size();
for (size_t i = 0; i < bvh_nodes_size;) {
for (size_t i = 0, j = 0; i < bvh_nodes_size; j++) {
size_t nsize, nsize_bbox;
if (bvh_nodes[i].x & PATH_RAY_NODE_UNALIGNED) {
nsize = BVH_UNALIGNED_NODE_SIZE;

View File

@@ -111,13 +111,9 @@ BVHEmbree::~BVHEmbree()
}
}
void BVHEmbree::build(Progress &progress,
Stats *stats,
RTCDevice rtc_device_,
const bool rtc_device_is_sycl_)
void BVHEmbree::build(Progress &progress, Stats *stats, RTCDevice rtc_device_)
{
rtc_device = rtc_device_;
rtc_device_is_sycl = rtc_device_is_sycl_;
assert(rtc_device);
rtcSetDeviceErrorFunction(rtc_device, rtc_error_func, NULL);
@@ -270,29 +266,15 @@ void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i)
rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
const int *triangles = mesh->get_triangles().data();
if (!rtc_device_is_sycl) {
rtcSetSharedGeometryBuffer(geom_id,
RTC_BUFFER_TYPE_INDEX,
0,
RTC_FORMAT_UINT3,
triangles,
0,
sizeof(int) * 3,
num_triangles);
}
else {
/* NOTE(sirgienko): If the Embree device is a SYCL device, then Embree execution will
* happen on GPU, and we cannot use standard host pointers at this point. So instead
* of making a shared geometry buffer - a new Embree buffer will be created and data
* will be copied. */
int *triangles_buffer = (int *)rtcSetNewGeometryBuffer(
geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, sizeof(int) * 3, num_triangles);
assert(triangles_buffer);
if (triangles_buffer) {
static_assert(sizeof(int) == sizeof(uint));
std::memcpy(triangles_buffer, triangles, sizeof(int) * 3 * (num_triangles));
}
}
rtcSetSharedGeometryBuffer(geom_id,
RTC_BUFFER_TYPE_INDEX,
0,
RTC_FORMAT_UINT3,
triangles,
0,
sizeof(int) * 3,
num_triangles);
set_tri_vertex_buffer(geom_id, mesh, false);
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
@@ -341,38 +323,14 @@ void BVHEmbree::set_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh, con
rtcUpdateGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t);
}
else {
if (!rtc_device_is_sycl) {
rtcSetSharedGeometryBuffer(geom_id,
RTC_BUFFER_TYPE_VERTEX,
t,
RTC_FORMAT_FLOAT3,
verts,
0,
sizeof(float3),
num_verts + 1);
}
else {
/* NOTE(sirgienko): If the Embree device is a SYCL device, then Embree execution will
* happen on GPU, and we cannot use standard host pointers at this point. So instead
* of making a shared geometry buffer - a new Embree buffer will be created and data
* will be copied. */
/* As float3 is packed on GPU side, we map it to packed_float3. */
packed_float3 *verts_buffer = (packed_float3 *)rtcSetNewGeometryBuffer(
geom_id,
RTC_BUFFER_TYPE_VERTEX,
t,
RTC_FORMAT_FLOAT3,
sizeof(packed_float3),
num_verts + 1);
assert(verts_buffer);
if (verts_buffer) {
for (size_t i = (size_t)0; i < num_verts + 1; ++i) {
verts_buffer[i].x = verts[i].x;
verts_buffer[i].y = verts[i].y;
verts_buffer[i].z = verts[i].z;
}
}
}
rtcSetSharedGeometryBuffer(geom_id,
RTC_BUFFER_TYPE_VERTEX,
t,
RTC_FORMAT_FLOAT3,
verts,
0,
sizeof(float3),
num_verts + 1);
}
}
}

View File

@@ -29,10 +29,7 @@ class PointCloud;
class BVHEmbree : public BVH {
public:
void build(Progress &progress,
Stats *stats,
RTCDevice rtc_device,
const bool isSyclEmbreeDevice = false);
void build(Progress &progress, Stats *stats, RTCDevice rtc_device);
void refit(Progress &progress);
RTCScene scene;
@@ -58,7 +55,6 @@ class BVHEmbree : public BVH {
const bool update);
RTCDevice rtc_device;
bool rtc_device_is_sycl;
enum RTCBuildQuality build_quality;
};

View File

@@ -42,19 +42,15 @@ endif()
###########################################################################
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
if(UNIX)
# Disabled until there is a HIP 5.5 release for Linux.
set(WITH_CYCLES_HIP_BINARIES OFF)
message(STATUS "HIP temporarily disabled due to compiler bugs")
else()
# Need at least HIP 5.5 to solve compiler bug affecting the kernel.
find_package(HIP 5.5.0)
set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
set(WITH_CYCLES_HIP_BINARIES OFF)
message(STATUS "HIP temporarily disabled due to compiler bugs")
if(HIP_FOUND)
message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
endif()
endif()
# find_package(HIP)
# set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
# if(HIP_FOUND)
# message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
# endif()
endif()
if(NOT WITH_HIP_DYNLOAD)

View File

@@ -84,7 +84,7 @@ CPUDevice::~CPUDevice()
texture_info.free();
}
BVHLayoutMask CPUDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
BVHLayoutMask CPUDevice::get_bvh_layout_mask() const
{
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
#ifdef WITH_EMBREE

View File

@@ -56,7 +56,7 @@ class CPUDevice : public Device {
CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_);
~CPUDevice();
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
/* Returns true if the texture info was copied to the device (meaning, some more
* re-initialization might be needed). */

View File

@@ -35,7 +35,7 @@ bool CUDADevice::have_precompiled_kernels()
return path_exists(cubins_path);
}
BVHLayoutMask CUDADevice::get_bvh_layout_mask(uint /*kernel_features*/) const
BVHLayoutMask CUDADevice::get_bvh_layout_mask() const
{
return BVH_LAYOUT_BVH2;
}

View File

@@ -38,7 +38,7 @@ class CUDADevice : public GPUDevice {
static bool have_precompiled_kernels();
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
void set_error(const string &error) override;

View File

@@ -354,7 +354,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.has_guiding = true;
info.has_profiling = true;
info.has_peer_memory = false;
info.use_hardware_raytracing = false;
info.use_metalrt = false;
info.denoisers = DENOISER_ALL;
foreach (const DeviceInfo &device, subdevices) {
@@ -403,7 +403,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.has_guiding &= device.has_guiding;
info.has_profiling &= device.has_profiling;
info.has_peer_memory |= device.has_peer_memory;
info.use_hardware_raytracing |= device.use_hardware_raytracing;
info.use_metalrt |= device.use_metalrt;
info.denoisers &= device.denoisers;
}

View File

@@ -71,16 +71,15 @@ class DeviceInfo {
string description;
string id; /* used for user preferences, should stay fixed with changing hardware config */
int num;
bool display_device; /* GPU is used as a display device. */
bool has_nanovdb; /* Support NanoVDB volumes. */
bool has_light_tree; /* Support light tree. */
bool has_osl; /* Support Open Shading Language. */
bool has_guiding; /* Support path guiding. */
bool has_profiling; /* Supports runtime collection of profiling info. */
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
bool has_gpu_queue; /* Device supports GPU queue. */
bool use_hardware_raytracing; /* Use hardware ray tracing to accelerate ray queries in a backend.
*/
bool display_device; /* GPU is used as a display device. */
bool has_nanovdb; /* Support NanoVDB volumes. */
bool has_light_tree; /* Support light tree. */
bool has_osl; /* Support Open Shading Language. */
bool has_guiding; /* Support path guiding. */
bool has_profiling; /* Supports runtime collection of profiling info. */
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
bool has_gpu_queue; /* Device supports GPU queue. */
bool use_metalrt; /* Use MetalRT to accelerate ray queries (Metal only). */
KernelOptimizationLevel kernel_optimization_level; /* Optimization level applied to path tracing
* kernels (Metal only). */
DenoiserTypeMask denoisers; /* Supported denoiser types. */
@@ -102,7 +101,7 @@ class DeviceInfo {
has_profiling = false;
has_peer_memory = false;
has_gpu_queue = false;
use_hardware_raytracing = false;
use_metalrt = false;
denoisers = DENOISER_NONE;
}
@@ -158,7 +157,7 @@ class Device {
fprintf(stderr, "%s\n", error.c_str());
fflush(stderr);
}
virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const = 0;
virtual BVHLayoutMask get_bvh_layout_mask() const = 0;
/* statistics */
Stats &stats;

View File

@@ -20,7 +20,7 @@ class DummyDevice : public Device {
~DummyDevice() {}
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override
virtual BVHLayoutMask get_bvh_layout_mask() const override
{
return 0;
}

View File

@@ -137,7 +137,7 @@ void device_hip_info(vector<DeviceInfo> &devices)
info.num = num;
info.has_nanovdb = true;
info.has_light_tree = true;
info.has_light_tree = false;
info.denoisers = 0;
info.has_gpu_queue = true;

View File

@@ -35,7 +35,7 @@ bool HIPDevice::have_precompiled_kernels()
return path_exists(fatbins_path);
}
BVHLayoutMask HIPDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
BVHLayoutMask HIPDevice::get_bvh_layout_mask() const
{
return BVH_LAYOUT_BVH2;
}

View File

@@ -35,7 +35,7 @@ class HIPDevice : public GPUDevice {
static bool have_precompiled_kernels();
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
void set_error(const string &error) override;

View File

@@ -3,9 +3,7 @@
#include "device/kernel.h"
#ifndef __KERNEL_ONEAPI__
# include "util/log.h"
#endif
#include "util/log.h"
CCL_NAMESPACE_BEGIN
@@ -155,13 +153,10 @@ const char *device_kernel_as_string(DeviceKernel kernel)
case DEVICE_KERNEL_NUM:
break;
};
#ifndef __KERNEL_ONEAPI__
LOG(FATAL) << "Unhandled kernel " << static_cast<int>(kernel) << ", should never happen.";
#endif
return "UNKNOWN";
}
#ifndef __KERNEL_ONEAPI__
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel)
{
os << device_kernel_as_string(kernel);
@@ -183,6 +178,5 @@ string device_kernel_mask_as_string(DeviceKernelMask mask)
return str;
}
#endif
CCL_NAMESPACE_END

View File

@@ -3,13 +3,11 @@
#pragma once
#ifndef __KERNEL_ONEAPI__
# include "kernel/types.h"
#include "kernel/types.h"
# include "util/string.h"
#include "util/string.h"
# include <ostream> // NOLINT
#endif
#include <ostream> // NOLINT
CCL_NAMESPACE_BEGIN
@@ -17,12 +15,9 @@ bool device_kernel_has_shading(DeviceKernel kernel);
bool device_kernel_has_intersection(DeviceKernel kernel);
const char *device_kernel_as_string(DeviceKernel kernel);
#ifndef __KERNEL_ONEAPI__
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel);
typedef uint64_t DeviceKernelMask;
string device_kernel_mask_as_string(DeviceKernelMask mask);
#endif
CCL_NAMESPACE_END

View File

@@ -100,7 +100,7 @@ class MetalDevice : public Device {
virtual void cancel() override;
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
void set_error(const string &error) override;

View File

@@ -39,7 +39,7 @@ bool MetalDevice::is_device_cancelled(int ID)
return get_device_by_ID(ID, lock) == nullptr;
}
BVHLayoutMask MetalDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
BVHLayoutMask MetalDevice::get_bvh_layout_mask() const
{
return use_metalrt ? BVH_LAYOUT_METAL : BVH_LAYOUT_BVH2;
}
@@ -100,12 +100,12 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
}
case METAL_GPU_AMD: {
max_threads_per_threadgroup = 128;
use_metalrt = info.use_hardware_raytracing;
use_metalrt = info.use_metalrt;
break;
}
case METAL_GPU_APPLE: {
max_threads_per_threadgroup = 512;
use_metalrt = info.use_hardware_raytracing;
use_metalrt = info.use_metalrt;
break;
}
}

View File

@@ -96,13 +96,12 @@ class MultiDevice : public Device {
return error_msg;
}
virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const override
virtual BVHLayoutMask get_bvh_layout_mask() const override
{
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
BVHLayoutMask bvh_layout_mask_all = BVH_LAYOUT_NONE;
foreach (const SubDevice &sub_device, devices) {
BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask(
kernel_features);
BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask();
bvh_layout_mask &= device_bvh_layout_mask;
bvh_layout_mask_all |= device_bvh_layout_mask;
}

View File

@@ -40,12 +40,12 @@ bool device_oneapi_init()
if (getenv("SYCL_CACHE_TRESHOLD") == nullptr) {
_putenv_s("SYCL_CACHE_THRESHOLD", "0");
}
if (getenv("ONEAPI_DEVICE_SELECTOR") == nullptr) {
if (getenv("SYCL_DEVICE_FILTER") == nullptr) {
if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
_putenv_s("ONEAPI_DEVICE_SELECTOR", "level_zero:*");
_putenv_s("SYCL_DEVICE_FILTER", "level_zero");
}
else {
_putenv_s("ONEAPI_DEVICE_SELECTOR", "!opencl:*");
_putenv_s("SYCL_DEVICE_FILTER", "level_zero,cuda,hip");
}
}
if (getenv("SYCL_ENABLE_PCI") == nullptr) {
@@ -58,10 +58,10 @@ bool device_oneapi_init()
setenv("SYCL_CACHE_PERSISTENT", "1", false);
setenv("SYCL_CACHE_THRESHOLD", "0", false);
if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
setenv("ONEAPI_DEVICE_SELECTOR", "level_zero:*", false);
setenv("SYCL_DEVICE_FILTER", "level_zero", false);
}
else {
setenv("ONEAPI_DEVICE_SELECTOR", "!opencl:*", false);
setenv("SYCL_DEVICE_FILTER", "level_zero,cuda,hip", false);
}
setenv("SYCL_ENABLE_PCI", "1", false);
setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false);
@@ -87,8 +87,7 @@ Device *device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &pro
}
#ifdef WITH_ONEAPI
static void device_iterator_cb(
const char *id, const char *name, int num, bool hwrt_support, void *user_ptr)
static void device_iterator_cb(const char *id, const char *name, int num, void *user_ptr)
{
vector<DeviceInfo> *devices = (vector<DeviceInfo> *)user_ptr;
@@ -113,13 +112,6 @@ static void device_iterator_cb(
/* NOTE(@nsirgien): Seems not possible to know from SYCL/oneAPI or Level0. */
info.display_device = false;
# ifdef WITH_EMBREE_GPU
info.use_hardware_raytracing = hwrt_support;
# else
info.use_hardware_raytracing = false;
(void)hwrt_support;
# endif
devices->push_back(info);
VLOG_INFO << "Added device \"" << name << "\" with id \"" << info.id << "\".";
}

View File

@@ -8,19 +8,7 @@
# include "util/debug.h"
# include "util/log.h"
# ifdef WITH_EMBREE_GPU
# include "bvh/embree.h"
# endif
# include "kernel/device/oneapi/globals.h"
# include "kernel/device/oneapi/kernel.h"
# if defined(WITH_EMBREE_GPU) && defined(EMBREE_SYCL_SUPPORT) && !defined(SYCL_LANGUAGE_VERSION)
/* These declarations are missing from embree headers when compiling from a compiler that doesn't
* support SYCL. */
extern "C" RTCDevice rtcNewSYCLDevice(sycl::context context, const char *config);
extern "C" bool rtcIsSYCLDeviceSupported(const sycl::device sycl_device);
# endif
CCL_NAMESPACE_BEGIN
@@ -34,29 +22,16 @@ static void queue_error_cb(const char *message, void *user_ptr)
OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: Device(info, stats, profiler),
device_queue_(nullptr),
# ifdef WITH_EMBREE_GPU
embree_device(nullptr),
embree_scene(nullptr),
# endif
texture_info_(this, "texture_info", MEM_GLOBAL),
kg_memory_(nullptr),
kg_memory_device_(nullptr),
kg_memory_size_(0)
{
need_texture_info_ = false;
use_hardware_raytracing = info.use_hardware_raytracing;
oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);
bool is_finished_ok = create_queue(device_queue_,
info.num,
# ifdef WITH_EMBREE_GPU
use_hardware_raytracing ? &embree_device : nullptr
# else
nullptr
# endif
);
bool is_finished_ok = create_queue(device_queue_, info.num);
if (is_finished_ok == false) {
set_error("oneAPI queue initialization error: got runtime exception \"" +
oneapi_error_string_ + "\"");
@@ -67,16 +42,6 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi
assert(device_queue_);
}
# ifdef WITH_EMBREE_GPU
use_hardware_raytracing = use_hardware_raytracing && (embree_device != nullptr);
# else
use_hardware_raytracing = false;
# endif
if (use_hardware_raytracing) {
VLOG_INFO << "oneAPI will use hardware ray tracing for intersection acceleration.";
}
size_t globals_segment_size;
is_finished_ok = kernel_globals_size(globals_segment_size);
if (is_finished_ok == false) {
@@ -99,11 +64,6 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi
OneapiDevice::~OneapiDevice()
{
# ifdef WITH_EMBREE_GPU
if (embree_device)
rtcReleaseDevice(embree_device);
# endif
texture_info_.free();
usm_free(device_queue_, kg_memory_);
usm_free(device_queue_, kg_memory_device_);
@@ -120,47 +80,15 @@ bool OneapiDevice::check_peer_access(Device * /*peer_device*/)
return false;
}
bool OneapiDevice::can_use_hardware_raytracing_for_features(uint requested_features) const
BVHLayoutMask OneapiDevice::get_bvh_layout_mask() const
{
/* MNEE and Ray-trace kernels currently don't work correctly with HWRT. */
return !(requested_features & (KERNEL_FEATURE_MNEE | KERNEL_FEATURE_NODE_RAYTRACE));
return BVH_LAYOUT_BVH2;
}
BVHLayoutMask OneapiDevice::get_bvh_layout_mask(uint requested_features) const
{
return (use_hardware_raytracing &&
can_use_hardware_raytracing_for_features(requested_features)) ?
BVH_LAYOUT_EMBREE :
BVH_LAYOUT_BVH2;
}
# ifdef WITH_EMBREE_GPU
void OneapiDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
{
if (embree_device && bvh->params.bvh_layout == BVH_LAYOUT_EMBREE) {
BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
if (refit) {
bvh_embree->refit(progress);
}
else {
bvh_embree->build(progress, &stats, embree_device, true);
}
if (bvh->params.top_level) {
embree_scene = bvh_embree->scene;
}
}
else {
Device::build_bvh(bvh, progress, refit);
}
}
# endif
bool OneapiDevice::load_kernels(const uint requested_features)
{
assert(device_queue_);
kernel_features = requested_features;
bool is_finished_ok = oneapi_run_test_kernel(device_queue_);
if (is_finished_ok == false) {
set_error("oneAPI test kernel execution: got a runtime exception \"" + oneapi_error_string_ +
@@ -172,14 +100,7 @@ bool OneapiDevice::load_kernels(const uint requested_features)
assert(device_queue_);
}
if (use_hardware_raytracing && !can_use_hardware_raytracing_for_features(requested_features)) {
VLOG_INFO
<< "Hardware ray tracing disabled, not supported yet by oneAPI for requested features.";
use_hardware_raytracing = false;
}
is_finished_ok = oneapi_load_kernels(
device_queue_, (const unsigned int)requested_features, use_hardware_raytracing);
is_finished_ok = oneapi_load_kernels(device_queue_, (const unsigned int)requested_features);
if (is_finished_ok == false) {
set_error("oneAPI kernels loading: got a runtime exception \"" + oneapi_error_string_ + "\"");
}
@@ -406,16 +327,6 @@ void OneapiDevice::const_copy_to(const char *name, void *host, size_t size)
<< string_human_readable_number(size) << " bytes. ("
<< string_human_readable_size(size) << ")";
# ifdef WITH_EMBREE_GPU
if (strcmp(name, "data") == 0) {
assert(size <= sizeof(KernelData));
/* Update scene handle(since it is different for each device on multi devices) */
KernelData *const data = (KernelData *)host;
data->device_bvh = embree_scene;
}
# endif
ConstMemMap::iterator i = const_mem_map_.find(name);
device_vector<uchar> *data;
@@ -535,9 +446,7 @@ void OneapiDevice::check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_
# endif
}
bool OneapiDevice::create_queue(SyclQueue *&external_queue,
int device_index,
void *embree_device_pointer)
bool OneapiDevice::create_queue(SyclQueue *&external_queue, int device_index)
{
bool finished_correct = true;
try {
@@ -548,13 +457,6 @@ bool OneapiDevice::create_queue(SyclQueue *&external_queue,
sycl::queue *created_queue = new sycl::queue(devices[device_index],
sycl::property::queue::in_order());
external_queue = reinterpret_cast<SyclQueue *>(created_queue);
# ifdef WITH_EMBREE_GPU
if (embree_device_pointer) {
*((RTCDevice *)embree_device_pointer) = rtcNewSYCLDevice(created_queue->get_context(), "");
}
# else
(void)embree_device_pointer;
# endif
}
catch (sycl::exception const &e) {
finished_correct = false;
@@ -723,8 +625,7 @@ bool OneapiDevice::enqueue_kernel(KernelContext *kernel_context,
size_t global_size,
void **args)
{
return oneapi_enqueue_kernel(
kernel_context, kernel, global_size, kernel_features, use_hardware_raytracing, args);
return oneapi_enqueue_kernel(kernel_context, kernel, global_size, args);
}
/* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows
@@ -866,9 +767,9 @@ char *OneapiDevice::device_capabilities()
sycl::id<3> max_work_item_sizes =
device.get_info<sycl::info::device::max_work_item_sizes<3>>();
WRITE_ATTR(max_work_item_sizes_dim0, ((size_t)max_work_item_sizes.get(0)))
WRITE_ATTR(max_work_item_sizes_dim1, ((size_t)max_work_item_sizes.get(1)))
WRITE_ATTR(max_work_item_sizes_dim2, ((size_t)max_work_item_sizes.get(2)))
WRITE_ATTR("max_work_item_sizes_dim0", ((size_t)max_work_item_sizes.get(0)))
WRITE_ATTR("max_work_item_sizes_dim1", ((size_t)max_work_item_sizes.get(1)))
WRITE_ATTR("max_work_item_sizes_dim2", ((size_t)max_work_item_sizes.get(2)))
GET_NUM_ATTR(max_work_group_size)
GET_NUM_ATTR(max_num_sub_groups)
@@ -891,7 +792,7 @@ char *OneapiDevice::device_capabilities()
GET_NUM_ATTR(native_vector_width_half)
size_t max_clock_frequency = device.get_info<sycl::info::device::max_clock_frequency>();
WRITE_ATTR(max_clock_frequency, max_clock_frequency)
WRITE_ATTR("max_clock_frequency", max_clock_frequency)
GET_NUM_ATTR(address_bits)
GET_NUM_ATTR(max_mem_alloc_size)
@@ -900,7 +801,7 @@ char *OneapiDevice::device_capabilities()
* supported so we always return false, even if device supports HW texture usage acceleration.
*/
bool image_support = false;
WRITE_ATTR(image_support, (size_t)image_support)
WRITE_ATTR("image_support", (size_t)image_support)
GET_NUM_ATTR(max_parameter_size)
GET_NUM_ATTR(mem_base_addr_align)
@@ -929,17 +830,12 @@ void OneapiDevice::iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_p
std::string name = device.get_info<sycl::info::device::name>();
# else
std::string name = "SYCL Host Task (Debug)";
# endif
# ifdef WITH_EMBREE_GPU
bool hwrt_support = rtcIsSYCLDeviceSupported(device);
# else
bool hwrt_support = false;
# endif
std::string id = "ONEAPI_" + platform_name + "_" + name;
if (device.has(sycl::aspect::ext_intel_pci_address)) {
id.append("_" + device.get_info<sycl::ext::intel::info::device::pci_address>());
}
(cb)(id.c_str(), name.c_str(), num, hwrt_support, user_ptr);
(cb)(id.c_str(), name.c_str(), num, user_ptr);
num++;
}
}

View File

@@ -16,16 +16,15 @@ CCL_NAMESPACE_BEGIN
class DeviceQueue;
typedef void (*OneAPIDeviceIteratorCallback)(
const char *id, const char *name, int num, bool hwrt_support, void *user_ptr);
typedef void (*OneAPIDeviceIteratorCallback)(const char *id,
const char *name,
int num,
void *user_ptr);
class OneapiDevice : public Device {
private:
SyclQueue *device_queue_;
# ifdef WITH_EMBREE_GPU
RTCDevice embree_device;
RTCScene embree_scene;
# endif
using ConstMemMap = map<string, device_vector<uchar> *>;
ConstMemMap const_mem_map_;
device_vector<TextureInfo> texture_info_;
@@ -35,21 +34,17 @@ class OneapiDevice : public Device {
size_t kg_memory_size_ = (size_t)0;
size_t max_memory_on_device_ = (size_t)0;
std::string oneapi_error_string_;
bool use_hardware_raytracing = false;
unsigned int kernel_features = 0;
public:
virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const override;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
virtual ~OneapiDevice();
# ifdef WITH_EMBREE_GPU
void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
# endif
bool check_peer_access(Device *peer_device) override;
bool load_kernels(const uint kernel_features) override;
bool load_kernels(const uint requested_features) override;
void load_texture_info();
@@ -118,9 +113,8 @@ class OneapiDevice : public Device {
SyclQueue *sycl_queue();
protected:
bool can_use_hardware_raytracing_for_features(uint kernel_features) const;
void check_usm(SyclQueue *queue, const void *usm_ptr, bool allow_host);
bool create_queue(SyclQueue *&external_queue, int device_index, void *embree_device);
bool create_queue(SyclQueue *&external_queue, int device_index);
void free_queue(SyclQueue *queue);
void *usm_aligned_alloc_host(SyclQueue *queue, size_t memory_size, size_t alignment);
void *usm_alloc_device(SyclQueue *queue, size_t memory_size);

View File

@@ -151,7 +151,7 @@ unique_ptr<DeviceQueue> OptiXDevice::gpu_queue_create()
return make_unique<OptiXDeviceQueue>(this);
}
BVHLayoutMask OptiXDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
BVHLayoutMask OptiXDevice::get_bvh_layout_mask() const
{
/* OptiX has its own internal acceleration structure format. */
return BVH_LAYOUT_OPTIX;

View File

@@ -88,7 +88,7 @@ class OptiXDevice : public CUDADevice {
OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
~OptiXDevice();
BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
BVHLayoutMask get_bvh_layout_mask() const override;
string compile_kernel_get_common_cflags(const uint kernel_features);

View File

@@ -574,7 +574,7 @@ void PathTrace::denoise(const RenderWork &render_work)
void PathTrace::set_output_driver(unique_ptr<OutputDriver> driver)
{
output_driver_ = std::move(driver);
output_driver_ = move(driver);
}
void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
@@ -585,7 +585,7 @@ void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
destroy_gpu_resources();
if (driver) {
display_ = make_unique<PathTraceDisplay>(std::move(driver));
display_ = make_unique<PathTraceDisplay>(move(driver));
}
else {
display_ = nullptr;

View File

@@ -9,9 +9,7 @@
CCL_NAMESPACE_BEGIN
PathTraceDisplay::PathTraceDisplay(unique_ptr<DisplayDriver> driver) : driver_(std::move(driver))
{
}
PathTraceDisplay::PathTraceDisplay(unique_ptr<DisplayDriver> driver) : driver_(move(driver)) {}
void PathTraceDisplay::reset(const BufferParams &buffer_params, const bool reset_rendering)
{

View File

@@ -357,12 +357,8 @@ void PathTraceWorkCPU::guiding_push_sample_data_to_global_storage(
# if PATH_GUIDING_LEVEL >= 2
const bool use_direct_light = kernel_data.integrator.use_guiding_direct_light;
const bool use_mis_weights = kernel_data.integrator.use_guiding_mis_weights;
# if OPENPGL_VERSION_MINOR >= 5
kg->opgl_path_segment_storage->PrepareSamples(use_mis_weights, use_direct_light, false);
# else
kg->opgl_path_segment_storage->PrepareSamples(
false, nullptr, use_mis_weights, use_direct_light, false);
# endif
# endif
# ifdef WITH_CYCLES_DEBUG

View File

@@ -28,7 +28,6 @@ static size_t estimate_single_state_size(const uint kernel_features)
#define KERNEL_STRUCT_ARRAY_MEMBER(parent_struct, type, name, feature) \
state_size += (kernel_features & (feature)) ? sizeof(type) : 0;
#define KERNEL_STRUCT_END(name) \
(void)array_index; \
break; \
}
#define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
@@ -140,7 +139,6 @@ void PathTraceWorkGPU::alloc_integrator_soa()
integrator_state_gpu_.parent_struct[array_index].name = (type *)array->device_pointer; \
}
#define KERNEL_STRUCT_END(name) \
(void)array_index; \
break; \
}
#define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
@@ -301,8 +299,8 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
* become busy after adding new tiles). This is especially important for the shadow catcher which
* schedules work in halves of available number of paths. */
work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8);
work_tile_scheduler_.set_accelerated_rt(
(device_->get_bvh_layout_mask(device_scene_->data.kernel_features) & BVH_LAYOUT_OPTIX) != 0);
work_tile_scheduler_.set_accelerated_rt((device_->get_bvh_layout_mask() & BVH_LAYOUT_OPTIX) !=
0);
work_tile_scheduler_.reset(effective_buffer_params_,
start_sample,
samples_num,

View File

@@ -55,29 +55,21 @@ void WorkTileScheduler::reset_scheduler_state()
VLOG_WORK << "Will schedule tiles of size " << tile_size_;
const int num_path_states_in_tile = tile_size_.width * tile_size_.height *
tile_size_.num_samples;
if (num_path_states_in_tile == 0) {
num_tiles_x_ = 0;
num_tiles_y_ = 0;
num_tiles_per_sample_range_ = 0;
if (VLOG_IS_ON(3)) {
/* The logging is based on multiple tiles scheduled, ignoring overhead of multi-tile scheduling
* and purely focusing on the number of used path states. */
const int num_path_states_in_tile = tile_size_.width * tile_size_.height *
tile_size_.num_samples;
const int num_tiles = max_num_path_states_ / num_path_states_in_tile;
VLOG_WORK << "Number of unused path states: "
<< max_num_path_states_ - num_tiles * num_path_states_in_tile;
}
else {
if (VLOG_IS_ON(3)) {
/* The logging is based on multiple tiles scheduled, ignoring overhead of multi-tile
* scheduling and purely focusing on the number of used path states. */
const int num_tiles = max_num_path_states_ / num_path_states_in_tile;
VLOG_WORK << "Number of unused path states: "
<< max_num_path_states_ - num_tiles * num_path_states_in_tile;
}
num_tiles_x_ = divide_up(image_size_px_.x, tile_size_.width);
num_tiles_y_ = divide_up(image_size_px_.y, tile_size_.height);
num_tiles_per_sample_range_ = divide_up(samples_num_, tile_size_.num_samples);
}
num_tiles_x_ = divide_up(image_size_px_.x, tile_size_.width);
num_tiles_y_ = divide_up(image_size_px_.y, tile_size_.height);
total_tiles_num_ = num_tiles_x_ * num_tiles_y_;
num_tiles_per_sample_range_ = divide_up(samples_num_, tile_size_.num_samples);
next_work_index_ = 0;
total_work_size_ = total_tiles_num_ * num_tiles_per_sample_range_;

View File

@@ -96,13 +96,10 @@ set(SRC_KERNEL_DEVICE_ONEAPI_HEADERS
device/oneapi/compat.h
device/oneapi/context_begin.h
device/oneapi/context_end.h
device/oneapi/context_intersect_begin.h
device/oneapi/context_intersect_end.h
device/oneapi/globals.h
device/oneapi/image.h
device/oneapi/kernel.h
device/oneapi/kernel_templates.h
device/cpu/bvh.h
)
set(SRC_KERNEL_CLOSURE_HEADERS
@@ -767,7 +764,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
# Set defaults for spir64 and spir64_gen options
if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64)
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-regular-grf-kernel integrator_intersect -ze-opt-large-grf-kernel shade -ze-opt-no-local-to-generic'")
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-large-register-file -ze-opt-regular-grf-kernel integrator_intersect'")
endif()
if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen)
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}" CACHE STRING "Extra build options for spir64_gen target")
@@ -778,6 +775,8 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
# Host execution won't use GPU binaries, no need to compile them.
if(WITH_CYCLES_ONEAPI_BINARIES AND NOT WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
# AoT binaries aren't currently reused when calling sycl::build.
list(APPEND sycl_compiler_flags -DSYCL_SKIP_KERNELS_PRELOAD)
# Iterate over all targest and their options
list(JOIN CYCLES_ONEAPI_SYCL_TARGETS "," targets_string)
list(APPEND sycl_compiler_flags -fsycl-targets=${targets_string})
@@ -799,59 +798,6 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
-I"${NANOVDB_INCLUDE_DIR}")
endif()
if(WITH_CYCLES_EMBREE AND EMBREE_SYCL_SUPPORT)
list(APPEND sycl_compiler_flags
-DWITH_EMBREE
-DWITH_EMBREE_GPU
-DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION}
-I"${EMBREE_INCLUDE_DIRS}")
if(WIN32)
list(APPEND sycl_compiler_flags
-ladvapi32.lib
)
endif()
set(next_library_mode "")
foreach(library ${EMBREE_LIBRARIES})
string(TOLOWER "${library}" library_lower)
if(("${library_lower}" STREQUAL "optimized") OR
("${library_lower}" STREQUAL "debug"))
set(next_library_mode "${library_lower}")
else()
if(next_library_mode STREQUAL "")
list(APPEND EMBREE_TBB_LIBRARIES_optimized ${library})
list(APPEND EMBREE_TBB_LIBRARIES_debug ${library})
else()
list(APPEND EMBREE_TBB_LIBRARIES_${next_library_mode} ${library})
endif()
set(next_library_mode "")
endif()
endforeach()
foreach(library ${TBB_LIBRARIES})
string(TOLOWER "${library}" library_lower)
if(("${library_lower}" STREQUAL "optimized") OR
("${library_lower}" STREQUAL "debug"))
set(next_library_mode "${library_lower}")
else()
if(next_library_mode STREQUAL "")
list(APPEND EMBREE_TBB_LIBRARIES_optimized ${library})
list(APPEND EMBREE_TBB_LIBRARIES_debug ${library})
else()
list(APPEND EMBREE_TBB_LIBRARIES_${next_library_mode} ${library})
endif()
set(next_library_mode "")
endif()
endforeach()
list(APPEND sycl_compiler_flags
"$<$<CONFIG:Release>:${EMBREE_TBB_LIBRARIES_optimized}>"
"$<$<CONFIG:RelWithDebInfo>:${EMBREE_TBB_LIBRARIES_optimized}>"
"$<$<CONFIG:MinSizeRel>:${EMBREE_TBB_LIBRARIES_optimized}>"
"$<$<CONFIG:Debug>:${EMBREE_TBB_LIBRARIES_debug}>"
)
endif()
if(WITH_CYCLES_DEBUG)
list(APPEND sycl_compiler_flags -DWITH_CYCLES_DEBUG)
endif()

View File

@@ -21,28 +21,6 @@
# define __BVH2__
#endif
#if defined(__KERNEL_ONEAPI__) && defined(WITH_EMBREE_GPU)
/* bool is apparently not tested for specialization constants:
* https://github.com/intel/llvm/blob/39d1c65272a786b2b13a6f094facfddf9408406d/sycl/test/basic_tests/SYCL-2020-spec-constants.cpp#L25-L27
* Instead of adding one more bool specialization constant, we reuse existing embree_features one
* and use RTC_FEATURE_FLAG_NONE as value to test for avoiding to call Embree on GPU.
*/
/* We set it to RTC_FEATURE_FLAG_NONE by default so AoT binaries contain MNE and ray-trace kernels
* pre-compiled without Embree.
* Changing this default value would require updating the logic in oneapi_load_kernels(). */
static constexpr sycl::specialization_id<RTCFeatureFlags> oneapi_embree_features{
RTC_FEATURE_FLAG_NONE};
# define IF_USING_EMBREE \
if (kernel_handler.get_specialization_constant<oneapi_embree_features>() != \
RTC_FEATURE_FLAG_NONE)
# define IF_NOT_USING_EMBREE \
if (kernel_handler.get_specialization_constant<oneapi_embree_features>() == \
RTC_FEATURE_FLAG_NONE)
#else
# define IF_USING_EMBREE
# define IF_NOT_USING_EMBREE
#endif
CCL_NAMESPACE_BEGIN
#ifdef __BVH2__
@@ -96,39 +74,30 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
}
# ifdef __EMBREE__
IF_USING_EMBREE
{
if (kernel_data.device_bvh) {
return kernel_embree_intersect(kg, ray, visibility, isect);
}
if (kernel_data.device_bvh) {
return kernel_embree_intersect(kg, ray, visibility, isect);
}
# endif
IF_NOT_USING_EMBREE
{
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
if (kernel_data.bvh.have_motion) {
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_hair_motion(kg, ray, isect, visibility);
}
if (kernel_data.bvh.have_curves) {
return bvh_intersect_hair_motion(kg, ray, isect, visibility);
}
# endif /* __HAIR__ */
return bvh_intersect_motion(kg, ray, isect, visibility);
}
return bvh_intersect_motion(kg, ray, isect, visibility);
}
# endif /* __OBJECT_MOTION__ */
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_hair(kg, ray, isect, visibility);
}
if (kernel_data.bvh.have_curves) {
return bvh_intersect_hair(kg, ray, isect, visibility);
}
# endif /* __HAIR__ */
return bvh_intersect(kg, ray, isect, visibility);
}
kernel_assert(false);
return false;
return bvh_intersect(kg, ray, isect, visibility);
}
/* Single object BVH traversal, for SSS/AO/bevel. */
@@ -160,27 +129,17 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
}
# ifdef __EMBREE__
IF_USING_EMBREE
{
if (kernel_data.device_bvh) {
return kernel_embree_intersect_local(
kg, ray, local_isect, local_object, lcg_state, max_hits);
}
if (kernel_data.device_bvh) {
return kernel_embree_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
# endif
IF_NOT_USING_EMBREE
{
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
# endif /* __OBJECT_MOTION__ */
return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
if (kernel_data.bvh.have_motion) {
return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
kernel_assert(false);
return false;
# endif /* __OBJECT_MOTION__ */
return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
# endif
@@ -225,44 +184,35 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
}
# ifdef __EMBREE__
IF_USING_EMBREE
{
if (kernel_data.device_bvh) {
return kernel_embree_intersect_shadow_all(
kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
}
if (kernel_data.device_bvh) {
return kernel_embree_intersect_shadow_all(
kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
}
# endif
IF_NOT_USING_EMBREE
{
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
if (kernel_data.bvh.have_motion) {
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair_motion(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
# endif /* __HAIR__ */
return bvh_intersect_shadow_all_motion(
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair_motion(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
# endif /* __HAIR__ */
return bvh_intersect_shadow_all_motion(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
# endif /* __OBJECT_MOTION__ */
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
# endif /* __HAIR__ */
return bvh_intersect_shadow_all(
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
# endif /* __HAIR__ */
kernel_assert(false);
return false;
return bvh_intersect_shadow_all(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
# endif /* __SHADOW_RECORD_ALL__ */
@@ -289,28 +239,13 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
return false;
}
# ifdef __EMBREE__
IF_USING_EMBREE
{
if (kernel_data.device_bvh) {
return kernel_embree_intersect_volume(kg, ray, isect, visibility);
}
}
# endif
IF_NOT_USING_EMBREE
{
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
return bvh_intersect_volume_motion(kg, ray, isect, visibility);
}
if (kernel_data.bvh.have_motion) {
return bvh_intersect_volume_motion(kg, ray, isect, visibility);
}
# endif /* __OBJECT_MOTION__ */
return bvh_intersect_volume(kg, ray, isect, visibility);
}
kernel_assert(false);
return false;
return bvh_intersect_volume(kg, ray, isect, visibility);
}
# endif /* defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__) */
@@ -340,27 +275,18 @@ ccl_device_intersect uint scene_intersect_volume(KernelGlobals kg,
}
# ifdef __EMBREE__
IF_USING_EMBREE
{
if (kernel_data.device_bvh) {
return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
}
if (kernel_data.device_bvh) {
return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
}
# endif
IF_NOT_USING_EMBREE
{
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
}
if (kernel_data.bvh.have_motion) {
return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
}
# endif /* __OBJECT_MOTION__ */
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
}
kernel_assert(false);
return false;
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
}
# endif /* defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__) */

View File

@@ -51,6 +51,8 @@ ccl_device_inline
int object = OBJECT_NONE;
float isect_t = ray->tmax;
int num_hits_in_instance = 0;
uint num_hits = 0;
isect_array->t = ray->tmax;
@@ -150,6 +152,7 @@ ccl_device_inline
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
num_hits_in_instance++;
isect_array->t = isect_t;
if (num_hits == max_hits) {
return num_hits;
@@ -190,6 +193,7 @@ ccl_device_inline
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
num_hits_in_instance++;
isect_array->t = isect_t;
if (num_hits == max_hits) {
return num_hits;
@@ -215,6 +219,7 @@ ccl_device_inline
bvh_instance_push(kg, object, ray, &P, &dir, &idir);
#endif
num_hits_in_instance = 0;
isect_array->t = isect_t;
++stack_ptr;

View File

@@ -64,7 +64,6 @@ KERNEL_DATA_ARRAY(float2, light_background_conditional_cdf)
KERNEL_DATA_ARRAY(KernelLightTreeNode, light_tree_nodes)
KERNEL_DATA_ARRAY(KernelLightTreeEmitter, light_tree_emitters)
KERNEL_DATA_ARRAY(uint, light_to_tree)
KERNEL_DATA_ARRAY(uint, object_to_tree)
KERNEL_DATA_ARRAY(uint, object_lookup_offset)
KERNEL_DATA_ARRAY(uint, triangle_to_tree)

View File

@@ -20,7 +20,6 @@ KERNEL_STRUCT_BEGIN(KernelBackground, background)
/* xyz store direction, w the angle. float4 instead of float3 is used
* to ensure consistent padding/alignment across devices. */
KERNEL_STRUCT_MEMBER(background, float4, sun)
KERNEL_STRUCT_MEMBER(background, int, use_sun_guiding)
/* Only shader index. */
KERNEL_STRUCT_MEMBER(background, int, surface_shader)
KERNEL_STRUCT_MEMBER(background, int, volume_shader)
@@ -40,10 +39,6 @@ KERNEL_STRUCT_MEMBER(background, int, use_mis)
KERNEL_STRUCT_MEMBER(background, int, lightgroup)
/* Light Index. */
KERNEL_STRUCT_MEMBER(background, int, light_index)
/* Padding. */
KERNEL_STRUCT_MEMBER(background, int, pad1)
KERNEL_STRUCT_MEMBER(background, int, pad2)
KERNEL_STRUCT_MEMBER(background, int, pad3)
KERNEL_STRUCT_END(KernelBackground)
/* BVH: own BVH2 if no native device acceleration struct used. */

View File

@@ -13,13 +13,8 @@
# include <embree3/rtcore_scene.h>
#endif
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/compat.h"
# include "kernel/device/oneapi/globals.h"
#else
# include "kernel/device/cpu/compat.h"
# include "kernel/device/cpu/globals.h"
#endif
#include "kernel/device/cpu/compat.h"
#include "kernel/device/cpu/globals.h"
#include "kernel/bvh/types.h"
#include "kernel/bvh/util.h"
@@ -38,16 +33,11 @@ using numhit_t = uint8_t;
using numhit_t = uint32_t;
#endif
#ifdef __KERNEL_ONEAPI__
# define CYCLES_EMBREE_USED_FEATURES \
(kernel_handler.get_specialization_constant<oneapi_embree_features>())
#else
# define CYCLES_EMBREE_USED_FEATURES \
(RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE | \
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT | \
RTC_FEATURE_FLAG_MOTION_BLUR | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE | \
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE)
#endif
#define CYCLES_EMBREE_USED_FEATURES \
(RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE | \
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT | \
RTC_FEATURE_FLAG_MOTION_BLUR | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE | \
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE)
#define EMBREE_IS_HAIR(x) (x & 1)
@@ -109,9 +99,7 @@ struct CCLVolumeContext
#if EMBREE_MAJOR_VERSION >= 4
KernelGlobals kg;
const Ray *ray;
# ifdef __VOLUME_RECORD_ALL__
numhit_t max_hits;
# endif
numhit_t num_hits;
#endif
Intersection *vol_isect;
@@ -264,8 +252,7 @@ ccl_device_inline void kernel_embree_convert_sss_hit(KernelGlobals kg,
* Things like recording subsurface or shadow hits for later evaluation
* as well as filtering for volume objects happen here.
* Cycles' own BVH does that directly inside the traversal calls. */
ccl_device_forceinline void kernel_embree_filter_intersection_func_impl(
const RTCFilterFunctionNArguments *args)
ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNArguments *args)
{
/* Current implementation in Cycles assumes only single-ray intersection queries. */
assert(args->N == 1);
@@ -276,11 +263,7 @@ ccl_device_forceinline void kernel_embree_filter_intersection_func_impl(
#else
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
#endif
#ifdef __KERNEL_ONEAPI__
KernelGlobalsGPU *kg = nullptr;
#else
const KernelGlobalsCPU *kg = ctx->kg;
#endif
const Ray *cray = ctx->ray;
if (kernel_embree_is_self_intersection(
@@ -294,7 +277,7 @@ ccl_device_forceinline void kernel_embree_filter_intersection_func_impl(
* as well as filtering for volume objects happen here.
* Cycles' own BVH does that directly inside the traversal calls.
*/
ccl_device_forceinline void kernel_embree_filter_occluded_shadow_all_func_impl(
ccl_device void kernel_embree_filter_occluded_shadow_all_func(
const RTCFilterFunctionNArguments *args)
{
/* Current implementation in Cycles assumes only single-ray intersection queries. */
@@ -307,11 +290,7 @@ ccl_device_forceinline void kernel_embree_filter_occluded_shadow_all_func_impl(
#else
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
#endif
#ifdef __KERNEL_ONEAPI__
KernelGlobalsGPU *kg = nullptr;
#else
const KernelGlobalsCPU *kg = ctx->kg;
#endif
const Ray *cray = ctx->ray;
Intersection current_isect;
@@ -347,7 +326,7 @@ ccl_device_forceinline void kernel_embree_filter_occluded_shadow_all_func_impl(
}
/* Test if we need to record this transparent intersection. */
const numhit_t max_record_hits = min(ctx->max_hits, numhit_t(INTEGRATOR_SHADOW_ISECT_SIZE));
const numhit_t max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
if (ctx->num_recorded_hits < max_record_hits) {
/* If maximum number of hits was reached, replace the intersection with the
* highest distance. We want to find the N closest intersections. */
@@ -384,7 +363,7 @@ ccl_device_forceinline void kernel_embree_filter_occluded_shadow_all_func_impl(
*args->valid = 0;
}
ccl_device_forceinline void kernel_embree_filter_occluded_local_func_impl(
ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
const RTCFilterFunctionNArguments *args)
{
/* Current implementation in Cycles assumes only single-ray intersection queries. */
@@ -397,11 +376,7 @@ ccl_device_forceinline void kernel_embree_filter_occluded_local_func_impl(
#else
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
#endif
#ifdef __KERNEL_ONEAPI__
KernelGlobalsGPU *kg = nullptr;
#else
const KernelGlobalsCPU *kg = ctx->kg;
#endif
const Ray *cray = ctx->ray;
/* Check if it's hitting the correct object. */
@@ -487,7 +462,7 @@ ccl_device_forceinline void kernel_embree_filter_occluded_local_func_impl(
*args->valid = 0;
}
ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func_impl(
ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
const RTCFilterFunctionNArguments *args)
{
/* Current implementation in Cycles assumes only single-ray intersection queries. */
@@ -500,17 +475,11 @@ ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func_impl(
#else
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
#endif
#ifdef __KERNEL_ONEAPI__
KernelGlobalsGPU *kg = nullptr;
#else
const KernelGlobalsCPU *kg = ctx->kg;
#endif
const Ray *cray = ctx->ray;
#ifdef __VOLUME_RECORD_ALL__
/* Append the intersection to the end of the array. */
if (ctx->num_hits < ctx->max_hits) {
#endif
Intersection current_isect;
kernel_embree_convert_hit(
kg, ray, hit, &current_isect, reinterpret_cast<intptr_t>(args->geometryUserPtr));
@@ -527,17 +496,10 @@ ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func_impl(
int object_flag = kernel_data_fetch(object_flag, tri_object);
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
--ctx->num_hits;
#ifndef __VOLUME_RECORD_ALL__
/* Without __VOLUME_RECORD_ALL__ we need only a first counted hit, so we will
* continue tracing only if a current hit is not counted. */
*args->valid = 0;
#endif
}
#ifdef __VOLUME_RECORD_ALL__
/* This tells Embree to continue tracing. */
*args->valid = 0;
}
#endif
}
#if EMBREE_MAJOR_VERSION < 4
@@ -551,14 +513,14 @@ ccl_device_forceinline void kernel_embree_filter_occluded_func(
switch (ctx->type) {
case CCLIntersectContext::RAY_SHADOW_ALL:
kernel_embree_filter_occluded_shadow_all_func_impl(args);
kernel_embree_filter_occluded_shadow_all_func(args);
break;
case CCLIntersectContext::RAY_LOCAL:
case CCLIntersectContext::RAY_SSS:
kernel_embree_filter_occluded_local_func_impl(args);
kernel_embree_filter_occluded_local_func(args);
break;
case CCLIntersectContext::RAY_VOLUME_ALL:
kernel_embree_filter_occluded_volume_all_func_impl(args);
kernel_embree_filter_occluded_volume_all_func(args);
break;
case CCLIntersectContext::RAY_REGULAR:
@@ -607,63 +569,7 @@ ccl_device void kernel_embree_filter_occluded_func_backface_cull(
kernel_embree_filter_occluded_func(args);
}
#endif
#ifdef __KERNEL_ONEAPI__
/* Static wrappers so we can call the callbacks from out side the ONEAPIKernelContext class */
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
kernel_embree_filter_intersection_func_static(const RTCFilterFunctionNArguments *args)
{
RTCHit *hit = (RTCHit *)args->hit;
CCLFirstHitContext *ctx = (CCLFirstHitContext *)(args->context);
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
context->kernel_embree_filter_intersection_func_impl(args);
}
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
kernel_embree_filter_occluded_shadow_all_func_static(const RTCFilterFunctionNArguments *args)
{
RTCHit *hit = (RTCHit *)args->hit;
CCLShadowContext *ctx = (CCLShadowContext *)(args->context);
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
context->kernel_embree_filter_occluded_shadow_all_func_impl(args);
}
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
kernel_embree_filter_occluded_local_func_static(const RTCFilterFunctionNArguments *args)
{
RTCHit *hit = (RTCHit *)args->hit;
CCLLocalContext *ctx = (CCLLocalContext *)(args->context);
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
context->kernel_embree_filter_occluded_local_func_impl(args);
}
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
kernel_embree_filter_occluded_volume_all_func_static(const RTCFilterFunctionNArguments *args)
{
RTCHit *hit = (RTCHit *)args->hit;
CCLVolumeContext *ctx = (CCLVolumeContext *)(args->context);
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
context->kernel_embree_filter_occluded_volume_all_func_impl(args);
}
# define kernel_embree_filter_intersection_func \
ONEAPIKernelContext::kernel_embree_filter_intersection_func_static
# define kernel_embree_filter_occluded_shadow_all_func \
ONEAPIKernelContext::kernel_embree_filter_occluded_shadow_all_func_static
# define kernel_embree_filter_occluded_local_func \
ONEAPIKernelContext::kernel_embree_filter_occluded_local_func_static
# define kernel_embree_filter_occluded_volume_all_func \
ONEAPIKernelContext::kernel_embree_filter_occluded_volume_all_func_static
#else
# define kernel_embree_filter_intersection_func kernel_embree_filter_intersection_func_impl
# if EMBREE_MAJOR_VERSION >= 4
# define kernel_embree_filter_occluded_shadow_all_func \
kernel_embree_filter_occluded_shadow_all_func_impl
# define kernel_embree_filter_occluded_local_func kernel_embree_filter_occluded_local_func_impl
# define kernel_embree_filter_occluded_volume_all_func \
kernel_embree_filter_occluded_volume_all_func_impl
# endif
#endif
/* Scene intersection. */
@@ -677,15 +583,7 @@ ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
#if EMBREE_MAJOR_VERSION >= 4
CCLFirstHitContext ctx;
rtcInitRayQueryContext(&ctx);
# ifdef __KERNEL_ONEAPI__
/* NOTE(sirgienko): Cycles GPU back-ends passes NULL to KernelGlobals and
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
* as a class context (Metal, oneAPI). So we need to pass this context here
* in order to have an access to it later in Embree filter functions on GPU. */
ctx.kg = (KernelGlobals)this;
# else
ctx.kg = kg;
# endif
#else
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
rtcInitIntersectContext(&ctx);
@@ -698,7 +596,7 @@ ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
#if EMBREE_MAJOR_VERSION >= 4
RTCIntersectArguments args;
rtcInitIntersectArguments(&args);
args.filter = reinterpret_cast<RTCFilterFunctionN>(kernel_embree_filter_intersection_func);
args.filter = (RTCFilterFunctionN)kernel_embree_filter_intersection_func;
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
rtcIntersect1(kernel_data.device_bvh, &ray_hit, &args);
@@ -727,15 +625,7 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
# if EMBREE_MAJOR_VERSION >= 4
CCLLocalContext ctx;
rtcInitRayQueryContext(&ctx);
# ifdef __KERNEL_ONEAPI__
/* NOTE(sirgienko): Cycles GPU back-ends passes NULL to KernelGlobals and
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
* as a class context (Metal, oneAPI). So we need to pass this context here
* in order to have an access to it later in Embree filter functions on GPU. */
ctx.kg = (KernelGlobals)this;
# else
ctx.kg = kg;
# endif
# else
CCLIntersectContext ctx(kg,
has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL);
@@ -756,7 +646,7 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
# if EMBREE_MAJOR_VERSION >= 4
RTCOccludedArguments args;
rtcInitOccludedArguments(&args);
args.filter = reinterpret_cast<RTCFilterFunctionN>(kernel_embree_filter_occluded_local_func);
args.filter = (RTCFilterFunctionN)(kernel_embree_filter_occluded_local_func);
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
# endif
@@ -802,7 +692,7 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
#ifdef __SHADOW_RECORD_ALL__
ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
IntegratorShadowState state,
IntegratorShadowStateCPU *state,
ccl_private const Ray *ray,
uint visibility,
uint max_hits,
@@ -812,15 +702,7 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
# if EMBREE_MAJOR_VERSION >= 4
CCLShadowContext ctx;
rtcInitRayQueryContext(&ctx);
# ifdef __KERNEL_ONEAPI__
/* NOTE(sirgienko): Cycles GPU back-ends passes NULL to KernelGlobals and
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
* as a class context (Metal, oneAPI). So we need to pass this context here
* in order to have an access to it later in Embree filter functions on GPU. */
ctx.kg = (KernelGlobals)this;
# else
ctx.kg = kg;
# endif
# else
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
rtcInitIntersectContext(&ctx);
@@ -836,8 +718,7 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
# if EMBREE_MAJOR_VERSION >= 4
RTCOccludedArguments args;
rtcInitOccludedArguments(&args);
args.filter = reinterpret_cast<RTCFilterFunctionN>(
kernel_embree_filter_occluded_shadow_all_func);
args.filter = (RTCFilterFunctionN)kernel_embree_filter_occluded_shadow_all_func;
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
@@ -855,31 +736,19 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
ccl_private const Ray *ray,
ccl_private Intersection *isect,
# ifdef __VOLUME_RECORD_ALL__
const uint max_hits,
# endif
const uint visibility)
{
# if EMBREE_MAJOR_VERSION >= 4
CCLVolumeContext ctx;
rtcInitRayQueryContext(&ctx);
# ifdef __KERNEL_ONEAPI__
/* NOTE(sirgienko) Cycles GPU back-ends passes NULL to KernelGlobals and
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
* as a class context (Metal, oneAPI). So we need to pass this context here
* in order to have an access to it later in Embree filter functions on GPU. */
ctx.kg = (KernelGlobals)this;
# else
ctx.kg = kg;
# endif
# else
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
rtcInitIntersectContext(&ctx);
# endif
ctx.vol_isect = isect;
# ifdef __VOLUME_RECORD_ALL__
ctx.max_hits = numhit_t(max_hits);
# endif
ctx.num_hits = numhit_t(0);
ctx.ray = ray;
RTCRay rtc_ray;
@@ -887,8 +756,7 @@ ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
# if EMBREE_MAJOR_VERSION >= 4
RTCOccludedArguments args;
rtcInitOccludedArguments(&args);
args.filter = reinterpret_cast<RTCFilterFunctionN>(
kernel_embree_filter_occluded_volume_all_func);
args.filter = (RTCFilterFunctionN)kernel_embree_filter_occluded_volume_all_func;
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);

View File

@@ -128,12 +128,6 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
ccl_gpu_kernel_postfix
/* Intersection kernels need access to the kernel handler for specialization constants to work
* properly. */
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_begin.h"
#endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_intersect_closest,
ccl_global const int *path_index_array,
@@ -191,10 +185,6 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
ccl_gpu_kernel_postfix
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_end.h"
#endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_background,
ccl_global const int *path_index_array,
@@ -259,12 +249,6 @@ ccl_gpu_kernel_postfix
constant int __dummy_constant [[function_constant(Kernel_DummyConstant)]];
#endif
/* Kernels using intersections need access to the kernel handler for specialization constants to
* work properly. */
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_begin.h"
#endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_surface_raytrace,
ccl_global const int *path_index_array,
@@ -303,9 +287,6 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
}
ccl_gpu_kernel_postfix
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_end.h"
#endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_volume,

View File

@@ -5,11 +5,6 @@
#define __KERNEL_GPU__
#define __KERNEL_ONEAPI__
#define __KERNEL_64_BIT__
#ifdef WITH_EMBREE_GPU
# define __KERNEL_GPU_RAYTRACING__
#endif
#define CCL_NAMESPACE_BEGIN
#define CCL_NAMESPACE_END
@@ -62,19 +57,17 @@
#define ccl_gpu_kernel_threads(block_num_threads)
#ifndef WITH_ONEAPI_SYCL_HOST_TASK
# define __ccl_gpu_kernel_signature(name, ...) \
# define ccl_gpu_kernel_signature(name, ...) \
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
size_t kernel_global_size, \
size_t kernel_local_size, \
sycl::handler &cgh, \
__VA_ARGS__) { \
(kg); \
cgh.parallel_for( \
cgh.parallel_for<class kernel_##name>( \
sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
[=](sycl::nd_item<1> item) {
# define ccl_gpu_kernel_signature __ccl_gpu_kernel_signature
# define ccl_gpu_kernel_postfix \
}); \
}

View File

@@ -1,18 +0,0 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2023 Intel Corporation */
#if !defined(WITH_ONEAPI_SYCL_HOST_TASK) && defined(WITH_EMBREE_GPU)
# undef ccl_gpu_kernel_signature
# define ccl_gpu_kernel_signature(name, ...) \
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
size_t kernel_global_size, \
size_t kernel_local_size, \
sycl::handler &cgh, \
__VA_ARGS__) \
{ \
(kg); \
cgh.parallel_for( \
sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
[=](sycl::nd_item<1> item, sycl::kernel_handler oneapi_kernel_handler) { \
((ONEAPIKernelContext*)kg)->kernel_handler = oneapi_kernel_handler;
#endif

View File

@@ -1,7 +0,0 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2023 Intel Corporation */
#if !defined(WITH_ONEAPI_SYCL_HOST_TASK) && defined(WITH_EMBREE_GPU)
# undef ccl_gpu_kernel_signature
# define ccl_gpu_kernel_signature __ccl_gpu_kernel_signature
#endif

View File

@@ -31,8 +31,6 @@ typedef struct KernelGlobalsGPU {
size_t nd_item_group_range_0;
size_t nd_item_global_id_0;
size_t nd_item_global_range_0;
#else
sycl::kernel_handler kernel_handler;
#endif
} KernelGlobalsGPU;

View File

@@ -16,22 +16,9 @@
# include "kernel/device/gpu/kernel.h"
# include "device/kernel.cpp"
static OneAPIErrorCallback s_error_cb = nullptr;
static void *s_error_user_ptr = nullptr;
# ifdef WITH_EMBREE_GPU
static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_BASIC_FEATURES =
(const RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE |
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS |
RTC_FEATURE_FLAG_POINT | RTC_FEATURE_FLAG_MOTION_BLUR);
static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_ALL_FEATURES =
(const RTCFeatureFlags)(CYCLES_ONEAPI_EMBREE_BASIC_FEATURES |
RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE |
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE);
# endif
void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr)
{
s_error_cb = cb;
@@ -155,99 +142,15 @@ size_t oneapi_kernel_preferred_local_size(SyclQueue *queue,
return std::min(limit_work_group_size, preferred_work_group_size);
}
bool oneapi_kernel_is_required_for_features(const std::string &kernel_name,
const uint kernel_features)
{
if ((kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0 &&
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE)) !=
std::string::npos)
return false;
if ((kernel_features & KERNEL_FEATURE_MNEE) == 0 &&
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE)) !=
std::string::npos)
return false;
if ((kernel_features & KERNEL_FEATURE_VOLUME) == 0 &&
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK)) !=
std::string::npos)
return false;
return true;
}
bool oneapi_kernel_is_raytrace_or_mnee(const std::string &kernel_name)
{
return (kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE)) !=
std::string::npos) ||
(kernel_name.find(device_kernel_as_string(
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE)) != std::string::npos);
}
bool oneapi_kernel_is_using_embree(const std::string &kernel_name)
{
# ifdef WITH_EMBREE_GPU
/* MNEE and Ray-trace kernels aren't yet enabled to use Embree. */
for (int i = 0; i < (int)DEVICE_KERNEL_NUM; i++) {
DeviceKernel kernel = (DeviceKernel)i;
if (device_kernel_has_intersection(kernel)) {
if (kernel_name.find(device_kernel_as_string(kernel)) != std::string::npos) {
return !oneapi_kernel_is_raytrace_or_mnee(kernel_name);
}
}
}
# endif
return false;
}
bool oneapi_load_kernels(SyclQueue *queue_,
const uint kernel_features,
bool use_hardware_raytracing)
bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
{
# ifdef SYCL_SKIP_KERNELS_PRELOAD
(void)queue_;
(void)requested_features;
# else
assert(queue_);
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
# ifdef WITH_EMBREE_GPU
/* For best performance, we always JIT compile the kernels that are using Embree. */
if (use_hardware_raytracing) {
try {
sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
{queue->get_device()});
for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
const std::string &kernel_name = kernel_id.get_name();
if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
!oneapi_kernel_is_using_embree(kernel_name)) {
continue;
}
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
/* Hair requires embree curves support. */
if (kernel_features & KERNEL_FEATURE_HAIR) {
one_kernel_bundle_input
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
CYCLES_ONEAPI_EMBREE_ALL_FEATURES);
sycl::build(one_kernel_bundle_input);
}
else {
one_kernel_bundle_input
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
CYCLES_ONEAPI_EMBREE_BASIC_FEATURES);
sycl::build(one_kernel_bundle_input);
}
}
}
catch (sycl::exception const &e) {
if (s_error_cb) {
s_error_cb(e.what(), s_error_user_ptr);
}
return false;
}
}
# endif
try {
sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
@@ -256,29 +159,27 @@ bool oneapi_load_kernels(SyclQueue *queue_,
for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
const std::string &kernel_name = kernel_id.get_name();
/* In case HWRT is on, compilation of kernels using Embree is already handled in previous
* block. */
if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
(use_hardware_raytracing && oneapi_kernel_is_using_embree(kernel_name))) {
/* NOTE(@nsirgien): Names in this conditions below should match names from
* oneapi_call macro in oneapi_enqueue_kernel below */
if (((requested_features & KERNEL_FEATURE_VOLUME) == 0) &&
kernel_name.find("oneapi_kernel_integrator_shade_volume") != std::string::npos) {
continue;
}
# ifdef WITH_EMBREE_GPU
if (oneapi_kernel_is_using_embree(kernel_name) ||
oneapi_kernel_is_raytrace_or_mnee(kernel_name)) {
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
one_kernel_bundle_input
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
RTC_FEATURE_FLAG_NONE);
sycl::build(one_kernel_bundle_input);
if (((requested_features & KERNEL_FEATURE_MNEE) == 0) &&
kernel_name.find("oneapi_kernel_integrator_shade_surface_mnee") != std::string::npos) {
continue;
}
# endif
/* This call will ensure that AoT or cached JIT binaries are available
* for execution. It will trigger compilation if it is not already the case. */
(void)sycl::get_kernel_bundle<sycl::bundle_state::executable>(queue->get_context(),
{kernel_id});
if (((requested_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0) &&
kernel_name.find("oneapi_kernel_integrator_shade_surface_raytrace") !=
std::string::npos) {
continue;
}
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle =
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
sycl::build(one_kernel_bundle);
}
}
catch (sycl::exception const &e) {
@@ -287,14 +188,13 @@ bool oneapi_load_kernels(SyclQueue *queue_,
}
return false;
}
# endif
return true;
}
bool oneapi_enqueue_kernel(KernelContext *kernel_context,
int kernel,
size_t global_size,
const uint kernel_features,
bool use_hardware_raytracing,
void **args)
{
bool success = true;
@@ -348,21 +248,6 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
try {
queue->submit([&](sycl::handler &cgh) {
# ifdef WITH_EMBREE_GPU
/* Spec says it has no effect if the called kernel doesn't support the below specialization
* constant but it can still trigger a recompilation, so we set it only if needed. */
if (device_kernel_has_intersection(device_kernel)) {
const RTCFeatureFlags used_embree_features = !use_hardware_raytracing ?
RTC_FEATURE_FLAG_NONE :
!(kernel_features & KERNEL_FEATURE_HAIR) ?
CYCLES_ONEAPI_EMBREE_BASIC_FEATURES :
CYCLES_ONEAPI_EMBREE_ALL_FEATURES;
cgh.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
used_embree_features);
}
# else
(void)kernel_features;
# endif
switch (device_kernel) {
case DEVICE_KERNEL_INTEGRATOR_RESET: {
oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_reset);
@@ -664,5 +549,4 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
# endif
return success;
}
#endif /* WITH_ONEAPI */

View File

@@ -47,14 +47,10 @@ CYCLES_KERNEL_ONEAPI_EXPORT size_t oneapi_kernel_preferred_local_size(
CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_enqueue_kernel(KernelContext *context,
int kernel,
size_t global_size,
const unsigned int kernel_features,
bool use_hardware_raytracing,
void **args);
CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_load_kernels(SyclQueue *queue,
const unsigned int kernel_features,
bool use_hardware_raytracing);
const unsigned int requested_features);
# ifdef __cplusplus
}
# endif
#endif /* WITH_ONEAPI */

View File

@@ -454,13 +454,8 @@ ccl_device_forceinline bool guiding_bsdf_init(KernelGlobals kg,
ccl_private float &rand)
{
#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
# if OPENPGL_VERSION_MINOR >= 5
if (kg->opgl_surface_sampling_distribution->Init(
kg->opgl_guiding_field, guiding_point3f(P), rand)) {
# else
if (kg->opgl_surface_sampling_distribution->Init(
kg->opgl_guiding_field, guiding_point3f(P), rand, true)) {
# endif
kg->opgl_surface_sampling_distribution->ApplyCosineProduct(guiding_point3f(N));
return true;
}
@@ -511,13 +506,8 @@ ccl_device_forceinline bool guiding_phase_init(KernelGlobals kg,
return false;
}
# if OPENPGL_VERSION_MINOR >= 5
if (kg->opgl_volume_sampling_distribution->Init(
kg->opgl_guiding_field, guiding_point3f(P), rand)) {
# else
if (kg->opgl_volume_sampling_distribution->Init(
kg->opgl_guiding_field, guiding_point3f(P), rand, true)) {
# endif
kg->opgl_volume_sampling_distribution->ApplySingleLobeHenyeyGreensteinProduct(guiding_vec3f(D),
g);
return true;

View File

@@ -342,7 +342,7 @@ ccl_device_forceinline void area_light_update_position(const ccl_global KernelLi
ls->D = normalize_len(ls->P - P, &ls->t);
ls->pdf = invarea;
if (klight->area.normalize_spread > 0) {
if (klight->area.tan_half_spread > 0) {
ls->eval_fac = 0.25f * invarea;
ls->eval_fac *= area_light_spread_attenuation(
ls->D, ls->Ng, klight->area.tan_half_spread, klight->area.normalize_spread);

View File

@@ -56,7 +56,7 @@ ccl_device_noinline bool light_distribution_sample(KernelGlobals kg,
const int index = light_distribution_sample(kg, randn);
const float pdf_selection = kernel_data.integrator.distribution_pdf_lights;
return light_sample<in_volume_segment>(
kg, randu, randv, time, P, bounce, path_flag, index, 0, pdf_selection, ls);
kg, randu, randv, time, P, bounce, path_flag, index, pdf_selection, ls);
}
ccl_device_inline float light_distribution_pdf_lamp(KernelGlobals kg)

View File

@@ -108,7 +108,6 @@ ccl_device_noinline bool light_sample(KernelGlobals kg,
const int bounce,
const uint32_t path_flag,
const int emitter_index,
const int object_id,
const float pdf_selection,
ccl_private LightSample *ls)
{
@@ -118,9 +117,8 @@ ccl_device_noinline bool light_sample(KernelGlobals kg,
if (kernel_data.integrator.use_light_tree) {
ccl_global const KernelLightTreeEmitter *kemitter = &kernel_data_fetch(light_tree_emitters,
emitter_index);
prim = kemitter->light.id;
mesh_light.shader_flag = kemitter->mesh_light.shader_flag;
mesh_light.object_id = object_id;
prim = kemitter->prim_id;
mesh_light = kemitter->mesh_light;
}
else
#endif

View File

@@ -438,9 +438,7 @@ ccl_device_inline float light_sample_mis_weight_forward_surface(KernelGlobals kg
const float3 N = INTEGRATOR_STATE(state, path, mis_origin_n);
uint lookup_offset = kernel_data_fetch(object_lookup_offset, sd->object);
uint prim_offset = kernel_data_fetch(object_prim_offset, sd->object);
uint triangle = kernel_data_fetch(triangle_to_tree, sd->prim - prim_offset + lookup_offset);
pdf *= light_tree_pdf(kg, ray_P, N, path_flag, sd->object, triangle);
pdf *= light_tree_pdf(kg, ray_P, N, path_flag, sd->prim - prim_offset + lookup_offset);
}
else
#endif
@@ -464,7 +462,7 @@ ccl_device_inline float light_sample_mis_weight_forward_lamp(KernelGlobals kg,
#ifdef __LIGHT_TREE__
if (kernel_data.integrator.use_light_tree) {
const float3 N = INTEGRATOR_STATE(state, path, mis_origin_n);
pdf *= light_tree_pdf(kg, P, N, path_flag, 0, kernel_data_fetch(light_to_tree, ls->lamp));
pdf *= light_tree_pdf(kg, P, N, path_flag, ~ls->lamp);
}
else
#endif
@@ -498,8 +496,7 @@ ccl_device_inline float light_sample_mis_weight_forward_background(KernelGlobals
#ifdef __LIGHT_TREE__
if (kernel_data.integrator.use_light_tree) {
const float3 N = INTEGRATOR_STATE(state, path, mis_origin_n);
uint light = kernel_data_fetch(light_to_tree, kernel_data.background.light_index);
pdf *= light_tree_pdf(kg, ray_P, N, path_flag, 0, light);
pdf *= light_tree_pdf(kg, ray_P, N, path_flag, ~kernel_data.background.light_index);
}
else
#endif

View File

@@ -69,59 +69,6 @@ ccl_device float3 compute_v(
cos_phi0 * o0 + dot_o1_a * inv_len * o1;
}
ccl_device_inline bool is_light(const ccl_global KernelLightTreeEmitter *kemitter)
{
return kemitter->light.id < 0;
}
ccl_device_inline bool is_mesh(const ccl_global KernelLightTreeEmitter *kemitter)
{
return !is_light(kemitter) && kemitter->mesh_light.object_id == OBJECT_NONE;
}
ccl_device_inline bool is_triangle(const ccl_global KernelLightTreeEmitter *kemitter)
{
return !is_light(kemitter) && kemitter->mesh_light.object_id != OBJECT_NONE;
}
ccl_device_inline bool is_leaf(const ccl_global KernelLightTreeNode *knode)
{
/* The distant node is also considered o leaf node. */
return knode->type >= LIGHT_TREE_LEAF;
}
template<bool in_volume_segment>
ccl_device void light_tree_to_local_space(KernelGlobals kg,
const int object_id,
ccl_private float3 &P,
ccl_private float3 &N_or_D,
ccl_private float &t)
{
const int object_flag = kernel_data_fetch(object_flag, object_id);
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
#ifdef __OBJECT_MOTION__
Transform itfm;
object_fetch_transform_motion_test(kg, object_id, 0.5f, &itfm);
#else
const Transform itfm = object_fetch_transform(kg, object_id, OBJECT_INVERSE_TRANSFORM);
#endif
P = transform_point(&itfm, P);
if (in_volume_segment) {
/* Transform direction. */
float3 D_local = transform_direction(&itfm, N_or_D);
float scale;
N_or_D = normalize_len(D_local, &scale);
t *= scale;
}
else if (!is_zero(N_or_D)) {
/* Transform normal. */
const Transform tfm = object_fetch_transform(kg, object_id, OBJECT_TRANSFORM);
N_or_D = normalize(transform_direction_transposed(&tfm, N_or_D));
}
}
}
/* This is the general function for calculating the importance of either a cluster or an emitter.
* Both of the specialized functions obtain the necessary data before calling this function. */
template<bool in_volume_segment>
@@ -237,8 +184,9 @@ ccl_device bool compute_emitter_centroid_and_dir(KernelGlobals kg,
ccl_private float3 &centroid,
ccl_private packed_float3 &dir)
{
if (is_light(kemitter)) {
const ccl_global KernelLight *klight = &kernel_data_fetch(lights, ~(kemitter->light.id));
const int prim_id = kemitter->prim_id;
if (prim_id < 0) {
const ccl_global KernelLight *klight = &kernel_data_fetch(lights, ~prim_id);
centroid = klight->co;
switch (klight->type) {
@@ -265,22 +213,19 @@ ccl_device bool compute_emitter_centroid_and_dir(KernelGlobals kg,
}
}
else {
kernel_assert(is_triangle(kemitter));
const int object = kemitter->mesh_light.object_id;
float3 vertices[3];
triangle_vertices(kg, kemitter->triangle.id, vertices);
triangle_world_space_vertices(kg, object, prim_id, -1.0f, vertices);
centroid = (vertices[0] + vertices[1] + vertices[2]) / 3.0f;
const bool is_front_only = (kemitter->triangle.emission_sampling == EMISSION_SAMPLING_FRONT);
const bool is_back_only = (kemitter->triangle.emission_sampling == EMISSION_SAMPLING_BACK);
const bool is_front_only = (kemitter->emission_sampling == EMISSION_SAMPLING_FRONT);
const bool is_back_only = (kemitter->emission_sampling == EMISSION_SAMPLING_BACK);
if (is_front_only || is_back_only) {
dir = safe_normalize(cross(vertices[1] - vertices[0], vertices[2] - vertices[0]));
if (is_back_only) {
dir = -dir;
}
const int object_flag = kernel_data_fetch(object_flag, object);
if ((object_flag & SD_OBJECT_TRANSFORM_APPLIED) &&
(object_flag & SD_OBJECT_NEGATIVE_SCALE)) {
if (kernel_data_fetch(object_flag, object) & SD_OBJECT_NEGATIVE_SCALE) {
dir = -dir;
}
}
@@ -292,75 +237,6 @@ ccl_device bool compute_emitter_centroid_and_dir(KernelGlobals kg,
return true;
}
template<bool in_volume_segment>
ccl_device void light_tree_node_importance(KernelGlobals kg,
const float3 P,
const float3 N_or_D,
const float t,
const bool has_transmission,
const ccl_global KernelLightTreeNode *knode,
ccl_private float &max_importance,
ccl_private float &min_importance)
{
const BoundingCone bcone = knode->bcone;
const BoundingBox bbox = knode->bbox;
float3 point_to_centroid;
float cos_theta_u;
float distance;
if (knode->type == LIGHT_TREE_DISTANT) {
if (in_volume_segment) {
return;
}
point_to_centroid = -bcone.axis;
cos_theta_u = fast_cosf(bcone.theta_o);
distance = 1.0f;
}
else {
const float3 centroid = 0.5f * (bbox.min + bbox.max);
if (in_volume_segment) {
const float3 D = N_or_D;
const float3 closest_point = P + dot(centroid - P, D) * D;
/* Minimal distance of the ray to the cluster. */
distance = len(centroid - closest_point);
point_to_centroid = -compute_v(centroid, P, D, bcone.axis, t);
cos_theta_u = light_tree_cos_bounding_box_angle(bbox, closest_point, point_to_centroid);
}
else {
const float3 N = N_or_D;
const float3 bbox_extent = bbox.max - centroid;
const bool bbox_is_visible = has_transmission |
(dot(N, centroid - P) + dot(fabs(N), fabs(bbox_extent)) > 0);
/* If the node is guaranteed to be behind the surface we're sampling, and the surface is
* opaque, then we can give the node an importance of 0 as it contributes nothing to the
* surface. */
if (!bbox_is_visible) {
return;
}
point_to_centroid = normalize_len(centroid - P, &distance);
cos_theta_u = light_tree_cos_bounding_box_angle(bbox, P, point_to_centroid);
}
/* Clamp distance to half the radius of the cluster when splitting is disabled. */
distance = fmaxf(0.5f * len(centroid - bbox.max), distance);
}
/* TODO: currently max_distance = min_distance, max_importance = min_importance for the
* nodes. Do we need better weights for complex scenes? */
light_tree_importance<in_volume_segment>(N_or_D,
has_transmission,
point_to_centroid,
cos_theta_u,
bcone,
distance,
distance,
t,
knode->energy,
max_importance,
min_importance);
}
template<bool in_volume_segment>
ccl_device void light_tree_emitter_importance(KernelGlobals kg,
const float3 P,
@@ -371,21 +247,11 @@ ccl_device void light_tree_emitter_importance(KernelGlobals kg,
ccl_private float &max_importance,
ccl_private float &min_importance)
{
max_importance = 0.0f;
min_importance = 0.0f;
const ccl_global KernelLightTreeEmitter *kemitter = &kernel_data_fetch(light_tree_emitters,
emitter_index);
if (is_mesh(kemitter)) {
const ccl_global KernelLightTreeNode *knode = &kernel_data_fetch(light_tree_nodes,
kemitter->mesh.node_id);
light_tree_node_importance<in_volume_segment>(
kg, P, N_or_D, t, has_transmission, knode, max_importance, min_importance);
return;
}
max_importance = 0.0f;
min_importance = 0.0f;
BoundingCone bcone;
bcone.theta_o = kemitter->theta_o;
bcone.theta_e = kemitter->theta_e;
@@ -398,6 +264,8 @@ ccl_device void light_tree_emitter_importance(KernelGlobals kg,
return;
}
const int prim_id = kemitter->prim_id;
if (in_volume_segment) {
const float3 D = N_or_D;
/* Closest point. */
@@ -411,15 +279,9 @@ ccl_device void light_tree_emitter_importance(KernelGlobals kg,
P_c = P;
}
/* Early out if the emitter is guaranteed to be invisible. */
bool is_visible;
if (is_triangle(kemitter)) {
is_visible = triangle_light_tree_parameters<in_volume_segment>(
kg, kemitter, centroid, P_c, N_or_D, bcone, cos_theta_u, distance, point_to_centroid);
}
else {
kernel_assert(is_light(kemitter));
const ccl_global KernelLight *klight = &kernel_data_fetch(lights, ~(kemitter->light.id));
if (prim_id < 0) {
const ccl_global KernelLight *klight = &kernel_data_fetch(lights, ~prim_id);
switch (klight->type) {
/* Function templates only modifies cos_theta_u when in_volume_segment = true. */
case LIGHT_SPOT:
@@ -447,6 +309,10 @@ ccl_device void light_tree_emitter_importance(KernelGlobals kg,
return;
}
}
else { /* Mesh light. */
is_visible = triangle_light_tree_parameters<in_volume_segment>(
kg, kemitter, centroid, P_c, N_or_D, bcone, cos_theta_u, distance, point_to_centroid);
}
is_visible |= has_transmission;
if (!is_visible) {
@@ -467,31 +333,81 @@ ccl_device void light_tree_emitter_importance(KernelGlobals kg,
}
template<bool in_volume_segment>
ccl_device void light_tree_child_importance(KernelGlobals kg,
const float3 P,
const float3 N_or_D,
const float t,
const bool has_transmission,
const ccl_global KernelLightTreeNode *knode,
ccl_private float &max_importance,
ccl_private float &min_importance)
ccl_device void light_tree_node_importance(KernelGlobals kg,
const float3 P,
const float3 N_or_D,
const float t,
const bool has_transmission,
const ccl_global KernelLightTreeNode *knode,
ccl_private float &max_importance,
ccl_private float &min_importance)
{
max_importance = 0.0f;
min_importance = 0.0f;
if (knode->num_emitters == 1) {
light_tree_emitter_importance<in_volume_segment>(kg,
P,
N_or_D,
t,
has_transmission,
knode->leaf.first_emitter,
max_importance,
min_importance);
/* At a leaf node with only one emitter. */
light_tree_emitter_importance<in_volume_segment>(
kg, P, N_or_D, t, has_transmission, -knode->child_index, max_importance, min_importance);
}
else if (knode->num_emitters != 0) {
light_tree_node_importance<in_volume_segment>(
kg, P, N_or_D, t, has_transmission, knode, max_importance, min_importance);
const BoundingCone bcone = knode->bcone;
const BoundingBox bbox = knode->bbox;
float3 point_to_centroid;
float cos_theta_u;
float distance;
if (knode->bit_trail == 1) {
/* Distant light node. */
if (in_volume_segment) {
return;
}
point_to_centroid = -bcone.axis;
cos_theta_u = fast_cosf(bcone.theta_o);
distance = 1.0f;
}
else {
const float3 centroid = 0.5f * (bbox.min + bbox.max);
if (in_volume_segment) {
const float3 D = N_or_D;
const float3 closest_point = P + dot(centroid - P, D) * D;
/* Minimal distance of the ray to the cluster. */
distance = len(centroid - closest_point);
point_to_centroid = -compute_v(centroid, P, D, bcone.axis, t);
cos_theta_u = light_tree_cos_bounding_box_angle(bbox, closest_point, point_to_centroid);
}
else {
const float3 N = N_or_D;
const float3 bbox_extent = bbox.max - centroid;
const bool bbox_is_visible = has_transmission |
(dot(N, centroid - P) + dot(fabs(N), fabs(bbox_extent)) > 0);
/* If the node is guaranteed to be behind the surface we're sampling, and the surface is
* opaque, then we can give the node an importance of 0 as it contributes nothing to the
* surface. */
if (!bbox_is_visible) {
return;
}
point_to_centroid = normalize_len(centroid - P, &distance);
cos_theta_u = light_tree_cos_bounding_box_angle(bbox, P, point_to_centroid);
}
/* Clamp distance to half the radius of the cluster when splitting is disabled. */
distance = fmaxf(0.5f * len(centroid - bbox.max), distance);
}
/* TODO: currently max_distance = min_distance, max_importance = min_importance for the
* nodes. Do we need better weights for complex scenes? */
light_tree_importance<in_volume_segment>(N_or_D,
has_transmission,
point_to_centroid,
cos_theta_u,
bcone,
distance,
distance,
t,
knode->energy,
max_importance,
min_importance);
}
}
@@ -524,30 +440,26 @@ ccl_device void sample_resevoir(const int current_index,
template<bool in_volume_segment>
ccl_device int light_tree_cluster_select_emitter(KernelGlobals kg,
ccl_private float &rand,
ccl_private float3 &P,
ccl_private float3 &N_or_D,
ccl_private float &t,
const float3 P,
const float3 N_or_D,
const float t,
const bool has_transmission,
ccl_private int *node_index,
const ccl_global KernelLightTreeNode *knode,
ccl_private float *pdf_factor)
{
float selected_importance[2] = {0.0f, 0.0f};
float total_importance[2] = {0.0f, 0.0f};
int selected_index = -1;
const ccl_global KernelLightTreeNode *knode = &kernel_data_fetch(light_tree_nodes, *node_index);
*node_index = -1;
/* Mark emitters with zero importance. Used for resevoir when total minimum importance = 0. */
kernel_assert(knode->num_emitters <= sizeof(uint) * 8);
uint has_importance = 0;
const bool sample_max = (rand > 0.5f); /* Sampling using the maximum importance. */
if (knode->num_emitters > 1) {
rand = rand * 2.0f - float(sample_max);
}
rand = rand * 2.0f - float(sample_max);
for (int i = 0; i < knode->num_emitters; i++) {
int current_index = knode->leaf.first_emitter + i;
int current_index = -knode->child_index + i;
/* maximum importance = importance[0], minimum importance = importance[1] */
float importance[2];
light_tree_emitter_importance<in_volume_segment>(
@@ -580,7 +492,7 @@ ccl_device int light_tree_cluster_select_emitter(KernelGlobals kg,
else {
selected_index = -1;
for (int i = 0; i < knode->num_emitters; i++) {
int current_index = knode->inner.right_child + i;
int current_index = -knode->child_index + i;
sample_resevoir(current_index,
float(has_importance & 1),
selected_index,
@@ -596,24 +508,8 @@ ccl_device int light_tree_cluster_select_emitter(KernelGlobals kg,
}
}
*pdf_factor *= 0.5f * (selected_importance[0] / total_importance[0] +
selected_importance[1] / total_importance[1]);
const ccl_global KernelLightTreeEmitter *kemitter = &kernel_data_fetch(light_tree_emitters,
selected_index);
if (is_mesh(kemitter)) {
/* Transform ray from world to local space. */
light_tree_to_local_space<in_volume_segment>(kg, kemitter->mesh.object_id, P, N_or_D, t);
*node_index = kemitter->mesh.node_id;
const ccl_global KernelLightTreeNode *knode = &kernel_data_fetch(light_tree_nodes,
*node_index);
if (knode->type == LIGHT_TREE_INSTANCE) {
/* Switch to the node with the subtree. */
*node_index = knode->instance.reference;
}
}
*pdf_factor = 0.5f * (selected_importance[0] / total_importance[0] +
selected_importance[1] / total_importance[1]);
return selected_index;
}
@@ -632,9 +528,9 @@ ccl_device bool get_left_probability(KernelGlobals kg,
const ccl_global KernelLightTreeNode *right = &kernel_data_fetch(light_tree_nodes, right_index);
float min_left_importance, max_left_importance, min_right_importance, max_right_importance;
light_tree_child_importance<in_volume_segment>(
light_tree_node_importance<in_volume_segment>(
kg, P, N_or_D, t, has_transmission, left, max_left_importance, min_left_importance);
light_tree_child_importance<in_volume_segment>(
light_tree_node_importance<in_volume_segment>(
kg, P, N_or_D, t, has_transmission, right, max_right_importance, min_right_importance);
const float total_max_importance = max_left_importance + max_right_importance;
@@ -660,8 +556,8 @@ ccl_device_noinline bool light_tree_sample(KernelGlobals kg,
const float randv,
const float time,
const float3 P,
float3 N_or_D,
float t,
const float3 N_or_D,
const float t,
const int shader_flags,
const int bounce,
const uint32_t path_flag,
@@ -675,38 +571,28 @@ ccl_device_noinline bool light_tree_sample(KernelGlobals kg,
float pdf_leaf = 1.0f;
float pdf_selection = 1.0f;
int selected_emitter = -1;
int object = 0;
int node_index = 0; /* Root node. */
float3 local_P = P;
int node_index = 0; /* Root node. */
/* Traverse the light tree until a leaf node is reached. */
while (true) {
const ccl_global KernelLightTreeNode *knode = &kernel_data_fetch(light_tree_nodes, node_index);
if (is_leaf(knode)) {
if (knode->child_index <= 0) {
/* At a leaf node, we pick an emitter. */
selected_emitter = light_tree_cluster_select_emitter<in_volume_segment>(
kg, randn, local_P, N_or_D, t, has_transmission, &node_index, &pdf_selection);
if (node_index < 0) {
break;
}
else {
/* Continue with the picked mesh light. */
object = kernel_data_fetch(light_tree_emitters, selected_emitter).mesh.object_id;
continue;
}
kg, randn, P, N_or_D, t, has_transmission, knode, &pdf_selection);
break;
}
/* At an interior node, the left child is directly after the parent, while the right child is
* stored as the child index. */
const int left_index = node_index + 1;
const int right_index = knode->inner.right_child;
const int right_index = knode->child_index;
float left_prob;
if (!get_left_probability<in_volume_segment>(
kg, local_P, N_or_D, t, has_transmission, left_index, right_index, left_prob)) {
kg, P, N_or_D, t, has_transmission, left_index, right_index, left_prob)) {
return false; /* Both child nodes have zero importance. */
}
@@ -724,104 +610,38 @@ ccl_device_noinline bool light_tree_sample(KernelGlobals kg,
pdf_selection *= pdf_leaf;
return light_sample<in_volume_segment>(
kg, randu, randv, time, P, bounce, path_flag, selected_emitter, object, pdf_selection, ls);
kg, randu, randv, time, P, bounce, path_flag, selected_emitter, pdf_selection, ls);
}
/* We need to be able to find the probability of selecting a given light for MIS. */
ccl_device float light_tree_pdf(
KernelGlobals kg, float3 P, float3 N, const int path_flag, const int object, const uint target)
KernelGlobals kg, const float3 P, const float3 N, const int path_flag, const int emitter)
{
const bool has_transmission = (path_flag & PATH_RAY_MIS_HAD_TRANSMISSION);
/* Target emitter info. */
const int target_emitter = (emitter >= 0) ? kernel_data_fetch(triangle_to_tree, emitter) :
kernel_data_fetch(light_to_tree, ~emitter);
ccl_global const KernelLightTreeEmitter *kemitter = &kernel_data_fetch(light_tree_emitters,
target);
int root_index, target_leaf;
uint bit_trail, target_emitter;
target_emitter);
const int target_leaf = kemitter->parent_index;
ccl_global const KernelLightTreeNode *kleaf = &kernel_data_fetch(light_tree_nodes, target_leaf);
uint bit_trail = kleaf->bit_trail;
if (is_triangle(kemitter)) {
/* If the target is an emissive triangle, first traverse the top level tree to find the mesh
* light emitter, then traverse the subtree. */
target_emitter = kernel_data_fetch(object_to_tree, object);
ccl_global const KernelLightTreeEmitter *kmesh = &kernel_data_fetch(light_tree_emitters,
target_emitter);
target_leaf = kmesh->parent_index;
root_index = kmesh->mesh.node_id;
ccl_global const KernelLightTreeNode *kroot = &kernel_data_fetch(light_tree_nodes, root_index);
bit_trail = kroot->bit_trail;
if (kroot->type == LIGHT_TREE_INSTANCE) {
root_index = kroot->instance.reference;
}
}
else {
root_index = 0;
target_leaf = kemitter->parent_index;
bit_trail = kernel_data_fetch(light_tree_nodes, target_leaf).bit_trail;
target_emitter = target;
}
int node_index = 0; /* Root node. */
float pdf = 1.0f;
int node_index = 0;
/* Traverse the light tree until we reach the target leaf node. */
while (true) {
const ccl_global KernelLightTreeNode *knode = &kernel_data_fetch(light_tree_nodes, node_index);
if (is_leaf(knode)) {
kernel_assert(node_index == target_leaf);
ccl_global const KernelLightTreeNode *kleaf = &kernel_data_fetch(light_tree_nodes,
target_leaf);
/* Iterate through leaf node to find the probability of sampling the target emitter. */
float target_max_importance = 0.0f;
float target_min_importance = 0.0f;
float total_max_importance = 0.0f;
float total_min_importance = 0.0f;
int num_has_importance = 0;
for (int i = 0; i < kleaf->num_emitters; i++) {
const int emitter = kleaf->leaf.first_emitter + i;
float max_importance, min_importance;
light_tree_emitter_importance<false>(
kg, P, N, 0, has_transmission, emitter, max_importance, min_importance);
num_has_importance += (max_importance > 0);
if (emitter == target_emitter) {
target_max_importance = max_importance;
target_min_importance = min_importance;
}
total_max_importance += max_importance;
total_min_importance += min_importance;
}
if (target_max_importance > 0.0f) {
pdf *= 0.5f * (target_max_importance / total_max_importance +
(total_min_importance > 0 ? target_min_importance / total_min_importance :
1.0f / num_has_importance));
}
else {
return 0.0f;
}
if (root_index) {
/* Arrived at the mesh light. Continue with the subtree. */
float unused;
light_tree_to_local_space<false>(kg, object, P, N, unused);
node_index = root_index;
root_index = 0;
target_emitter = target;
target_leaf = kemitter->parent_index;
bit_trail = kernel_data_fetch(light_tree_nodes, target_leaf).bit_trail;
continue;
}
else {
kernel_assert(node_index == target_leaf);
return pdf;
}
if (knode->child_index <= 0) {
break;
}
/* Interior node. */
const int left_index = node_index + 1;
const int right_index = knode->inner.right_child;
const int right_index = knode->child_index;
float left_prob;
if (!get_left_probability<false>(
@@ -838,6 +658,36 @@ ccl_device float light_tree_pdf(
return 0.0f;
}
}
kernel_assert(node_index == target_leaf);
/* Iterate through leaf node to find the probability of sampling the target emitter. */
float target_max_importance = 0.0f;
float target_min_importance = 0.0f;
float total_max_importance = 0.0f;
float total_min_importance = 0.0f;
int num_has_importance = 0;
for (int i = 0; i < kleaf->num_emitters; i++) {
const int emitter = -kleaf->child_index + i;
float max_importance, min_importance;
light_tree_emitter_importance<false>(
kg, P, N, 0, has_transmission, emitter, max_importance, min_importance);
num_has_importance += (max_importance > 0);
if (emitter == target_emitter) {
target_max_importance = max_importance;
target_min_importance = min_importance;
}
total_max_importance += max_importance;
total_min_importance += min_importance;
}
if (target_max_importance > 0.0f) {
return pdf * 0.5f *
(target_max_importance / total_max_importance +
(total_min_importance > 0 ? target_min_importance / total_min_importance :
1.0f / num_has_importance));
}
return 0.0f;
}
CCL_NAMESPACE_END

View File

@@ -304,8 +304,9 @@ ccl_device_forceinline bool triangle_light_tree_parameters(
cos_theta_u = FLT_MAX;
const int object = kemitter->mesh_light.object_id;
float3 vertices[3];
triangle_vertices(kg, kemitter->triangle.id, vertices);
triangle_world_space_vertices(kg, object, kemitter->prim_id, -1.0f, vertices);
bool shape_above_surface = false;
for (int i = 0; i < 3; i++) {

View File

@@ -1390,128 +1390,19 @@ ccl_device_extern void osl_noiseparams_set_impulses(ccl_private OSLNoiseOptions
res->y = n; \
res->z = n; \
} \
ccl_device_extern void name##_vv(ccl_private float3 *res, ccl_private const float3 *v) \
ccl_device_extern void name##_vv(ccl_private float3 *res, const float3 *v) \
{ \
const float n = name##_fv(v); \
res->x = n; \
res->y = n; \
res->z = n; \
} \
ccl_device_extern void name##_vvf( \
ccl_private float3 *res, ccl_private const float3 *v, float w) \
ccl_device_extern void name##_vvf(ccl_private float3 *res, const float3 *v, float w) \
{ \
const float n = name##_fvf(v, w); \
res->x = n; \
res->y = n; \
res->z = n; \
} \
ccl_device_extern void name##_dfdf(ccl_private float *res, ccl_private const float *x) \
{ \
res[0] = name##_ff(x[0]); \
res[1] = name##_ff(x[1]); \
res[2] = name##_ff(x[2]); \
} \
ccl_device_extern void name##_dfdff( \
ccl_private float *res, ccl_private const float *x, float y) \
{ \
res[0] = name##_fff(x[0], y); \
res[1] = name##_fff(x[1], y); \
res[2] = name##_fff(x[2], y); \
} \
ccl_device_extern void name##_dffdf( \
ccl_private float *res, float x, ccl_private const float *y) \
{ \
res[0] = name##_fff(x, y[0]); \
res[1] = name##_fff(x, y[1]); \
res[2] = name##_fff(x, y[2]); \
} \
ccl_device_extern void name##_dfdfdf( \
ccl_private float *res, ccl_private const float *x, ccl_private const float *y) \
{ \
res[0] = name##_fff(x[0], y[0]); \
res[1] = name##_fff(x[1], y[1]); \
res[2] = name##_fff(x[2], y[2]); \
} \
ccl_device_extern void name##_dfdv(ccl_private float *res, ccl_private const float3 *v) \
{ \
res[0] = name##_fv(&v[0]); \
res[1] = name##_fv(&v[1]); \
res[2] = name##_fv(&v[2]); \
} \
ccl_device_extern void name##_dfdvf( \
ccl_private float *res, ccl_private const float3 *v, float w) \
{ \
res[0] = name##_fvf(&v[0], w); \
res[1] = name##_fvf(&v[1], w); \
res[2] = name##_fvf(&v[2], w); \
} \
ccl_device_extern void name##_dfvdf( \
ccl_private float *res, ccl_private const float3 *v, ccl_private const float *w) \
{ \
res[0] = name##_fvf(v, w[0]); \
res[1] = name##_fvf(v, w[1]); \
res[2] = name##_fvf(v, w[2]); \
} \
ccl_device_extern void name##_dfdvdf( \
ccl_private float *res, ccl_private const float3 *v, ccl_private const float *w) \
{ \
res[0] = name##_fvf(&v[0], w[0]); \
res[1] = name##_fvf(&v[1], w[1]); \
res[2] = name##_fvf(&v[2], w[2]); \
} \
ccl_device_extern void name##_dvdf(ccl_private float3 *res, ccl_private const float *x) \
{ \
name##_vf(&res[0], x[0]); \
name##_vf(&res[1], x[1]); \
name##_vf(&res[2], x[2]); \
} \
ccl_device_extern void name##_dvdff( \
ccl_private float3 *res, ccl_private const float *x, float y) \
{ \
name##_vff(&res[0], x[0], y); \
name##_vff(&res[1], x[1], y); \
name##_vff(&res[2], x[2], y); \
} \
ccl_device_extern void name##_dvfdf( \
ccl_private float3 *res, float x, ccl_private const float *y) \
{ \
name##_vff(&res[0], x, y[0]); \
name##_vff(&res[1], x, y[1]); \
name##_vff(&res[2], x, y[2]); \
} \
ccl_device_extern void name##_dvdfdf( \
ccl_private float3 *res, ccl_private const float *x, ccl_private const float *y) \
{ \
name##_vff(&res[0], x[0], y[0]); \
name##_vff(&res[1], x[1], y[1]); \
name##_vff(&res[2], x[2], y[2]); \
} \
ccl_device_extern void name##_dvdv(ccl_private float3 *res, ccl_private const float3 *v) \
{ \
name##_vv(&res[0], &v[0]); \
name##_vv(&res[1], &v[1]); \
name##_vv(&res[2], &v[2]); \
} \
ccl_device_extern void name##_dvdvf( \
ccl_private float3 *res, ccl_private const float3 *v, float w) \
{ \
name##_vvf(&res[0], &v[0], w); \
name##_vvf(&res[1], &v[1], w); \
name##_vvf(&res[2], &v[2], w); \
} \
ccl_device_extern void name##_dvvdf( \
ccl_private float3 *res, ccl_private const float3 *v, ccl_private const float *w) \
{ \
name##_vvf(&res[0], v, w[0]); \
name##_vvf(&res[1], v, w[1]); \
name##_vvf(&res[2], v, w[2]); \
} \
ccl_device_extern void name##_dvdvdf( \
ccl_private float3 *res, ccl_private const float3 *v, ccl_private const float *w) \
{ \
name##_vvf(&res[0], &v[0], w[0]); \
name##_vvf(&res[1], &v[1], w[1]); \
name##_vvf(&res[2], &v[2], w[2]); \
}
ccl_device_forceinline float hashnoise_1d(float p)

View File

@@ -132,11 +132,11 @@ color sky_radiance_nishita(vector dir, float nishita_data[10], string filename)
/* definitions */
vector sun_dir = geographical_to_direction(sun_elevation, sun_rotation + M_PI_2);
float sun_dir_angle = precise_angle(dir, sun_dir);
float half_angular = angular_diameter * 0.5;
float half_angular = angular_diameter / 2.0;
float dir_elevation = M_PI_2 - direction[0];
/* If the ray is inside the sun disc, render it, otherwise render the sky.
* Alternatively, ignore the sun if we're evaluating the background texture. */
/* if ray inside sun disc render it, otherwise render sky.
* alternatively, ignore the sun if we're evaluating the background texture. */
if (sun_dir_angle < half_angular && sun_disc == 1 && raytype("importance_bake") != 1) {
/* get 2 pixels data */
color pixel_bottom = color(nishita_data[0], nishita_data[1], nishita_data[2]);

View File

@@ -84,8 +84,8 @@ ccl_device_inline void sample_uniform_cone(const float3 N,
ccl_device_inline float pdf_uniform_cone(const float3 N, float3 D, float angle)
{
float zMin = cosf(angle);
float z = precise_angle(N, D);
if (z < angle) {
float z = dot(N, D);
if (z > zMin) {
return M_1_2PI_F / (1.0f - zMin);
}
return 0.0f;

View File

@@ -138,13 +138,12 @@ ccl_device float3 sky_radiance_nishita(KernelGlobals kg,
/* definitions */
float3 sun_dir = geographical_to_direction(sun_elevation, sun_rotation + M_PI_2_F);
float sun_dir_angle = precise_angle(dir, sun_dir);
float half_angular = angular_diameter * 0.5f;
float half_angular = angular_diameter / 2.0f;
float dir_elevation = M_PI_2_F - direction.x;
/* If the ray is inside the sun disc, render it, otherwise render the sky.
* Alternatively, ignore the sun if we're evaluating the background texture. */
if (sun_disc && sun_dir_angle < half_angular &&
!((path_flag & PATH_RAY_IMPORTANCE_BAKE) && kernel_data.background.use_sun_guiding)) {
/* if ray inside sun disc render it, otherwise render sky.
* alternatively, ignore the sun if we're evaluating the background texture. */
if (sun_disc && sun_dir_angle < half_angular && !(path_flag & PATH_RAY_IMPORTANCE_BAKE)) {
/* get 2 pixels data */
float y;

View File

@@ -3,9 +3,8 @@
#pragma once
#if (!defined(__KERNEL_GPU__) || (defined(__KERNEL_ONEAPI__) && defined(WITH_EMBREE_GPU))) && \
defined(WITH_EMBREE)
# if EMBREE_MAJOR_VERSION == 4
#if !defined(__KERNEL_GPU__) && defined(WITH_EMBREE)
# if EMBREE_MAJOR_VERSION >= 4
# include <embree4/rtcore.h>
# include <embree4/rtcore_scene.h>
# else
@@ -79,8 +78,9 @@ CCL_NAMESPACE_BEGIN
#define __VISIBILITY_FLAG__
#define __VOLUME__
/* TODO: solve internal compiler errors and enable light tree on HIP. */
/* TODO: solve internal compiler perf issue and enable light tree on Metal/AMD. */
#if defined(__KERNEL_METAL_AMD__)
#if defined(__KERNEL_HIP__) || defined(__KERNEL_METAL_AMD__)
# undef __LIGHT_TREE__
#endif
@@ -1370,13 +1370,6 @@ using BoundingCone = struct BoundingCone {
float theta_e;
};
enum LightTreeNodeType : uint8_t {
LIGHT_TREE_INSTANCE = (1 << 0),
LIGHT_TREE_INNER = (1 << 1),
LIGHT_TREE_LEAF = (1 << 2),
LIGHT_TREE_DISTANT = (1 << 3),
};
typedef struct KernelLightTreeNode {
/* Bounding box. */
BoundingBox bbox;
@@ -1387,25 +1380,17 @@ typedef struct KernelLightTreeNode {
/* Energy. */
float energy;
LightTreeNodeType type;
/* Leaf nodes need to know the number of emitters stored. */
int num_emitters;
union {
struct {
int first_emitter; /* The index of the first emitter. */
} leaf;
struct {
int right_child; /* The index of the right child. */
} inner;
struct {
int reference; /* A reference to the node with the subtree. */
} instance;
};
/* If this is 0 or less, we're at a leaf node
* and the negative value indexes into the first child of the light array.
* Otherwise, it's an index to the node's second child. */
int child_index;
int num_emitters; /* leaf nodes need to know the number of emitters stored. */
/* Bit trail. */
uint bit_trail;
/* Padding. */
int pad;
} KernelLightTreeNode;
static_assert_align(KernelLightTreeNode, 16);
@@ -1417,23 +1402,10 @@ typedef struct KernelLightTreeEmitter {
/* Energy. */
float energy;
union {
struct {
int id; /* The location in the triangles array. */
EmissionSampling emission_sampling;
} triangle;
struct {
int id; /* The location in the lights array. */
} light;
struct {
int object_id;
int node_id;
} mesh;
};
/* The location in the lights or triangles array. */
int prim_id;
MeshLight mesh_light;
EmissionSampling emission_sampling;
/* Parent. */
int parent_index;

View File

@@ -15,12 +15,8 @@ set(SRC
camera.cpp
colorspace.cpp
constant_fold.cpp
devicescene.cpp
film.cpp
geometry.cpp
geometry_attributes.cpp
geometry_bvh.cpp
geometry_mesh.cpp
hair.cpp
image.cpp
image_oiio.cpp
@@ -59,7 +55,6 @@ set(SRC_HEADERS
camera.h
colorspace.h
constant_fold.h
devicescene.h
film.h
geometry.h
hair.h

View File

@@ -1,64 +0,0 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#include "scene/devicescene.h"
#include "device/device.h"
#include "device/memory.h"
CCL_NAMESPACE_BEGIN
DeviceScene::DeviceScene(Device *device)
: bvh_nodes(device, "bvh_nodes", MEM_GLOBAL),
bvh_leaf_nodes(device, "bvh_leaf_nodes", MEM_GLOBAL),
object_node(device, "object_node", MEM_GLOBAL),
prim_type(device, "prim_type", MEM_GLOBAL),
prim_visibility(device, "prim_visibility", MEM_GLOBAL),
prim_index(device, "prim_index", MEM_GLOBAL),
prim_object(device, "prim_object", MEM_GLOBAL),
prim_time(device, "prim_time", MEM_GLOBAL),
tri_verts(device, "tri_verts", MEM_GLOBAL),
tri_shader(device, "tri_shader", MEM_GLOBAL),
tri_vnormal(device, "tri_vnormal", MEM_GLOBAL),
tri_vindex(device, "tri_vindex", MEM_GLOBAL),
tri_patch(device, "tri_patch", MEM_GLOBAL),
tri_patch_uv(device, "tri_patch_uv", MEM_GLOBAL),
curves(device, "curves", MEM_GLOBAL),
curve_keys(device, "curve_keys", MEM_GLOBAL),
curve_segments(device, "curve_segments", MEM_GLOBAL),
patches(device, "patches", MEM_GLOBAL),
points(device, "points", MEM_GLOBAL),
points_shader(device, "points_shader", MEM_GLOBAL),
objects(device, "objects", MEM_GLOBAL),
object_motion_pass(device, "object_motion_pass", MEM_GLOBAL),
object_motion(device, "object_motion", MEM_GLOBAL),
object_flag(device, "object_flag", MEM_GLOBAL),
object_volume_step(device, "object_volume_step", MEM_GLOBAL),
object_prim_offset(device, "object_prim_offset", MEM_GLOBAL),
camera_motion(device, "camera_motion", MEM_GLOBAL),
attributes_map(device, "attributes_map", MEM_GLOBAL),
attributes_float(device, "attributes_float", MEM_GLOBAL),
attributes_float2(device, "attributes_float2", MEM_GLOBAL),
attributes_float3(device, "attributes_float3", MEM_GLOBAL),
attributes_float4(device, "attributes_float4", MEM_GLOBAL),
attributes_uchar4(device, "attributes_uchar4", MEM_GLOBAL),
light_distribution(device, "light_distribution", MEM_GLOBAL),
lights(device, "lights", MEM_GLOBAL),
light_background_marginal_cdf(device, "light_background_marginal_cdf", MEM_GLOBAL),
light_background_conditional_cdf(device, "light_background_conditional_cdf", MEM_GLOBAL),
light_tree_nodes(device, "light_tree_nodes", MEM_GLOBAL),
light_tree_emitters(device, "light_tree_emitters", MEM_GLOBAL),
light_to_tree(device, "light_to_tree", MEM_GLOBAL),
object_to_tree(device, "object_to_tree", MEM_GLOBAL),
object_lookup_offset(device, "object_lookup_offset", MEM_GLOBAL),
triangle_to_tree(device, "triangle_to_tree", MEM_GLOBAL),
particles(device, "particles", MEM_GLOBAL),
svm_nodes(device, "svm_nodes", MEM_GLOBAL),
shaders(device, "shaders", MEM_GLOBAL),
lookup_table(device, "lookup_table", MEM_GLOBAL),
sample_pattern_lut(device, "sample_pattern_lut", MEM_GLOBAL),
ies_lights(device, "ies", MEM_GLOBAL)
{
memset((void *)&data, 0, sizeof(data));
}
CCL_NAMESPACE_END

Some files were not shown because too many files have changed in this diff Show More