Versioning function to replace legacy instancing panel by geometry node modifier #105494

Open
Iliya Katushenock wants to merge 44 commits from mod_moder/blender:instances into main

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
700 changed files with 12252 additions and 6104 deletions
Showing only changes of commit f0333286fa - Show all commits

View File

@ -521,7 +521,8 @@ endif()
if(NOT APPLE)
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
# Radeon VII (gfx906) not currently working with HIP SDK, so left out of the list.
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
endif()
@ -1580,6 +1581,8 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_MISSING_NORETURN -Wno-missing-noreturn)
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_UNUSED_BUT_SET_VARIABLE -Wno-unused-but-set-variable)
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_DEPRECATED_DECLARATIONS -Wno-deprecated-declarations)
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_STRICT_PROTOTYPES -Wno-strict-prototypes)
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_BITWISE_INSTEAD_OF_LOGICAL -Wno-bitwise-instead-of-logical)
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_UNUSED_PARAMETER -Wno-unused-parameter)
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_UNUSED_PRIVATE_FIELD -Wno-unused-private-field)
@ -1593,6 +1596,7 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_UNDEFINED_VAR_TEMPLATE -Wno-undefined-var-template)
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_INSTANTIATION_AFTER_SPECIALIZATION -Wno-instantiation-after-specialization)
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_MISLEADING_INDENTATION -Wno-misleading-indentation)
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_BITWISE_INSTEAD_OF_LOGICAL -Wno-bitwise-instead-of-logical)
elseif(CMAKE_C_COMPILER_ID MATCHES "Intel")

View File

@ -58,9 +58,6 @@ Static Source Code Checking
* check_cppcheck: Run blender source through cppcheck (C & C++).
* check_clang_array: Run blender source through clang array checking script (C & C++).
* check_deprecated: Check if there is any deprecated code to remove.
* check_splint: Run blenders source through splint (C only).
* check_sparse: Run blenders source through sparse (C only).
* check_smatch: Run blenders source through smatch (C only).
* check_descriptions: Check for duplicate/invalid descriptions.
* check_licenses: Check license headers follow the SPDX license specification,
using one of the accepted licenses in 'doc/license/SPDX-license-identifiers.txt'
@ -474,21 +471,6 @@ check_clang_array: .FORCE
@cd "$(BUILD_DIR)" ; \
$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_clang_array.py"
check_splint: .FORCE
@$(CMAKE_CONFIG)
@cd "$(BUILD_DIR)" ; \
$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_splint.py"
check_sparse: .FORCE
@$(CMAKE_CONFIG)
@cd "$(BUILD_DIR)" ; \
$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_sparse.py"
check_smatch: .FORCE
@$(CMAKE_CONFIG)
@cd "$(BUILD_DIR)" ; \
$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_smatch.py"
check_mypy: .FORCE
@$(PYTHON) "$(BLENDER_DIR)/tools/check_source/check_mypy.py"

View File

@ -90,28 +90,26 @@ include(cmake/haru.cmake)
# Boost needs to be included after `python.cmake` due to the PYTHON_BINARY variable being needed.
include(cmake/boost.cmake)
include(cmake/pugixml.cmake)
include(cmake/ispc.cmake)
include(cmake/openimagedenoise.cmake)
include(cmake/embree.cmake)
include(cmake/openpgl.cmake)
include(cmake/fmt.cmake)
include(cmake/robinmap.cmake)
include(cmake/xml2.cmake)
include(cmake/fribidi.cmake)
include(cmake/harfbuzz.cmake)
if(NOT APPLE)
include(cmake/xr_openxr.cmake)
if(NOT WIN32 OR BUILD_MODE STREQUAL Release)
include(cmake/dpcpp.cmake)
include(cmake/dpcpp_deps.cmake)
endif()
include(cmake/dpcpp.cmake)
include(cmake/dpcpp_deps.cmake)
if(NOT WIN32)
include(cmake/igc.cmake)
include(cmake/gmmlib.cmake)
include(cmake/ocloc.cmake)
endif()
endif()
include(cmake/ispc.cmake)
include(cmake/openimagedenoise.cmake)
# Embree needs to be included after dpcpp as it uses it for compiling with GPU support
include(cmake/embree.cmake)
include(cmake/openpgl.cmake)
include(cmake/fmt.cmake)
include(cmake/robinmap.cmake)
include(cmake/xml2.cmake)
# OpenColorIO and dependencies.
include(cmake/expat.cmake)

View File

@ -156,6 +156,7 @@ download_source(OPENCLHEADERS)
download_source(ICDLOADER)
download_source(MP11)
download_source(SPIRV_HEADERS)
download_source(UNIFIED_RUNTIME)
download_source(IGC)
download_source(IGC_LLVM)
download_source(IGC_OPENCL_CLANG)

View File

@ -5,6 +5,9 @@
# for now.
string(REPLACE "-DCMAKE_CXX_STANDARD=17" " " DPCPP_CMAKE_FLAGS "${DEFAULT_CMAKE_FLAGS}")
# DPCPP already generates debug libs, there isn't much point in compiling it in debug mode itself.
string(REPLACE "-DCMAKE_BUILD_TYPE=Debug" "-DCMAKE_BUILD_TYPE=Release" DPCPP_CMAKE_FLAGS "${DPCPP_CMAKE_FLAGS}")
if(WIN32)
set(LLVM_GENERATOR "Ninja")
else()
@ -38,17 +41,18 @@ set(DPCPP_EXTRA_ARGS
-DLEVEL_ZERO_LIBRARY=${LIBDIR}/level-zero/lib/${LIBPREFIX}ze_loader${SHAREDLIBEXT}
-DLEVEL_ZERO_INCLUDE_DIR=${LIBDIR}/level-zero/include
-DLLVM_EXTERNAL_SPIRV_HEADERS_SOURCE_DIR=${BUILD_DIR}/spirvheaders/src/external_spirvheaders/
-DUNIFIED_RUNTIME_SOURCE_DIR=${BUILD_DIR}/unifiedruntime/src/external_unifiedruntime/
# Below here is copied from an invocation of buildbot/config.py
-DLLVM_ENABLE_ASSERTIONS=ON
-DLLVM_TARGETS_TO_BUILD=X86
-DLLVM_EXTERNAL_PROJECTS=sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
-DLLVM_EXTERNAL_PROJECTS=sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw^^lld
-DLLVM_EXTERNAL_SYCL_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/sycl
-DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/llvm-spirv
-DLLVM_EXTERNAL_XPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
-DXPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
-DLLVM_EXTERNAL_XPTIFW_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xptifw
-DLLVM_EXTERNAL_LIBDEVICE_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/libdevice
-DLLVM_ENABLE_PROJECTS=clang^^sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
-DLLVM_ENABLE_PROJECTS=clang^^sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw^^lld
-DLIBCLC_TARGETS_TO_BUILD=
-DLIBCLC_GENERATE_REMANGLED_VARIANTS=OFF
-DSYCL_BUILD_PI_HIP_PLATFORM=AMD
@ -104,13 +108,19 @@ add_dependencies(
external_mp11
external_level-zero
external_spirvheaders
external_unifiedruntime
)
if(BUILD_MODE STREQUAL Release AND WIN32)
ExternalProject_Add_Step(external_dpcpp after_install
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cl.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cpp.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang.exe
COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/dpcpp ${HARVEST_TARGET}/dpcpp
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/clang-cl.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/clang-cpp.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/clang.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/ld.lld.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/ld64.lld.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/lld.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/lld-link.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/wasm-ld.exe
)
endif()

View File

@ -59,3 +59,13 @@ ExternalProject_Add(external_spirvheaders
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
)
ExternalProject_Add(external_unifiedruntime
URL file://${PACKAGE_DIR}/${UNIFIED_RUNTIME_FILE}
URL_HASH ${UNIFIED_RUNTIME_HASH_TYPE}=${UNIFIED_RUNTIME_HASH}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
PREFIX ${BUILD_DIR}/unifiedruntime
CONFIGURE_COMMAND echo .
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
)

View File

@ -3,6 +3,8 @@
# Note the utility apps may use png/tiff/gif system libraries, but the
# library itself does not depend on them, so should give no problems.
set(EMBREE_CMAKE_FLAGS ${DEFAULT_CMAKE_FLAGS})
set(EMBREE_EXTRA_ARGS
-DEMBREE_ISPC_SUPPORT=OFF
-DEMBREE_TUTORIALS=OFF
@ -31,6 +33,43 @@ if(NOT BLENDER_PLATFORM_ARM)
)
endif()
if(NOT APPLE)
if(WIN32)
# Levels below -O2 don't work well for Embree+SYCL.
string(REGEX REPLACE "-O[A-Za-z0-9]" "" EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG ${BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG})
string(APPEND EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG " -O2")
string(REGEX REPLACE "-O[A-Za-z0-9]" "" EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG ${BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG})
string(APPEND EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG " -O2")
set(EMBREE_CMAKE_FLAGS
-DCMAKE_BUILD_TYPE=${BUILD_MODE}
-DCMAKE_CXX_FLAGS_RELEASE=${BLENDER_CLANG_CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_MINSIZEREL=${BLENDER_CLANG_CMAKE_CXX_FLAGS_MINSIZEREL}
-DCMAKE_CXX_FLAGS_RELWITHDEBINFO=${BLENDER_CLANG_CMAKE_CXX_FLAGS_RELWITHDEBINFO}
-DCMAKE_CXX_FLAGS_DEBUG=${EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${BLENDER_CLANG_CMAKE_C_FLAGS_RELEASE}
-DCMAKE_C_FLAGS_MINSIZEREL=${BLENDER_CLANG_CMAKE_C_FLAGS_MINSIZEREL}
-DCMAKE_C_FLAGS_RELWITHDEBINFO=${BLENDER_CLANG_CMAKE_C_FLAGS_RELWITHDEBINFO}
-DCMAKE_C_FLAGS_DEBUG=${EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG}
-DCMAKE_CXX_STANDARD=17
)
set(EMBREE_EXTRA_ARGS
-DCMAKE_CXX_COMPILER=${LIBDIR}/dpcpp/bin/clang++.exe
-DCMAKE_C_COMPILER=${LIBDIR}/dpcpp/bin/clang.exe
-DCMAKE_SHARED_LINKER_FLAGS=-L"${LIBDIR}/dpcpp/lib"
-DEMBREE_SYCL_SUPPORT=ON
${EMBREE_EXTRA_ARGS}
)
else()
set(EMBREE_EXTRA_ARGS
-DCMAKE_CXX_COMPILER=${LIBDIR}/dpcpp/bin/clang++
-DCMAKE_C_COMPILER=${LIBDIR}/dpcpp/bin/clang
-DCMAKE_SHARED_LINKER_FLAGS=-L"${LIBDIR}/dpcpp/lib"
-DEMBREE_SYCL_SUPPORT=ON
${EMBREE_EXTRA_ARGS}
)
endif()
endif()
if(TBB_STATIC_LIBRARY)
set(EMBREE_EXTRA_ARGS
${EMBREE_EXTRA_ARGS}
@ -42,16 +81,25 @@ ExternalProject_Add(external_embree
URL file://${PACKAGE_DIR}/${EMBREE_FILE}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
URL_HASH ${EMBREE_HASH_TYPE}=${EMBREE_HASH}
CMAKE_GENERATOR ${PLATFORM_ALT_GENERATOR}
PREFIX ${BUILD_DIR}/embree
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/embree/src/external_embree < ${PATCH_DIR}/embree.diff
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/embree ${DEFAULT_CMAKE_FLAGS} ${EMBREE_EXTRA_ARGS}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/embree ${EMBREE_CMAKE_FLAGS} ${EMBREE_EXTRA_ARGS}
INSTALL_DIR ${LIBDIR}/embree
)
add_dependencies(
external_embree
external_tbb
)
if(NOT APPLE)
add_dependencies(
external_embree
external_tbb
external_dpcpp
)
else()
add_dependencies(
external_embree
external_tbb
)
endif()
if(WIN32)
if(BUILD_MODE STREQUAL Release)
@ -66,6 +114,7 @@ if(WIN32)
ExternalProject_Add_Step(external_embree after_install
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/bin/embree4_d.dll ${HARVEST_TARGET}/embree/bin/embree4_d.dll
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree4_d.lib ${HARVEST_TARGET}/embree/lib/embree4_d.lib
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree4_sycl_d.lib ${HARVEST_TARGET}/embree/lib/embree4_sycl_d.lib
DEPENDEES install
)
endif()

View File

@ -74,6 +74,27 @@ if(WIN32)
set(BLENDER_CMAKE_CXX_FLAGS_RELEASE "/MD ${COMMON_MSVC_FLAGS} /D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS /O2 /Ob2 /D NDEBUG /D PLATFORM_WINDOWS /DPSAPI_VERSION=2 /DTINYFORMAT_ALLOW_WCHAR_STRINGS")
set(BLENDER_CMAKE_CXX_FLAGS_RELWITHDEBINFO "/MD ${COMMON_MSVC_FLAGS} /D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS /Zi /O2 /Ob1 /D NDEBUG /D PLATFORM_WINDOWS /DPSAPI_VERSION=2 /DTINYFORMAT_ALLOW_WCHAR_STRINGS")
# Set similar flags for CLANG compilation.
set(COMMON_CLANG_FLAGS "-D_DLL -D_MT") # Equivalent to MSVC /MD
if(WITH_OPTIMIZED_DEBUG)
set(BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -O2 -D_DEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
else()
set(BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -g -D_DEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
endif()
set(BLENDER_CLANG_CMAKE_C_FLAGS_MINSIZEREL "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -Os -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
set(BLENDER_CLANG_CMAKE_C_FLAGS_RELEASE "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -O2 -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
set(BLENDER_CLANG_CMAKE_C_FLAGS_RELWITHDEBINFO "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -g -O2 -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
if(WITH_OPTIMIZED_DEBUG)
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -D_DEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS -DBOOST_DEBUG_PYTHON -DBOOST_ALL_NO_LIB")
else()
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_DEBUG "${COMMON_CLANG_FLAG} -Xclang --dependent-lib=msvcrtd -D_DEBUG -DPLATFORM_WINDOWS -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -g -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS -DBOOST_DEBUG_PYTHON -DBOOST_ALL_NO_LIB")
endif()
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_MINSIZEREL "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_RELEASE "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_RELWITHDEBINFO "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -g -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
set(PLATFORM_FLAGS)
set(PLATFORM_CXX_FLAGS)
set(PLATFORM_CMAKE_FLAGS)

View File

@ -599,15 +599,15 @@ set(OPENPGL_HASH db63f5dac5cfa8c110ede241f0c413f00db0c4748697381c4fa23e0f9e82a75
set(OPENPGL_HASH_TYPE SHA256)
set(OPENPGL_FILE openpgl-${OPENPGL_VERSION}.tar.gz)
set(LEVEL_ZERO_VERSION v1.8.5)
set(LEVEL_ZERO_VERSION v1.8.8)
set(LEVEL_ZERO_URI https://github.com/oneapi-src/level-zero/archive/refs/tags/${LEVEL_ZERO_VERSION}.tar.gz)
set(LEVEL_ZERO_HASH b6e9663bbcc53c148d32376998298bec6f7c434ef2218c61fa708963e3a09394)
set(LEVEL_ZERO_HASH 3553ae8fa0d2d69c4210a8f3428bd6612bd8bb8a627faf52c3658a01851e66d2)
set(LEVEL_ZERO_HASH_TYPE SHA256)
set(LEVEL_ZERO_FILE level-zero-${LEVEL_ZERO_VERSION}.tar.gz)
set(DPCPP_VERSION 20221019)
set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/sycl-nightly/${DPCPP_VERSION}.tar.gz)
set(DPCPP_HASH 2f533946e91ce3829431758ea17b0b834b960c1a796e9e4563c86e03eb9603a2)
set(DPCPP_VERSION 2022-12)
set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/${DPCPP_VERSION}.tar.gz)
set(DPCPP_HASH 13151d5ae79f7c9c4a9b072a0c486ae7b3c4993e301bb1268c92214451025790)
set(DPCPP_HASH_TYPE SHA256)
set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)
@ -620,9 +620,9 @@ set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)
# will take care of building them, unpack is being done in dpcpp_deps.cmake
# Source llvm/lib/SYCLLowerIR/CMakeLists.txt
set(VCINTRINSICS_VERSION abce9184b7a3a7fe1b02289b9285610d9dc45465)
set(VCINTRINSICS_VERSION 782fbf7301dc73acaa049a4324c976ad94f587f7)
set(VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/${VCINTRINSICS_VERSION}.tar.gz)
set(VCINTRINSICS_HASH 3e9fd471246b87633b26f7e15e17ab7733d357458c53d5c5881c03929d6c551f)
set(VCINTRINSICS_HASH f4c0ccad8c1f77760364c551c65e8e1cf194d058889fa46d3b1b2d19ec4dc33f)
set(VCINTRINSICS_HASH_TYPE SHA256)
set(VCINTRINSICS_FILE vc-intrinsics-${VCINTRINSICS_VERSION}.tar.gz)
@ -657,6 +657,13 @@ set(SPIRV_HEADERS_HASH ec8ecb471a62672697846c436501638ab25447ae9d4a6761e0bfe8a9a
set(SPIRV_HEADERS_HASH_TYPE SHA256)
set(SPIRV_HEADERS_FILE SPIR-V-Headers-${SPIRV_HEADERS_VERSION}.tar.gz)
# Source llvm/sycl/plugins/unified_runtime/CMakeLists.txt
set(UNIFIED_RUNTIME_VERSION fd711c920acc4434cb52ff18b078c082d9d7f44d)
set(UNIFIED_RUNTIME_URI https://github.com/oneapi-src/unified-runtime/archive/${UNIFIED_RUNTIME_VERSION}.tar.gz)
set(UNIFIED_RUNTIME_HASH 535ca2ee78f68c5e7e62b10f1bbabd909179488885566e6d9b1fc50e8a1be65f)
set(UNIFIED_RUNTIME_HASH_TYPE SHA256)
set(UNIFIED_RUNTIME_FILE unified-runtime-${UNIFIED_RUNTIME_VERSION}.tar.gz)
######################
### DPCPP DEPS END ###
######################
@ -730,9 +737,9 @@ set(GMMLIB_HASH c1f33e1519edfc527127baeb0436b783430dfd256c643130169a3a71dc86aff9
set(GMMLIB_HASH_TYPE SHA256)
set(GMMLIB_FILE ${GMMLIB_VERSION}.tar.gz)
set(OCLOC_VERSION 22.49.25018.21)
set(OCLOC_VERSION 23.05.25593.18)
set(OCLOC_URI https://github.com/intel/compute-runtime/archive/refs/tags/${OCLOC_VERSION}.tar.gz)
set(OCLOC_HASH 92362dae08b503a34e5d3820ed284198c452bcd5e7504d90eb69887b20492c06)
set(OCLOC_HASH 122415028e631922ae999c996954dfd98ce9a32decd564d5484c31476ec9306e)
set(OCLOC_HASH_TYPE SHA256)
set(OCLOC_FILE ocloc-${OCLOC_VERSION}.tar.gz)

View File

@ -14,6 +14,7 @@ graph[autosize = false, size = "25.7,8.3!", resolution = 300];
external_dpcpp -- external_mp11;
external_dpcpp -- external_level_zero;
external_dpcpp -- external_spirvheaders;
external_dpcpp -- external_unifiedruntime;
external_embree -- external_tbb;
external_ffmpeg -- external_zlib;
external_ffmpeg -- external_openjpeg;

View File

@ -34,3 +34,156 @@ diff -Naur llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice
libsycldevice-obj
libsycldevice-spv)
diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp
index 17eeaafae194..09e6d2217aaa 100644
--- a/sycl/source/detail/program_manager/program_manager.cpp
+++ b/sycl/source/detail/program_manager/program_manager.cpp
@@ -1647,46 +1647,120 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState(
}
assert(BinImages.size() > 0 && "Expected to find at least one device image");
+ // Ignore images with incompatible state. Image is considered compatible
+ // with a target state if an image is already in the target state or can
+ // be brought to target state by compiling/linking/building.
+ //
+ // Example: an image in "executable" state is not compatible with
+ // "input" target state - there is no operation to convert the image it
+ // to "input" state. An image in "input" state is compatible with
+ // "executable" target state because it can be built to get into
+ // "executable" state.
+ for (auto It = BinImages.begin(); It != BinImages.end();) {
+ if (getBinImageState(*It) > TargetState)
+ It = BinImages.erase(It);
+ else
+ ++It;
+ }
+
std::vector<device_image_plain> SYCLDeviceImages;
- for (RTDeviceBinaryImage *BinImage : BinImages) {
- const bundle_state ImgState = getBinImageState(BinImage);
-
- // Ignore images with incompatible state. Image is considered compatible
- // with a target state if an image is already in the target state or can
- // be brought to target state by compiling/linking/building.
- //
- // Example: an image in "executable" state is not compatible with
- // "input" target state - there is no operation to convert the image it
- // to "input" state. An image in "input" state is compatible with
- // "executable" target state because it can be built to get into
- // "executable" state.
- if (ImgState > TargetState)
- continue;
- for (const sycl::device &Dev : Devs) {
+ // If a non-input state is requested, we can filter out some compatible
+ // images and return only those with the highest compatible state for each
+ // device-kernel pair. This map tracks how many kernel-device pairs need each
+ // image, so that any unneeded ones are skipped.
+ // TODO this has no effect if the requested state is input, consider having
+ // a separate branch for that case to avoid unnecessary tracking work.
+ struct DeviceBinaryImageInfo {
+ std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
+ bundle_state State = bundle_state::input;
+ int RequirementCounter = 0;
+ };
+ std::unordered_map<RTDeviceBinaryImage *, DeviceBinaryImageInfo> ImageInfoMap;
+
+ for (const sycl::device &Dev : Devs) {
+ // Track the highest image state for each requested kernel.
+ using StateImagesPairT =
+ std::pair<bundle_state, std::vector<RTDeviceBinaryImage *>>;
+ using KernelImageMapT =
+ std::map<kernel_id, StateImagesPairT, LessByNameComp>;
+ KernelImageMapT KernelImageMap;
+ if (!KernelIDs.empty())
+ for (const kernel_id &KernelID : KernelIDs)
+ KernelImageMap.insert({KernelID, {}});
+
+ for (RTDeviceBinaryImage *BinImage : BinImages) {
if (!compatibleWithDevice(BinImage, Dev) ||
!doesDevSupportImgAspects(Dev, *BinImage))
continue;
- std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
- // Collect kernel names for the image
- {
- std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
- KernelIDs = m_BinImg2KernelIDs[BinImage];
- // If the image does not contain any non-service kernels we can skip it.
- if (!KernelIDs || KernelIDs->empty())
- continue;
+ auto InsertRes = ImageInfoMap.insert({BinImage, {}});
+ DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second;
+ if (InsertRes.second) {
+ ImgInfo.State = getBinImageState(BinImage);
+ // Collect kernel names for the image
+ {
+ std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
+ ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
+ }
}
+ const bundle_state ImgState = ImgInfo.State;
+ const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
+ ImgInfo.KernelIDs;
+ int &ImgRequirementCounter = ImgInfo.RequirementCounter;
- DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
- BinImage, Ctx, Devs, ImgState, KernelIDs, /*PIProgram=*/nullptr);
+ // If the image does not contain any non-service kernels we can skip it.
+ if (!ImageKernelIDs || ImageKernelIDs->empty())
+ continue;
- SYCLDeviceImages.push_back(
- createSyclObjFromImpl<device_image_plain>(Impl));
- break;
+ // Update tracked information.
+ for (kernel_id &KernelID : *ImageKernelIDs) {
+ StateImagesPairT *StateImagesPair;
+ // If only specific kernels are requested, ignore the rest.
+ if (!KernelIDs.empty()) {
+ auto It = KernelImageMap.find(KernelID);
+ if (It == KernelImageMap.end())
+ continue;
+ StateImagesPair = &It->second;
+ } else
+ StateImagesPair = &KernelImageMap[KernelID];
+
+ auto &[KernelImagesState, KernelImages] = *StateImagesPair;
+
+ if (KernelImages.empty()) {
+ KernelImagesState = ImgState;
+ KernelImages.push_back(BinImage);
+ ++ImgRequirementCounter;
+ } else if (KernelImagesState < ImgState) {
+ for (RTDeviceBinaryImage *Img : KernelImages) {
+ auto It = ImageInfoMap.find(Img);
+ assert(It != ImageInfoMap.end());
+ assert(It->second.RequirementCounter > 0);
+ --(It->second.RequirementCounter);
+ }
+ KernelImages.clear();
+ KernelImages.push_back(BinImage);
+ KernelImagesState = ImgState;
+ ++ImgRequirementCounter;
+ } else if (KernelImagesState == ImgState) {
+ KernelImages.push_back(BinImage);
+ ++ImgRequirementCounter;
+ }
+ }
}
}
+ for (const auto &ImgInfoPair : ImageInfoMap) {
+ if (ImgInfoPair.second.RequirementCounter == 0)
+ continue;
+
+ DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
+ ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State,
+ ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr);
+
+ SYCLDeviceImages.push_back(createSyclObjFromImpl<device_image_plain>(Impl));
+ }
+
return SYCLDeviceImages;
}

View File

@ -149,3 +149,19 @@ index 074f910a2..30f490818 100644
return is_hit_first | is_hit_second;
}
};
diff -ruN a/kernels/sycl/rthwif_embree_builder.cpp b/kernels/sycl/rthwif_embree_builder.cpp
--- a/kernels/sycl/rthwif_embree_builder.cpp 2023-03-28 17:23:06.429190200 +0200
+++ b/kernels/sycl/rthwif_embree_builder.cpp 2023-03-28 17:35:01.291938600 +0200
@@ -540,7 +540,12 @@
assert(offset <= geomDescrData.size());
}
+ /* Force running BVH building sequentially from the calling thread if using TBB < 2021, as it otherwise leads to runtime issues. */
+#if TBB_VERSION_MAJOR<2021
+ RTHWIF_PARALLEL_OPERATION parallelOperation = nullptr;
+#else
RTHWIF_PARALLEL_OPERATION parallelOperation = rthwifNewParallelOperation();
+#endif
/* estimate static accel size */
BBox1f time_range(0,1);

View File

@ -37,18 +37,24 @@ elseif(HIP_HIPCC_EXECUTABLE)
set(HIP_VERSION_MINOR 0)
set(HIP_VERSION_PATCH 0)
if(WIN32)
set(_hipcc_executable ${HIP_HIPCC_EXECUTABLE}.bat)
else()
set(_hipcc_executable ${HIP_HIPCC_EXECUTABLE})
endif()
# Get version from the output.
execute_process(COMMAND ${HIP_HIPCC_EXECUTABLE} --version
OUTPUT_VARIABLE HIP_VERSION_RAW
execute_process(COMMAND ${_hipcc_executable} --version
OUTPUT_VARIABLE _hip_version_raw
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE)
# Parse parts.
if(HIP_VERSION_RAW MATCHES "HIP version: .*")
if(_hip_version_raw MATCHES "HIP version: .*")
# Strip the HIP prefix and get list of individual version components.
string(REGEX REPLACE
".*HIP version: ([.0-9]+).*" "\\1"
HIP_SEMANTIC_VERSION "${HIP_VERSION_RAW}")
HIP_SEMANTIC_VERSION "${_hip_version_raw}")
string(REPLACE "." ";" HIP_VERSION_PARTS "${HIP_SEMANTIC_VERSION}")
list(LENGTH HIP_VERSION_PARTS NUM_HIP_VERSION_PARTS)
@ -71,7 +77,13 @@ elseif(HIP_HIPCC_EXECUTABLE)
# Construct full semantic version.
set(HIP_VERSION "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_PATCH}")
unset(HIP_VERSION_RAW)
unset(_hip_version_raw)
unset(_hipcc_executable)
else()
set(HIP_FOUND FALSE)
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(HIP
REQUIRED_VARS HIP_HIPCC_EXECUTABLE
VERSION_VAR HIP_VERSION)

View File

@ -108,7 +108,11 @@ FIND_PACKAGE_HANDLE_STANDARD_ARGS(SYCL
IF(SYCL_FOUND)
SET(SYCL_INCLUDE_DIR ${SYCL_INCLUDE_DIR} ${SYCL_INCLUDE_DIR}/sycl)
SET(SYCL_LIBRARIES ${SYCL_LIBRARY})
IF(WIN32 AND SYCL_LIBRARY_DEBUG)
SET(SYCL_LIBRARIES optimized ${SYCL_LIBRARY} debug ${SYCL_LIBRARY_DEBUG})
ELSE()
SET(SYCL_LIBRARIES ${SYCL_LIBRARY})
ENDIF()
ELSE()
SET(SYCL_SYCL_FOUND FALSE)
ENDIF()

View File

@ -1,58 +0,0 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-or-later
CHECKER_IGNORE_PREFIX = [
"extern",
"intern/moto",
]
CHECKER_BIN = "smatch"
CHECKER_ARGS = [
"--full-path",
"--two-passes",
]
import project_source_info
import subprocess
import sys
import os
USE_QUIET = (os.environ.get("QUIET", None) is not None)
def main():
source_info = project_source_info.build_info(use_cxx=False, ignore_prefix_list=CHECKER_IGNORE_PREFIX)
source_defines = project_source_info.build_defines_as_args()
check_commands = []
for c, inc_dirs, defs in source_info:
cmd = ([CHECKER_BIN] +
CHECKER_ARGS +
[c] +
[("-I%s" % i) for i in inc_dirs] +
[("-D%s" % d) for d in defs] +
source_defines
)
check_commands.append((c, cmd))
def my_process(i, c, cmd):
if not USE_QUIET:
percent = 100.0 * (i / len(check_commands))
percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"
sys.stdout.flush()
sys.stdout.write("%s %s\n" % (percent_str, c))
return subprocess.Popen(cmd)
process_functions = []
for i, (c, cmd) in enumerate(check_commands):
process_functions.append((my_process, (i, c, cmd)))
project_source_info.queue_processes(process_functions)
if __name__ == "__main__":
main()

View File

@ -1,56 +0,0 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-or-later
CHECKER_IGNORE_PREFIX = [
"extern",
"intern/moto",
]
CHECKER_BIN = "sparse"
CHECKER_ARGS = [
]
import project_source_info
import subprocess
import sys
import os
USE_QUIET = (os.environ.get("QUIET", None) is not None)
def main():
source_info = project_source_info.build_info(use_cxx=False, ignore_prefix_list=CHECKER_IGNORE_PREFIX)
source_defines = project_source_info.build_defines_as_args()
check_commands = []
for c, inc_dirs, defs in source_info:
cmd = ([CHECKER_BIN] +
CHECKER_ARGS +
[c] +
[("-I%s" % i) for i in inc_dirs] +
[("-D%s" % d) for d in defs] +
source_defines
)
check_commands.append((c, cmd))
def my_process(i, c, cmd):
if not USE_QUIET:
percent = 100.0 * (i / len(check_commands))
percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"
sys.stdout.flush()
sys.stdout.write("%s %s\n" % (percent_str, c))
return subprocess.Popen(cmd)
process_functions = []
for i, (c, cmd) in enumerate(check_commands):
process_functions.append((my_process, (i, c, cmd)))
project_source_info.queue_processes(process_functions)
if __name__ == "__main__":
main()

View File

@ -1,86 +0,0 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-or-later
CHECKER_IGNORE_PREFIX = [
"extern",
"intern/moto",
]
CHECKER_BIN = "splint"
CHECKER_ARGS = [
"-weak",
"-posix-lib",
"-linelen", "10000",
"+ignorequals",
"+relaxtypes",
"-retvalother",
"+matchanyintegral",
"+longintegral",
"+ignoresigns",
"-nestcomment",
"-predboolothers",
"-ifempty",
"-unrecogcomments",
# we may want to remove these later
"-type",
"-fixedformalarray",
"-fullinitblock",
"-fcnuse",
"-initallelements",
"-castfcnptr",
# -forcehints,
"-bufferoverflowhigh", # warns a lot about sprintf()
# re-definitions, rna causes most of these
"-redef",
"-syntax",
# dummy, witjout this splint complains with:
# /usr/include/bits/confname.h:31:27: *** Internal Bug at cscannerHelp.c:2428: Unexpanded macro not function or constant: int _PC_MAX_CANON
"-D_PC_MAX_CANON=0",
]
import project_source_info
import subprocess
import sys
import os
USE_QUIET = (os.environ.get("QUIET", None) is not None)
def main():
source_info = project_source_info.build_info(use_cxx=False, ignore_prefix_list=CHECKER_IGNORE_PREFIX)
check_commands = []
for c, inc_dirs, defs in source_info:
cmd = ([CHECKER_BIN] +
CHECKER_ARGS +
[c] +
[("-I%s" % i) for i in inc_dirs] +
[("-D%s" % d) for d in defs]
)
check_commands.append((c, cmd))
def my_process(i, c, cmd):
if not USE_QUIET:
percent = 100.0 * (i / len(check_commands))
percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"
sys.stdout.write("%s %s\n" % (percent_str, c))
sys.stdout.flush()
return subprocess.Popen(cmd)
process_functions = []
for i, (c, cmd) in enumerate(check_commands):
process_functions.append((my_process, (i, c, cmd)))
project_source_info.queue_processes(process_functions)
if __name__ == "__main__":
main()

View File

@ -82,7 +82,7 @@ if(NOT APPLE)
set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_DEVICE_ONEAPI ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_ONEAPI_BINARIES ON CACHE BOOL "" FORCE)
endif()

View File

@ -1384,4 +1384,3 @@ macro(windows_process_platform_bundled_libraries library_deps)
endforeach()
endif()
endmacro()

View File

@ -174,7 +174,7 @@ if(SYSTEMSTUBS_LIBRARY)
list(APPEND PLATFORM_LINKLIBS SystemStubs)
endif()
string(APPEND PLATFORM_CFLAGS " -pipe -funsigned-char -fno-strict-aliasing")
string(APPEND PLATFORM_CFLAGS " -pipe -funsigned-char -fno-strict-aliasing -ffp-contract=off")
set(PLATFORM_LINKFLAGS
"-fexceptions -framework CoreServices -framework Foundation -framework IOKit -framework AppKit -framework Cocoa -framework Carbon -framework AudioUnit -framework AudioToolbox -framework CoreAudio -framework Metal -framework QuartzCore"
)

View File

@ -692,10 +692,12 @@ if(WITH_GHOST_WAYLAND)
if(WITH_GHOST_WAYLAND_LIBDECOR)
if(_use_system_wayland)
pkg_check_modules(libdecor REQUIRED libdecor-0>=0.1)
pkg_check_modules(libdecor libdecor-0>=0.1)
else()
set(libdecor_INCLUDE_DIRS "${LIBDIR}/wayland_libdecor/include/libdecor-0")
set(libdecor_FOUND ON)
endif()
set_and_warn_library_found("libdecor" libdecor_FOUND WITH_GHOST_WAYLAND_LIBDECOR)
endif()
if(WITH_GHOST_WAYLAND_DBUS)
@ -803,8 +805,7 @@ if(CMAKE_COMPILER_IS_GNUCC)
# Automatically turned on when building with "-march=native". This is
# explicitly turned off here as it will make floating point math give a bit
# different results. This will lead to automated test failures. So disable
# this until we support it. Seems to default to off in clang and the intel
# compiler.
# this until we support it.
set(PLATFORM_CFLAGS "-pipe -fPIC -funsigned-char -fno-strict-aliasing -ffp-contract=off")
# `maybe-uninitialized` is unreliable in release builds, but fine in debug builds.
@ -892,7 +893,7 @@ if(CMAKE_COMPILER_IS_GNUCC)
# CLang is the same as GCC for now.
elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
set(PLATFORM_CFLAGS "-pipe -fPIC -funsigned-char -fno-strict-aliasing")
set(PLATFORM_CFLAGS "-pipe -fPIC -funsigned-char -fno-strict-aliasing -ffp-contract=off")
if(WITH_LINKER_MOLD AND _IS_LINKER_DEFAULT)
find_program(MOLD_BIN "mold")

View File

@ -9,7 +9,7 @@ buildbot:
cuda11:
version: '11.4.1'
hip:
version: '5.3.22480'
version: '5.5.30571'
optix:
version: '7.3.0'
ocloc:

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-or-later
'''
"""
This script generates the blender.1 man page, embedding the help text
from the Blender executable itself. Invoke it as follows:
@ -9,7 +9,7 @@ from the Blender executable itself. Invoke it as follows:
where <path-to-blender> is the path to the Blender executable,
and <output-filename> is where to write the generated man page.
'''
"""
import argparse
import os
@ -87,29 +87,29 @@ def man_page_from_blender_help(fh: TextIO, blender_bin: str, verbose: bool) -> N
(blender_info["date"], blender_info["version"].replace(".", "\\&."))
)
fh.write(r'''
fh.write(r"""
.SH NAME
blender \- a full-featured 3D application''')
blender \- a full-featured 3D application""")
fh.write(r'''
fh.write(r"""
.SH SYNOPSIS
.B blender [args ...] [file] [args ...]''')
.B blender [args ...] [file] [args ...]""")
fh.write(r'''
fh.write(r"""
.br
.SH DESCRIPTION
.PP
.B blender
is a full-featured 3D application. It supports the entirety of the 3D pipeline - '''
'''modeling, rigging, animation, simulation, rendering, compositing, motion tracking, and video editing.
is a full-featured 3D application. It supports the entirety of the 3D pipeline - """
"""modeling, rigging, animation, simulation, rendering, compositing, motion tracking, and video editing.
Use Blender to create 3D images and animations, films and commercials, content for games, '''
r'''architectural and industrial visualizations, and scientific visualizations.
Use Blender to create 3D images and animations, films and commercials, content for games, """
r"""architectural and industrial visualizations, and scientific visualizations.
https://www.blender.org''')
https://www.blender.org""")
fh.write(r'''
.SH OPTIONS''')
fh.write(r"""
.SH OPTIONS""")
fh.write("\n\n")
@ -152,7 +152,7 @@ https://www.blender.org''')
# Footer Content.
fh.write(r'''
fh.write(r"""
.br
.SH SEE ALSO
.B luxrender(1)
@ -162,7 +162,7 @@ https://www.blender.org''')
This manpage was written for a Debian GNU/Linux system by Daniel Mester
<mester@uni-bremen.de> and updated by Cyril Brulebois
<cyril.brulebois@enst-bretagne.fr> and Dan Eicher <dan@trollwerks.org>.
''')
""")
def create_argparse() -> argparse.ArgumentParser:

View File

@ -865,29 +865,40 @@ Unfortunate Corner Cases
Besides all expected cases listed above, there are a few others that should not be
an issue but, due to internal implementation details, currently are:
- ``Object.hide_viewport``, ``Object.hide_select`` and ``Object.hide_render``:
Setting any of those Booleans will trigger a rebuild of Collection caches,
thus breaking any current iteration over ``Collection.all_objects``.
Collection Objects
^^^^^^^^^^^^^^^^^^
Changing: ``Object.hide_viewport``, ``Object.hide_select`` or ``Object.hide_render``
will trigger a rebuild of Collection caches, thus breaking any current iteration over ``Collection.all_objects``.
.. rubric:: Do not:
.. code-block:: python
# `all_objects` is an iterator. Using it directly while performing operations on its members that will update
# the memory accessed by the `all_objects` iterator will lead to invalid memory accesses and crashes.
for object in bpy.data.collections["Collection"].all_objects:
object.hide_viewport = True
.. rubric:: Do not:
.. rubric:: Do:
.. code-block:: python
.. code-block:: python
# `all_objects` is an iterator. Using it directly while performing operations on its members that will update
# the memory accessed by the `all_objects` iterator will lead to invalid memory accesses and crashes.
for object in bpy.data.collections["Collection"].all_objects:
object.hide_viewport = True
# `all_objects[:]` is an independent list generated from the iterator. As long as no objects are deleted,
# its content will remain valid even if the data accessed by the `all_objects` iterator is modified.
for object in bpy.data.collections["Collection"].all_objects[:]:
object.hide_viewport = True
.. rubric:: Do:
Data-Blocks Renaming During Iteration
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. code-block:: python
# `all_objects[:]` is an independent list generated from the iterator. As long as no objects are deleted,
# its content will remain valid even if the data accessed by the `all_objects` iterator is modified.
for object in bpy.data.collections["Collection"].all_objects[:]:
object.hide_viewport = True
Data-blocks accessed from ``bpy.data`` are sorted when their name is set.
Any loop that iterates of a data such as ``bpy.data.objects`` for example,
and sets the objects ``name`` must get all items from the iterator first (typically by converting to a list or tuple)
to avoid missing some objects and iterating over others multiple times.
sys.exit

View File

@ -572,7 +572,7 @@ template<class T> inline bool cmpMinMax(T &minv, T &maxv, const T &val)
}
template<> inline bool cmpMinMax<Vec3>(Vec3 &minv, Vec3 &maxv, const Vec3 &val)
{
return (cmpMinMax(minv.x, maxv.x, val.x) | cmpMinMax(minv.y, maxv.y, val.y) |
return (cmpMinMax(minv.x, maxv.x, val.x) || cmpMinMax(minv.y, maxv.y, val.y) ||
cmpMinMax(minv.z, maxv.z, val.z));
}

View File

@ -281,6 +281,9 @@ endif()
if(WITH_CYCLES_EMBREE)
add_definitions(-DWITH_EMBREE)
if(WITH_CYCLES_DEVICE_ONEAPI AND EMBREE_SYCL_SUPPORT)
add_definitions(-DWITH_EMBREE_GPU)
endif()
add_definitions(-DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION})
include_directories(
SYSTEM

View File

@ -1544,6 +1544,13 @@ class CyclesPreferences(bpy.types.AddonPreferences):
default=False,
)
use_oneapirt: BoolProperty(
name="Embree on GPU (Experimental)",
description="Embree GPU execution will allow to use hardware ray tracing on Intel GPUs, which will provide better performance. "
"However this support is experimental and some scenes may render incorrectly",
default=False,
)
kernel_optimization_level: EnumProperty(
name="Kernel Optimization",
description="Kernels can be optimized based on scene content. Optimized kernels are requested at the start of a render. "
@ -1676,16 +1683,16 @@ class CyclesPreferences(bpy.types.AddonPreferences):
col.label(text=iface_("and NVIDIA driver version %s or newer") % driver_version,
icon='BLANK1', translate=False)
elif device_type == 'HIP':
if True:
col.label(text="HIP temporarily disabled due to compiler bugs", icon='BLANK1')
else:
import sys
if sys.platform[:3] == "win":
driver_version = "21.Q4"
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text=iface_("and AMD Radeon Pro %s driver or newer") % driver_version,
icon='BLANK1', translate=False)
elif sys.platform.startswith("linux"):
import sys
if sys.platform[:3] == "win":
driver_version = "21.Q4"
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text=iface_("and AMD Radeon Pro %s driver or newer") % driver_version,
icon='BLANK1', translate=False)
elif sys.platform.startswith("linux"):
if True:
col.label(text="HIP temporarily disabled due to compiler bugs", icon='BLANK1')
else:
driver_version = "22.10"
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text=iface_("and AMD driver version %s or newer") % driver_version, icon='BLANK1',
@ -1763,6 +1770,11 @@ class CyclesPreferences(bpy.types.AddonPreferences):
col.prop(self, "kernel_optimization_level")
col.prop(self, "use_metalrt")
if compute_device_type == 'ONEAPI' and _cycles.with_embree_gpu:
row = layout.row()
row.use_property_split = True
row.prop(self, "use_oneapirt")
def draw(self, context):
self.draw_impl(self.layout, context)

View File

@ -112,9 +112,26 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences,
device.has_peer_memory = false;
}
if (get_boolean(cpreferences, "use_metalrt")) {
device.use_metalrt = true;
bool accumulated_use_hardware_raytracing = false;
foreach (
DeviceInfo &info,
(device.multi_devices.size() != 0 ? device.multi_devices : vector<DeviceInfo>({device}))) {
if (info.type == DEVICE_METAL && !get_boolean(cpreferences, "use_metalrt")) {
info.use_hardware_raytracing = false;
}
if (info.type == DEVICE_ONEAPI && !get_boolean(cpreferences, "use_oneapirt")) {
info.use_hardware_raytracing = false;
}
/* There is an accumulative logic here, because Multi-devices are support only for
* the same backend + CPU in Blender right now, and both oneAPI and Metal have a
* global boolean backend setting (see above) for enabling/disabling HW RT,
* so all sub-devices in the multi-device should enable (or disable) HW RT
* simultaneously (and CPU device are expected to ignore `use_hardware_raytracing` setting). */
accumulated_use_hardware_raytracing |= info.use_hardware_raytracing;
}
device.use_hardware_raytracing = accumulated_use_hardware_raytracing;
if (preview) {
/* Disable specialization for preview renders. */

View File

@ -1034,6 +1034,14 @@ void *CCL_python_module_init()
Py_INCREF(Py_False);
#endif /* WITH_EMBREE */
#ifdef WITH_EMBREE_GPU
PyModule_AddObject(mod, "with_embree_gpu", Py_True);
Py_INCREF(Py_True);
#else /* WITH_EMBREE_GPU */
PyModule_AddObject(mod, "with_embree_gpu", Py_False);
Py_INCREF(Py_False);
#endif /* WITH_EMBREE_GPU */
if (ccl::openimagedenoise_supported()) {
PyModule_AddObject(mod, "with_openimagedenoise", Py_True);
Py_INCREF(Py_True);

View File

@ -1061,7 +1061,7 @@ void BlenderSession::ensure_display_driver_if_needed()
unique_ptr<BlenderDisplayDriver> display_driver = make_unique<BlenderDisplayDriver>(
b_engine, b_scene, background);
display_driver_ = display_driver.get();
session->set_display_driver(move(display_driver));
session->set_display_driver(std::move(display_driver));
}
CCL_NAMESPACE_END

View File

@ -606,7 +606,7 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
int4 *bvh_nodes = &bvh->pack.nodes[0];
size_t bvh_nodes_size = bvh->pack.nodes.size();
for (size_t i = 0, j = 0; i < bvh_nodes_size; j++) {
for (size_t i = 0; i < bvh_nodes_size;) {
size_t nsize, nsize_bbox;
if (bvh_nodes[i].x & PATH_RAY_NODE_UNALIGNED) {
nsize = BVH_UNALIGNED_NODE_SIZE;

View File

@ -111,9 +111,13 @@ BVHEmbree::~BVHEmbree()
}
}
void BVHEmbree::build(Progress &progress, Stats *stats, RTCDevice rtc_device_)
void BVHEmbree::build(Progress &progress,
Stats *stats,
RTCDevice rtc_device_,
const bool rtc_device_is_sycl_)
{
rtc_device = rtc_device_;
rtc_device_is_sycl = rtc_device_is_sycl_;
assert(rtc_device);
rtcSetDeviceErrorFunction(rtc_device, rtc_error_func, NULL);
@ -266,15 +270,29 @@ void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i)
rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
const int *triangles = mesh->get_triangles().data();
rtcSetSharedGeometryBuffer(geom_id,
RTC_BUFFER_TYPE_INDEX,
0,
RTC_FORMAT_UINT3,
triangles,
0,
sizeof(int) * 3,
num_triangles);
if (!rtc_device_is_sycl) {
rtcSetSharedGeometryBuffer(geom_id,
RTC_BUFFER_TYPE_INDEX,
0,
RTC_FORMAT_UINT3,
triangles,
0,
sizeof(int) * 3,
num_triangles);
}
else {
/* NOTE(sirgienko): If the Embree device is a SYCL device, then Embree execution will
* happen on GPU, and we cannot use standard host pointers at this point. So instead
* of making a shared geometry buffer - a new Embree buffer will be created and data
* will be copied. */
int *triangles_buffer = (int *)rtcSetNewGeometryBuffer(
geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, sizeof(int) * 3, num_triangles);
assert(triangles_buffer);
if (triangles_buffer) {
static_assert(sizeof(int) == sizeof(uint));
std::memcpy(triangles_buffer, triangles, sizeof(int) * 3 * (num_triangles));
}
}
set_tri_vertex_buffer(geom_id, mesh, false);
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
@ -323,14 +341,38 @@ void BVHEmbree::set_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh, con
rtcUpdateGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t);
}
else {
rtcSetSharedGeometryBuffer(geom_id,
RTC_BUFFER_TYPE_VERTEX,
t,
RTC_FORMAT_FLOAT3,
verts,
0,
sizeof(float3),
num_verts + 1);
if (!rtc_device_is_sycl) {
rtcSetSharedGeometryBuffer(geom_id,
RTC_BUFFER_TYPE_VERTEX,
t,
RTC_FORMAT_FLOAT3,
verts,
0,
sizeof(float3),
num_verts + 1);
}
else {
/* NOTE(sirgienko): If the Embree device is a SYCL device, then Embree execution will
* happen on GPU, and we cannot use standard host pointers at this point. So instead
* of making a shared geometry buffer - a new Embree buffer will be created and data
* will be copied. */
/* As float3 is packed on GPU side, we map it to packed_float3. */
packed_float3 *verts_buffer = (packed_float3 *)rtcSetNewGeometryBuffer(
geom_id,
RTC_BUFFER_TYPE_VERTEX,
t,
RTC_FORMAT_FLOAT3,
sizeof(packed_float3),
num_verts + 1);
assert(verts_buffer);
if (verts_buffer) {
for (size_t i = (size_t)0; i < num_verts + 1; ++i) {
verts_buffer[i].x = verts[i].x;
verts_buffer[i].y = verts[i].y;
verts_buffer[i].z = verts[i].z;
}
}
}
}
}
}

View File

@ -29,7 +29,10 @@ class PointCloud;
class BVHEmbree : public BVH {
public:
void build(Progress &progress, Stats *stats, RTCDevice rtc_device);
void build(Progress &progress,
Stats *stats,
RTCDevice rtc_device,
const bool isSyclEmbreeDevice = false);
void refit(Progress &progress);
RTCScene scene;
@ -55,6 +58,7 @@ class BVHEmbree : public BVH {
const bool update);
RTCDevice rtc_device;
bool rtc_device_is_sycl;
enum RTCBuildQuality build_quality;
};

View File

@ -42,15 +42,19 @@ endif()
###########################################################################
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
set(WITH_CYCLES_HIP_BINARIES OFF)
message(STATUS "HIP temporarily disabled due to compiler bugs")
if(UNIX)
# Disabled until there is a HIP 5.5 release for Linux.
set(WITH_CYCLES_HIP_BINARIES OFF)
message(STATUS "HIP temporarily disabled due to compiler bugs")
else()
# Need at least HIP 5.5 to solve compiler bug affecting the kernel.
find_package(HIP 5.5.0)
set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
# find_package(HIP)
# set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
# if(HIP_FOUND)
# message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
# endif()
if(HIP_FOUND)
message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
endif()
endif()
endif()
if(NOT WITH_HIP_DYNLOAD)

View File

@ -84,7 +84,7 @@ CPUDevice::~CPUDevice()
texture_info.free();
}
BVHLayoutMask CPUDevice::get_bvh_layout_mask() const
BVHLayoutMask CPUDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
{
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
#ifdef WITH_EMBREE

View File

@ -56,7 +56,7 @@ class CPUDevice : public Device {
CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_);
~CPUDevice();
virtual BVHLayoutMask get_bvh_layout_mask() const override;
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
/* Returns true if the texture info was copied to the device (meaning, some more
* re-initialization might be needed). */

View File

@ -35,7 +35,7 @@ bool CUDADevice::have_precompiled_kernels()
return path_exists(cubins_path);
}
BVHLayoutMask CUDADevice::get_bvh_layout_mask() const
BVHLayoutMask CUDADevice::get_bvh_layout_mask(uint /*kernel_features*/) const
{
return BVH_LAYOUT_BVH2;
}

View File

@ -38,7 +38,7 @@ class CUDADevice : public GPUDevice {
static bool have_precompiled_kernels();
virtual BVHLayoutMask get_bvh_layout_mask() const override;
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
void set_error(const string &error) override;

View File

@ -354,7 +354,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.has_guiding = true;
info.has_profiling = true;
info.has_peer_memory = false;
info.use_metalrt = false;
info.use_hardware_raytracing = false;
info.denoisers = DENOISER_ALL;
foreach (const DeviceInfo &device, subdevices) {
@ -403,7 +403,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.has_guiding &= device.has_guiding;
info.has_profiling &= device.has_profiling;
info.has_peer_memory |= device.has_peer_memory;
info.use_metalrt |= device.use_metalrt;
info.use_hardware_raytracing |= device.use_hardware_raytracing;
info.denoisers &= device.denoisers;
}

View File

@ -71,15 +71,16 @@ class DeviceInfo {
string description;
string id; /* used for user preferences, should stay fixed with changing hardware config */
int num;
bool display_device; /* GPU is used as a display device. */
bool has_nanovdb; /* Support NanoVDB volumes. */
bool has_light_tree; /* Support light tree. */
bool has_osl; /* Support Open Shading Language. */
bool has_guiding; /* Support path guiding. */
bool has_profiling; /* Supports runtime collection of profiling info. */
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
bool has_gpu_queue; /* Device supports GPU queue. */
bool use_metalrt; /* Use MetalRT to accelerate ray queries (Metal only). */
bool display_device; /* GPU is used as a display device. */
bool has_nanovdb; /* Support NanoVDB volumes. */
bool has_light_tree; /* Support light tree. */
bool has_osl; /* Support Open Shading Language. */
bool has_guiding; /* Support path guiding. */
bool has_profiling; /* Supports runtime collection of profiling info. */
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
bool has_gpu_queue; /* Device supports GPU queue. */
bool use_hardware_raytracing; /* Use hardware ray tracing to accelerate ray queries in a backend.
*/
KernelOptimizationLevel kernel_optimization_level; /* Optimization level applied to path tracing
* kernels (Metal only). */
DenoiserTypeMask denoisers; /* Supported denoiser types. */
@ -101,7 +102,7 @@ class DeviceInfo {
has_profiling = false;
has_peer_memory = false;
has_gpu_queue = false;
use_metalrt = false;
use_hardware_raytracing = false;
denoisers = DENOISER_NONE;
}
@ -157,7 +158,7 @@ class Device {
fprintf(stderr, "%s\n", error.c_str());
fflush(stderr);
}
virtual BVHLayoutMask get_bvh_layout_mask() const = 0;
virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const = 0;
/* statistics */
Stats &stats;

View File

@ -20,7 +20,7 @@ class DummyDevice : public Device {
~DummyDevice() {}
virtual BVHLayoutMask get_bvh_layout_mask() const override
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override
{
return 0;
}

View File

@ -137,7 +137,7 @@ void device_hip_info(vector<DeviceInfo> &devices)
info.num = num;
info.has_nanovdb = true;
info.has_light_tree = false;
info.has_light_tree = true;
info.denoisers = 0;
info.has_gpu_queue = true;

View File

@ -35,7 +35,7 @@ bool HIPDevice::have_precompiled_kernels()
return path_exists(fatbins_path);
}
BVHLayoutMask HIPDevice::get_bvh_layout_mask() const
BVHLayoutMask HIPDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
{
return BVH_LAYOUT_BVH2;
}

View File

@ -35,7 +35,7 @@ class HIPDevice : public GPUDevice {
static bool have_precompiled_kernels();
virtual BVHLayoutMask get_bvh_layout_mask() const override;
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
void set_error(const string &error) override;

View File

@ -3,7 +3,9 @@
#include "device/kernel.h"
#include "util/log.h"
#ifndef __KERNEL_ONEAPI__
# include "util/log.h"
#endif
CCL_NAMESPACE_BEGIN
@ -153,10 +155,13 @@ const char *device_kernel_as_string(DeviceKernel kernel)
case DEVICE_KERNEL_NUM:
break;
};
#ifndef __KERNEL_ONEAPI__
LOG(FATAL) << "Unhandled kernel " << static_cast<int>(kernel) << ", should never happen.";
#endif
return "UNKNOWN";
}
#ifndef __KERNEL_ONEAPI__
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel)
{
os << device_kernel_as_string(kernel);
@ -178,5 +183,6 @@ string device_kernel_mask_as_string(DeviceKernelMask mask)
return str;
}
#endif
CCL_NAMESPACE_END

View File

@ -3,11 +3,13 @@
#pragma once
#include "kernel/types.h"
#ifndef __KERNEL_ONEAPI__
# include "kernel/types.h"
#include "util/string.h"
# include "util/string.h"
#include <ostream> // NOLINT
# include <ostream> // NOLINT
#endif
CCL_NAMESPACE_BEGIN
@ -15,9 +17,12 @@ bool device_kernel_has_shading(DeviceKernel kernel);
bool device_kernel_has_intersection(DeviceKernel kernel);
const char *device_kernel_as_string(DeviceKernel kernel);
#ifndef __KERNEL_ONEAPI__
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel);
typedef uint64_t DeviceKernelMask;
string device_kernel_mask_as_string(DeviceKernelMask mask);
#endif
CCL_NAMESPACE_END

View File

@ -100,7 +100,7 @@ class MetalDevice : public Device {
virtual void cancel() override;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
void set_error(const string &error) override;

View File

@ -39,7 +39,7 @@ bool MetalDevice::is_device_cancelled(int ID)
return get_device_by_ID(ID, lock) == nullptr;
}
BVHLayoutMask MetalDevice::get_bvh_layout_mask() const
BVHLayoutMask MetalDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
{
return use_metalrt ? BVH_LAYOUT_METAL : BVH_LAYOUT_BVH2;
}
@ -100,12 +100,12 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
}
case METAL_GPU_AMD: {
max_threads_per_threadgroup = 128;
use_metalrt = info.use_metalrt;
use_metalrt = info.use_hardware_raytracing;
break;
}
case METAL_GPU_APPLE: {
max_threads_per_threadgroup = 512;
use_metalrt = info.use_metalrt;
use_metalrt = info.use_hardware_raytracing;
break;
}
}

View File

@ -96,12 +96,13 @@ class MultiDevice : public Device {
return error_msg;
}
virtual BVHLayoutMask get_bvh_layout_mask() const override
virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const override
{
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
BVHLayoutMask bvh_layout_mask_all = BVH_LAYOUT_NONE;
foreach (const SubDevice &sub_device, devices) {
BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask();
BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask(
kernel_features);
bvh_layout_mask &= device_bvh_layout_mask;
bvh_layout_mask_all |= device_bvh_layout_mask;
}

View File

@ -40,12 +40,12 @@ bool device_oneapi_init()
if (getenv("SYCL_CACHE_TRESHOLD") == nullptr) {
_putenv_s("SYCL_CACHE_THRESHOLD", "0");
}
if (getenv("SYCL_DEVICE_FILTER") == nullptr) {
if (getenv("ONEAPI_DEVICE_SELECTOR") == nullptr) {
if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
_putenv_s("SYCL_DEVICE_FILTER", "level_zero");
_putenv_s("ONEAPI_DEVICE_SELECTOR", "level_zero:*");
}
else {
_putenv_s("SYCL_DEVICE_FILTER", "level_zero,cuda,hip");
_putenv_s("ONEAPI_DEVICE_SELECTOR", "!opencl:*");
}
}
if (getenv("SYCL_ENABLE_PCI") == nullptr) {
@ -58,10 +58,10 @@ bool device_oneapi_init()
setenv("SYCL_CACHE_PERSISTENT", "1", false);
setenv("SYCL_CACHE_THRESHOLD", "0", false);
if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
setenv("SYCL_DEVICE_FILTER", "level_zero", false);
setenv("ONEAPI_DEVICE_SELECTOR", "level_zero:*", false);
}
else {
setenv("SYCL_DEVICE_FILTER", "level_zero,cuda,hip", false);
setenv("ONEAPI_DEVICE_SELECTOR", "!opencl:*", false);
}
setenv("SYCL_ENABLE_PCI", "1", false);
setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false);
@ -87,7 +87,8 @@ Device *device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &pro
}
#ifdef WITH_ONEAPI
static void device_iterator_cb(const char *id, const char *name, int num, void *user_ptr)
static void device_iterator_cb(
const char *id, const char *name, int num, bool hwrt_support, void *user_ptr)
{
vector<DeviceInfo> *devices = (vector<DeviceInfo> *)user_ptr;
@ -112,6 +113,13 @@ static void device_iterator_cb(const char *id, const char *name, int num, void *
/* NOTE(@nsirgien): Seems not possible to know from SYCL/oneAPI or Level0. */
info.display_device = false;
# ifdef WITH_EMBREE_GPU
info.use_hardware_raytracing = hwrt_support;
# else
info.use_hardware_raytracing = false;
(void)hwrt_support;
# endif
devices->push_back(info);
VLOG_INFO << "Added device \"" << name << "\" with id \"" << info.id << "\".";
}

View File

@ -8,7 +8,19 @@
# include "util/debug.h"
# include "util/log.h"
# ifdef WITH_EMBREE_GPU
# include "bvh/embree.h"
# endif
# include "kernel/device/oneapi/globals.h"
# include "kernel/device/oneapi/kernel.h"
# if defined(WITH_EMBREE_GPU) && defined(EMBREE_SYCL_SUPPORT) && !defined(SYCL_LANGUAGE_VERSION)
/* These declarations are missing from embree headers when compiling from a compiler that doesn't
* support SYCL. */
extern "C" RTCDevice rtcNewSYCLDevice(sycl::context context, const char *config);
extern "C" bool rtcIsSYCLDeviceSupported(const sycl::device sycl_device);
# endif
CCL_NAMESPACE_BEGIN
@ -22,16 +34,29 @@ static void queue_error_cb(const char *message, void *user_ptr)
OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: Device(info, stats, profiler),
device_queue_(nullptr),
# ifdef WITH_EMBREE_GPU
embree_device(nullptr),
embree_scene(nullptr),
# endif
texture_info_(this, "texture_info", MEM_GLOBAL),
kg_memory_(nullptr),
kg_memory_device_(nullptr),
kg_memory_size_(0)
{
need_texture_info_ = false;
use_hardware_raytracing = info.use_hardware_raytracing;
oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);
bool is_finished_ok = create_queue(device_queue_, info.num);
bool is_finished_ok = create_queue(device_queue_,
info.num,
# ifdef WITH_EMBREE_GPU
use_hardware_raytracing ? &embree_device : nullptr
# else
nullptr
# endif
);
if (is_finished_ok == false) {
set_error("oneAPI queue initialization error: got runtime exception \"" +
oneapi_error_string_ + "\"");
@ -42,6 +67,16 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi
assert(device_queue_);
}
# ifdef WITH_EMBREE_GPU
use_hardware_raytracing = use_hardware_raytracing && (embree_device != nullptr);
# else
use_hardware_raytracing = false;
# endif
if (use_hardware_raytracing) {
VLOG_INFO << "oneAPI will use hardware ray tracing for intersection acceleration.";
}
size_t globals_segment_size;
is_finished_ok = kernel_globals_size(globals_segment_size);
if (is_finished_ok == false) {
@ -64,6 +99,11 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi
OneapiDevice::~OneapiDevice()
{
# ifdef WITH_EMBREE_GPU
if (embree_device)
rtcReleaseDevice(embree_device);
# endif
texture_info_.free();
usm_free(device_queue_, kg_memory_);
usm_free(device_queue_, kg_memory_device_);
@ -80,15 +120,47 @@ bool OneapiDevice::check_peer_access(Device * /*peer_device*/)
return false;
}
BVHLayoutMask OneapiDevice::get_bvh_layout_mask() const
bool OneapiDevice::can_use_hardware_raytracing_for_features(uint requested_features) const
{
return BVH_LAYOUT_BVH2;
/* MNEE and Ray-trace kernels currently don't work correctly with HWRT. */
return !(requested_features & (KERNEL_FEATURE_MNEE | KERNEL_FEATURE_NODE_RAYTRACE));
}
BVHLayoutMask OneapiDevice::get_bvh_layout_mask(uint requested_features) const
{
return (use_hardware_raytracing &&
can_use_hardware_raytracing_for_features(requested_features)) ?
BVH_LAYOUT_EMBREE :
BVH_LAYOUT_BVH2;
}
# ifdef WITH_EMBREE_GPU
void OneapiDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
{
if (embree_device && bvh->params.bvh_layout == BVH_LAYOUT_EMBREE) {
BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
if (refit) {
bvh_embree->refit(progress);
}
else {
bvh_embree->build(progress, &stats, embree_device, true);
}
if (bvh->params.top_level) {
embree_scene = bvh_embree->scene;
}
}
else {
Device::build_bvh(bvh, progress, refit);
}
}
# endif
bool OneapiDevice::load_kernels(const uint requested_features)
{
assert(device_queue_);
kernel_features = requested_features;
bool is_finished_ok = oneapi_run_test_kernel(device_queue_);
if (is_finished_ok == false) {
set_error("oneAPI test kernel execution: got a runtime exception \"" + oneapi_error_string_ +
@ -100,7 +172,14 @@ bool OneapiDevice::load_kernels(const uint requested_features)
assert(device_queue_);
}
is_finished_ok = oneapi_load_kernels(device_queue_, (const unsigned int)requested_features);
if (use_hardware_raytracing && !can_use_hardware_raytracing_for_features(requested_features)) {
VLOG_INFO
<< "Hardware ray tracing disabled, not supported yet by oneAPI for requested features.";
use_hardware_raytracing = false;
}
is_finished_ok = oneapi_load_kernels(
device_queue_, (const unsigned int)requested_features, use_hardware_raytracing);
if (is_finished_ok == false) {
set_error("oneAPI kernels loading: got a runtime exception \"" + oneapi_error_string_ + "\"");
}
@ -327,6 +406,16 @@ void OneapiDevice::const_copy_to(const char *name, void *host, size_t size)
<< string_human_readable_number(size) << " bytes. ("
<< string_human_readable_size(size) << ")";
# ifdef WITH_EMBREE_GPU
if (strcmp(name, "data") == 0) {
assert(size <= sizeof(KernelData));
/* Update scene handle(since it is different for each device on multi devices) */
KernelData *const data = (KernelData *)host;
data->device_bvh = embree_scene;
}
# endif
ConstMemMap::iterator i = const_mem_map_.find(name);
device_vector<uchar> *data;
@ -446,7 +535,9 @@ void OneapiDevice::check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_
# endif
}
bool OneapiDevice::create_queue(SyclQueue *&external_queue, int device_index)
bool OneapiDevice::create_queue(SyclQueue *&external_queue,
int device_index,
void *embree_device_pointer)
{
bool finished_correct = true;
try {
@ -457,6 +548,13 @@ bool OneapiDevice::create_queue(SyclQueue *&external_queue, int device_index)
sycl::queue *created_queue = new sycl::queue(devices[device_index],
sycl::property::queue::in_order());
external_queue = reinterpret_cast<SyclQueue *>(created_queue);
# ifdef WITH_EMBREE_GPU
if (embree_device_pointer) {
*((RTCDevice *)embree_device_pointer) = rtcNewSYCLDevice(created_queue->get_context(), "");
}
# else
(void)embree_device_pointer;
# endif
}
catch (sycl::exception const &e) {
finished_correct = false;
@ -625,7 +723,8 @@ bool OneapiDevice::enqueue_kernel(KernelContext *kernel_context,
size_t global_size,
void **args)
{
return oneapi_enqueue_kernel(kernel_context, kernel, global_size, args);
return oneapi_enqueue_kernel(
kernel_context, kernel, global_size, kernel_features, use_hardware_raytracing, args);
}
/* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows
@ -767,9 +866,9 @@ char *OneapiDevice::device_capabilities()
sycl::id<3> max_work_item_sizes =
device.get_info<sycl::info::device::max_work_item_sizes<3>>();
WRITE_ATTR("max_work_item_sizes_dim0", ((size_t)max_work_item_sizes.get(0)))
WRITE_ATTR("max_work_item_sizes_dim1", ((size_t)max_work_item_sizes.get(1)))
WRITE_ATTR("max_work_item_sizes_dim2", ((size_t)max_work_item_sizes.get(2)))
WRITE_ATTR(max_work_item_sizes_dim0, ((size_t)max_work_item_sizes.get(0)))
WRITE_ATTR(max_work_item_sizes_dim1, ((size_t)max_work_item_sizes.get(1)))
WRITE_ATTR(max_work_item_sizes_dim2, ((size_t)max_work_item_sizes.get(2)))
GET_NUM_ATTR(max_work_group_size)
GET_NUM_ATTR(max_num_sub_groups)
@ -792,7 +891,7 @@ char *OneapiDevice::device_capabilities()
GET_NUM_ATTR(native_vector_width_half)
size_t max_clock_frequency = device.get_info<sycl::info::device::max_clock_frequency>();
WRITE_ATTR("max_clock_frequency", max_clock_frequency)
WRITE_ATTR(max_clock_frequency, max_clock_frequency)
GET_NUM_ATTR(address_bits)
GET_NUM_ATTR(max_mem_alloc_size)
@ -801,7 +900,7 @@ char *OneapiDevice::device_capabilities()
* supported so we always return false, even if device supports HW texture usage acceleration.
*/
bool image_support = false;
WRITE_ATTR("image_support", (size_t)image_support)
WRITE_ATTR(image_support, (size_t)image_support)
GET_NUM_ATTR(max_parameter_size)
GET_NUM_ATTR(mem_base_addr_align)
@ -830,12 +929,17 @@ void OneapiDevice::iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_p
std::string name = device.get_info<sycl::info::device::name>();
# else
std::string name = "SYCL Host Task (Debug)";
# endif
# ifdef WITH_EMBREE_GPU
bool hwrt_support = rtcIsSYCLDeviceSupported(device);
# else
bool hwrt_support = false;
# endif
std::string id = "ONEAPI_" + platform_name + "_" + name;
if (device.has(sycl::aspect::ext_intel_pci_address)) {
id.append("_" + device.get_info<sycl::ext::intel::info::device::pci_address>());
}
(cb)(id.c_str(), name.c_str(), num, user_ptr);
(cb)(id.c_str(), name.c_str(), num, hwrt_support, user_ptr);
num++;
}
}

View File

@ -16,15 +16,16 @@ CCL_NAMESPACE_BEGIN
class DeviceQueue;
typedef void (*OneAPIDeviceIteratorCallback)(const char *id,
const char *name,
int num,
void *user_ptr);
typedef void (*OneAPIDeviceIteratorCallback)(
const char *id, const char *name, int num, bool hwrt_support, void *user_ptr);
class OneapiDevice : public Device {
private:
SyclQueue *device_queue_;
# ifdef WITH_EMBREE_GPU
RTCDevice embree_device;
RTCScene embree_scene;
# endif
using ConstMemMap = map<string, device_vector<uchar> *>;
ConstMemMap const_mem_map_;
device_vector<TextureInfo> texture_info_;
@ -34,17 +35,21 @@ class OneapiDevice : public Device {
size_t kg_memory_size_ = (size_t)0;
size_t max_memory_on_device_ = (size_t)0;
std::string oneapi_error_string_;
bool use_hardware_raytracing = false;
unsigned int kernel_features = 0;
public:
virtual BVHLayoutMask get_bvh_layout_mask() const override;
virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const override;
OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
virtual ~OneapiDevice();
# ifdef WITH_EMBREE_GPU
void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
# endif
bool check_peer_access(Device *peer_device) override;
bool load_kernels(const uint requested_features) override;
bool load_kernels(const uint kernel_features) override;
void load_texture_info();
@ -113,8 +118,9 @@ class OneapiDevice : public Device {
SyclQueue *sycl_queue();
protected:
bool can_use_hardware_raytracing_for_features(uint kernel_features) const;
void check_usm(SyclQueue *queue, const void *usm_ptr, bool allow_host);
bool create_queue(SyclQueue *&external_queue, int device_index);
bool create_queue(SyclQueue *&external_queue, int device_index, void *embree_device);
void free_queue(SyclQueue *queue);
void *usm_aligned_alloc_host(SyclQueue *queue, size_t memory_size, size_t alignment);
void *usm_alloc_device(SyclQueue *queue, size_t memory_size);

View File

@ -151,7 +151,7 @@ unique_ptr<DeviceQueue> OptiXDevice::gpu_queue_create()
return make_unique<OptiXDeviceQueue>(this);
}
BVHLayoutMask OptiXDevice::get_bvh_layout_mask() const
BVHLayoutMask OptiXDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
{
/* OptiX has its own internal acceleration structure format. */
return BVH_LAYOUT_OPTIX;

View File

@ -88,7 +88,7 @@ class OptiXDevice : public CUDADevice {
OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
~OptiXDevice();
BVHLayoutMask get_bvh_layout_mask() const override;
BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
string compile_kernel_get_common_cflags(const uint kernel_features);

View File

@ -574,7 +574,7 @@ void PathTrace::denoise(const RenderWork &render_work)
void PathTrace::set_output_driver(unique_ptr<OutputDriver> driver)
{
output_driver_ = move(driver);
output_driver_ = std::move(driver);
}
void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
@ -585,7 +585,7 @@ void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
destroy_gpu_resources();
if (driver) {
display_ = make_unique<PathTraceDisplay>(move(driver));
display_ = make_unique<PathTraceDisplay>(std::move(driver));
}
else {
display_ = nullptr;

View File

@ -9,7 +9,9 @@
CCL_NAMESPACE_BEGIN
PathTraceDisplay::PathTraceDisplay(unique_ptr<DisplayDriver> driver) : driver_(move(driver)) {}
PathTraceDisplay::PathTraceDisplay(unique_ptr<DisplayDriver> driver) : driver_(std::move(driver))
{
}
void PathTraceDisplay::reset(const BufferParams &buffer_params, const bool reset_rendering)
{

View File

@ -28,6 +28,7 @@ static size_t estimate_single_state_size(const uint kernel_features)
#define KERNEL_STRUCT_ARRAY_MEMBER(parent_struct, type, name, feature) \
state_size += (kernel_features & (feature)) ? sizeof(type) : 0;
#define KERNEL_STRUCT_END(name) \
(void)array_index; \
break; \
}
#define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
@ -139,6 +140,7 @@ void PathTraceWorkGPU::alloc_integrator_soa()
integrator_state_gpu_.parent_struct[array_index].name = (type *)array->device_pointer; \
}
#define KERNEL_STRUCT_END(name) \
(void)array_index; \
break; \
}
#define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
@ -299,8 +301,8 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
* become busy after adding new tiles). This is especially important for the shadow catcher which
* schedules work in halves of available number of paths. */
work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8);
work_tile_scheduler_.set_accelerated_rt((device_->get_bvh_layout_mask() & BVH_LAYOUT_OPTIX) !=
0);
work_tile_scheduler_.set_accelerated_rt(
(device_->get_bvh_layout_mask(device_scene_->data.kernel_features) & BVH_LAYOUT_OPTIX) != 0);
work_tile_scheduler_.reset(effective_buffer_params_,
start_sample,
samples_num,

View File

@ -96,10 +96,13 @@ set(SRC_KERNEL_DEVICE_ONEAPI_HEADERS
device/oneapi/compat.h
device/oneapi/context_begin.h
device/oneapi/context_end.h
device/oneapi/context_intersect_begin.h
device/oneapi/context_intersect_end.h
device/oneapi/globals.h
device/oneapi/image.h
device/oneapi/kernel.h
device/oneapi/kernel_templates.h
device/cpu/bvh.h
)
set(SRC_KERNEL_CLOSURE_HEADERS
@ -764,7 +767,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
# Set defaults for spir64 and spir64_gen options
if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64)
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-large-register-file -ze-opt-regular-grf-kernel integrator_intersect'")
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-regular-grf-kernel integrator_intersect -ze-opt-large-grf-kernel shade -ze-opt-no-local-to-generic'")
endif()
if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen)
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}" CACHE STRING "Extra build options for spir64_gen target")
@ -775,8 +778,6 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
# Host execution won't use GPU binaries, no need to compile them.
if(WITH_CYCLES_ONEAPI_BINARIES AND NOT WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
# AoT binaries aren't currently reused when calling sycl::build.
list(APPEND sycl_compiler_flags -DSYCL_SKIP_KERNELS_PRELOAD)
# Iterate over all targest and their options
list(JOIN CYCLES_ONEAPI_SYCL_TARGETS "," targets_string)
list(APPEND sycl_compiler_flags -fsycl-targets=${targets_string})
@ -798,6 +799,59 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
-I"${NANOVDB_INCLUDE_DIR}")
endif()
if(WITH_CYCLES_EMBREE AND EMBREE_SYCL_SUPPORT)
list(APPEND sycl_compiler_flags
-DWITH_EMBREE
-DWITH_EMBREE_GPU
-DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION}
-I"${EMBREE_INCLUDE_DIRS}")
if(WIN32)
list(APPEND sycl_compiler_flags
-ladvapi32.lib
)
endif()
set(next_library_mode "")
foreach(library ${EMBREE_LIBRARIES})
string(TOLOWER "${library}" library_lower)
if(("${library_lower}" STREQUAL "optimized") OR
("${library_lower}" STREQUAL "debug"))
set(next_library_mode "${library_lower}")
else()
if(next_library_mode STREQUAL "")
list(APPEND EMBREE_TBB_LIBRARIES_optimized ${library})
list(APPEND EMBREE_TBB_LIBRARIES_debug ${library})
else()
list(APPEND EMBREE_TBB_LIBRARIES_${next_library_mode} ${library})
endif()
set(next_library_mode "")
endif()
endforeach()
foreach(library ${TBB_LIBRARIES})
string(TOLOWER "${library}" library_lower)
if(("${library_lower}" STREQUAL "optimized") OR
("${library_lower}" STREQUAL "debug"))
set(next_library_mode "${library_lower}")
else()
if(next_library_mode STREQUAL "")
list(APPEND EMBREE_TBB_LIBRARIES_optimized ${library})
list(APPEND EMBREE_TBB_LIBRARIES_debug ${library})
else()
list(APPEND EMBREE_TBB_LIBRARIES_${next_library_mode} ${library})
endif()
set(next_library_mode "")
endif()
endforeach()
list(APPEND sycl_compiler_flags
"$<$<CONFIG:Release>:${EMBREE_TBB_LIBRARIES_optimized}>"
"$<$<CONFIG:RelWithDebInfo>:${EMBREE_TBB_LIBRARIES_optimized}>"
"$<$<CONFIG:MinSizeRel>:${EMBREE_TBB_LIBRARIES_optimized}>"
"$<$<CONFIG:Debug>:${EMBREE_TBB_LIBRARIES_debug}>"
)
endif()
if(WITH_CYCLES_DEBUG)
list(APPEND sycl_compiler_flags -DWITH_CYCLES_DEBUG)
endif()

View File

@ -21,6 +21,28 @@
# define __BVH2__
#endif
#if defined(__KERNEL_ONEAPI__) && defined(WITH_EMBREE_GPU)
/* bool is apparently not tested for specialization constants:
* https://github.com/intel/llvm/blob/39d1c65272a786b2b13a6f094facfddf9408406d/sycl/test/basic_tests/SYCL-2020-spec-constants.cpp#L25-L27
* Instead of adding one more bool specialization constant, we reuse existing embree_features one
* and use RTC_FEATURE_FLAG_NONE as value to test for avoiding to call Embree on GPU.
*/
/* We set it to RTC_FEATURE_FLAG_NONE by default so AoT binaries contain MNE and ray-trace kernels
* pre-compiled without Embree.
* Changing this default value would require updating the logic in oneapi_load_kernels(). */
static constexpr sycl::specialization_id<RTCFeatureFlags> oneapi_embree_features{
RTC_FEATURE_FLAG_NONE};
# define IF_USING_EMBREE \
if (kernel_handler.get_specialization_constant<oneapi_embree_features>() != \
RTC_FEATURE_FLAG_NONE)
# define IF_NOT_USING_EMBREE \
if (kernel_handler.get_specialization_constant<oneapi_embree_features>() == \
RTC_FEATURE_FLAG_NONE)
#else
# define IF_USING_EMBREE
# define IF_NOT_USING_EMBREE
#endif
CCL_NAMESPACE_BEGIN
#ifdef __BVH2__
@ -74,30 +96,39 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
}
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
return kernel_embree_intersect(kg, ray, visibility, isect);
IF_USING_EMBREE
{
if (kernel_data.device_bvh) {
return kernel_embree_intersect(kg, ray, visibility, isect);
}
}
# endif
IF_NOT_USING_EMBREE
{
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
if (kernel_data.bvh.have_motion) {
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_hair_motion(kg, ray, isect, visibility);
}
if (kernel_data.bvh.have_curves) {
return bvh_intersect_hair_motion(kg, ray, isect, visibility);
}
# endif /* __HAIR__ */
return bvh_intersect_motion(kg, ray, isect, visibility);
}
return bvh_intersect_motion(kg, ray, isect, visibility);
}
# endif /* __OBJECT_MOTION__ */
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_hair(kg, ray, isect, visibility);
}
if (kernel_data.bvh.have_curves) {
return bvh_intersect_hair(kg, ray, isect, visibility);
}
# endif /* __HAIR__ */
return bvh_intersect(kg, ray, isect, visibility);
return bvh_intersect(kg, ray, isect, visibility);
}
kernel_assert(false);
return false;
}
/* Single object BVH traversal, for SSS/AO/bevel. */
@ -129,17 +160,27 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
}
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
return kernel_embree_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
IF_USING_EMBREE
{
if (kernel_data.device_bvh) {
return kernel_embree_intersect_local(
kg, ray, local_isect, local_object, lcg_state, max_hits);
}
}
# endif
IF_NOT_USING_EMBREE
{
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
if (kernel_data.bvh.have_motion) {
return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
# endif /* __OBJECT_MOTION__ */
return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
kernel_assert(false);
return false;
}
# endif
@ -184,35 +225,44 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
}
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
return kernel_embree_intersect_shadow_all(
kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
IF_USING_EMBREE
{
if (kernel_data.device_bvh) {
return kernel_embree_intersect_shadow_all(
kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
}
}
# endif
IF_NOT_USING_EMBREE
{
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
if (kernel_data.bvh.have_motion) {
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair_motion(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair_motion(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
# endif /* __HAIR__ */
return bvh_intersect_shadow_all_motion(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
return bvh_intersect_shadow_all_motion(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
# endif /* __OBJECT_MOTION__ */
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
# endif /* __HAIR__ */
return bvh_intersect_shadow_all(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
return bvh_intersect_shadow_all(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
kernel_assert(false);
return false;
}
# endif /* __SHADOW_RECORD_ALL__ */
@ -239,13 +289,28 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
return false;
}
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
return bvh_intersect_volume_motion(kg, ray, isect, visibility);
# ifdef __EMBREE__
IF_USING_EMBREE
{
if (kernel_data.device_bvh) {
return kernel_embree_intersect_volume(kg, ray, isect, visibility);
}
}
# endif
IF_NOT_USING_EMBREE
{
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
return bvh_intersect_volume_motion(kg, ray, isect, visibility);
}
# endif /* __OBJECT_MOTION__ */
return bvh_intersect_volume(kg, ray, isect, visibility);
return bvh_intersect_volume(kg, ray, isect, visibility);
}
kernel_assert(false);
return false;
}
# endif /* defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__) */
@ -275,18 +340,27 @@ ccl_device_intersect uint scene_intersect_volume(KernelGlobals kg,
}
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
IF_USING_EMBREE
{
if (kernel_data.device_bvh) {
return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
}
}
# endif
IF_NOT_USING_EMBREE
{
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
}
if (kernel_data.bvh.have_motion) {
return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
}
# endif /* __OBJECT_MOTION__ */
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
}
kernel_assert(false);
return false;
}
# endif /* defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__) */

View File

@ -51,8 +51,6 @@ ccl_device_inline
int object = OBJECT_NONE;
float isect_t = ray->tmax;
int num_hits_in_instance = 0;
uint num_hits = 0;
isect_array->t = ray->tmax;
@ -152,7 +150,6 @@ ccl_device_inline
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
num_hits_in_instance++;
isect_array->t = isect_t;
if (num_hits == max_hits) {
return num_hits;
@ -193,7 +190,6 @@ ccl_device_inline
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
num_hits_in_instance++;
isect_array->t = isect_t;
if (num_hits == max_hits) {
return num_hits;
@ -219,7 +215,6 @@ ccl_device_inline
bvh_instance_push(kg, object, ray, &P, &dir, &idir);
#endif
num_hits_in_instance = 0;
isect_array->t = isect_t;
++stack_ptr;

View File

@ -13,8 +13,13 @@
# include <embree3/rtcore_scene.h>
#endif
#include "kernel/device/cpu/compat.h"
#include "kernel/device/cpu/globals.h"
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/compat.h"
# include "kernel/device/oneapi/globals.h"
#else
# include "kernel/device/cpu/compat.h"
# include "kernel/device/cpu/globals.h"
#endif
#include "kernel/bvh/types.h"
#include "kernel/bvh/util.h"
@ -33,11 +38,16 @@ using numhit_t = uint8_t;
using numhit_t = uint32_t;
#endif
#define CYCLES_EMBREE_USED_FEATURES \
(RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE | \
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT | \
RTC_FEATURE_FLAG_MOTION_BLUR | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE | \
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE)
#ifdef __KERNEL_ONEAPI__
# define CYCLES_EMBREE_USED_FEATURES \
(kernel_handler.get_specialization_constant<oneapi_embree_features>())
#else
# define CYCLES_EMBREE_USED_FEATURES \
(RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE | \
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT | \
RTC_FEATURE_FLAG_MOTION_BLUR | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE | \
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE)
#endif
#define EMBREE_IS_HAIR(x) (x & 1)
@ -99,7 +109,9 @@ struct CCLVolumeContext
#if EMBREE_MAJOR_VERSION >= 4
KernelGlobals kg;
const Ray *ray;
# ifdef __VOLUME_RECORD_ALL__
numhit_t max_hits;
# endif
numhit_t num_hits;
#endif
Intersection *vol_isect;
@ -252,7 +264,8 @@ ccl_device_inline void kernel_embree_convert_sss_hit(KernelGlobals kg,
* Things like recording subsurface or shadow hits for later evaluation
* as well as filtering for volume objects happen here.
* Cycles' own BVH does that directly inside the traversal calls. */
ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNArguments *args)
ccl_device_forceinline void kernel_embree_filter_intersection_func_impl(
const RTCFilterFunctionNArguments *args)
{
/* Current implementation in Cycles assumes only single-ray intersection queries. */
assert(args->N == 1);
@ -263,7 +276,11 @@ ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNA
#else
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
#endif
#ifdef __KERNEL_ONEAPI__
KernelGlobalsGPU *kg = nullptr;
#else
const KernelGlobalsCPU *kg = ctx->kg;
#endif
const Ray *cray = ctx->ray;
if (kernel_embree_is_self_intersection(
@ -277,7 +294,7 @@ ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNA
* as well as filtering for volume objects happen here.
* Cycles' own BVH does that directly inside the traversal calls.
*/
ccl_device void kernel_embree_filter_occluded_shadow_all_func(
ccl_device_forceinline void kernel_embree_filter_occluded_shadow_all_func_impl(
const RTCFilterFunctionNArguments *args)
{
/* Current implementation in Cycles assumes only single-ray intersection queries. */
@ -290,7 +307,11 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
#else
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
#endif
#ifdef __KERNEL_ONEAPI__
KernelGlobalsGPU *kg = nullptr;
#else
const KernelGlobalsCPU *kg = ctx->kg;
#endif
const Ray *cray = ctx->ray;
Intersection current_isect;
@ -326,7 +347,7 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
}
/* Test if we need to record this transparent intersection. */
const numhit_t max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
const numhit_t max_record_hits = min(ctx->max_hits, numhit_t(INTEGRATOR_SHADOW_ISECT_SIZE));
if (ctx->num_recorded_hits < max_record_hits) {
/* If maximum number of hits was reached, replace the intersection with the
* highest distance. We want to find the N closest intersections. */
@ -363,7 +384,7 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
*args->valid = 0;
}
ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
ccl_device_forceinline void kernel_embree_filter_occluded_local_func_impl(
const RTCFilterFunctionNArguments *args)
{
/* Current implementation in Cycles assumes only single-ray intersection queries. */
@ -376,7 +397,11 @@ ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
#else
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
#endif
#ifdef __KERNEL_ONEAPI__
KernelGlobalsGPU *kg = nullptr;
#else
const KernelGlobalsCPU *kg = ctx->kg;
#endif
const Ray *cray = ctx->ray;
/* Check if it's hitting the correct object. */
@ -462,7 +487,7 @@ ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
*args->valid = 0;
}
ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func_impl(
const RTCFilterFunctionNArguments *args)
{
/* Current implementation in Cycles assumes only single-ray intersection queries. */
@ -475,11 +500,17 @@ ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
#else
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
#endif
#ifdef __KERNEL_ONEAPI__
KernelGlobalsGPU *kg = nullptr;
#else
const KernelGlobalsCPU *kg = ctx->kg;
#endif
const Ray *cray = ctx->ray;
#ifdef __VOLUME_RECORD_ALL__
/* Append the intersection to the end of the array. */
if (ctx->num_hits < ctx->max_hits) {
#endif
Intersection current_isect;
kernel_embree_convert_hit(
kg, ray, hit, &current_isect, reinterpret_cast<intptr_t>(args->geometryUserPtr));
@ -496,10 +527,17 @@ ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
int object_flag = kernel_data_fetch(object_flag, tri_object);
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
--ctx->num_hits;
#ifndef __VOLUME_RECORD_ALL__
/* Without __VOLUME_RECORD_ALL__ we need only a first counted hit, so we will
* continue tracing only if a current hit is not counted. */
*args->valid = 0;
#endif
}
#ifdef __VOLUME_RECORD_ALL__
/* This tells Embree to continue tracing. */
*args->valid = 0;
}
#endif
}
#if EMBREE_MAJOR_VERSION < 4
@ -513,14 +551,14 @@ ccl_device_forceinline void kernel_embree_filter_occluded_func(
switch (ctx->type) {
case CCLIntersectContext::RAY_SHADOW_ALL:
kernel_embree_filter_occluded_shadow_all_func(args);
kernel_embree_filter_occluded_shadow_all_func_impl(args);
break;
case CCLIntersectContext::RAY_LOCAL:
case CCLIntersectContext::RAY_SSS:
kernel_embree_filter_occluded_local_func(args);
kernel_embree_filter_occluded_local_func_impl(args);
break;
case CCLIntersectContext::RAY_VOLUME_ALL:
kernel_embree_filter_occluded_volume_all_func(args);
kernel_embree_filter_occluded_volume_all_func_impl(args);
break;
case CCLIntersectContext::RAY_REGULAR:
@ -569,7 +607,63 @@ ccl_device void kernel_embree_filter_occluded_func_backface_cull(
kernel_embree_filter_occluded_func(args);
}
#endif
#ifdef __KERNEL_ONEAPI__
/* Static wrappers so we can call the callbacks from out side the ONEAPIKernelContext class */
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
kernel_embree_filter_intersection_func_static(const RTCFilterFunctionNArguments *args)
{
RTCHit *hit = (RTCHit *)args->hit;
CCLFirstHitContext *ctx = (CCLFirstHitContext *)(args->context);
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
context->kernel_embree_filter_intersection_func_impl(args);
}
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
kernel_embree_filter_occluded_shadow_all_func_static(const RTCFilterFunctionNArguments *args)
{
RTCHit *hit = (RTCHit *)args->hit;
CCLShadowContext *ctx = (CCLShadowContext *)(args->context);
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
context->kernel_embree_filter_occluded_shadow_all_func_impl(args);
}
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
kernel_embree_filter_occluded_local_func_static(const RTCFilterFunctionNArguments *args)
{
RTCHit *hit = (RTCHit *)args->hit;
CCLLocalContext *ctx = (CCLLocalContext *)(args->context);
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
context->kernel_embree_filter_occluded_local_func_impl(args);
}
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
kernel_embree_filter_occluded_volume_all_func_static(const RTCFilterFunctionNArguments *args)
{
RTCHit *hit = (RTCHit *)args->hit;
CCLVolumeContext *ctx = (CCLVolumeContext *)(args->context);
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
context->kernel_embree_filter_occluded_volume_all_func_impl(args);
}
# define kernel_embree_filter_intersection_func \
ONEAPIKernelContext::kernel_embree_filter_intersection_func_static
# define kernel_embree_filter_occluded_shadow_all_func \
ONEAPIKernelContext::kernel_embree_filter_occluded_shadow_all_func_static
# define kernel_embree_filter_occluded_local_func \
ONEAPIKernelContext::kernel_embree_filter_occluded_local_func_static
# define kernel_embree_filter_occluded_volume_all_func \
ONEAPIKernelContext::kernel_embree_filter_occluded_volume_all_func_static
#else
# define kernel_embree_filter_intersection_func kernel_embree_filter_intersection_func_impl
# if EMBREE_MAJOR_VERSION >= 4
# define kernel_embree_filter_occluded_shadow_all_func \
kernel_embree_filter_occluded_shadow_all_func_impl
# define kernel_embree_filter_occluded_local_func kernel_embree_filter_occluded_local_func_impl
# define kernel_embree_filter_occluded_volume_all_func \
kernel_embree_filter_occluded_volume_all_func_impl
# endif
#endif
/* Scene intersection. */
@ -583,7 +677,15 @@ ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
#if EMBREE_MAJOR_VERSION >= 4
CCLFirstHitContext ctx;
rtcInitRayQueryContext(&ctx);
# ifdef __KERNEL_ONEAPI__
/* NOTE(sirgienko): Cycles GPU back-ends passes NULL to KernelGlobals and
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
* as a class context (Metal, oneAPI). So we need to pass this context here
* in order to have an access to it later in Embree filter functions on GPU. */
ctx.kg = (KernelGlobals)this;
# else
ctx.kg = kg;
# endif
#else
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
rtcInitIntersectContext(&ctx);
@ -596,7 +698,7 @@ ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
#if EMBREE_MAJOR_VERSION >= 4
RTCIntersectArguments args;
rtcInitIntersectArguments(&args);
args.filter = (RTCFilterFunctionN)kernel_embree_filter_intersection_func;
args.filter = reinterpret_cast<RTCFilterFunctionN>(kernel_embree_filter_intersection_func);
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
rtcIntersect1(kernel_data.device_bvh, &ray_hit, &args);
@ -625,7 +727,15 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
# if EMBREE_MAJOR_VERSION >= 4
CCLLocalContext ctx;
rtcInitRayQueryContext(&ctx);
# ifdef __KERNEL_ONEAPI__
/* NOTE(sirgienko): Cycles GPU back-ends passes NULL to KernelGlobals and
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
* as a class context (Metal, oneAPI). So we need to pass this context here
* in order to have an access to it later in Embree filter functions on GPU. */
ctx.kg = (KernelGlobals)this;
# else
ctx.kg = kg;
# endif
# else
CCLIntersectContext ctx(kg,
has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL);
@ -646,7 +756,7 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
# if EMBREE_MAJOR_VERSION >= 4
RTCOccludedArguments args;
rtcInitOccludedArguments(&args);
args.filter = (RTCFilterFunctionN)(kernel_embree_filter_occluded_local_func);
args.filter = reinterpret_cast<RTCFilterFunctionN>(kernel_embree_filter_occluded_local_func);
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
# endif
@ -692,7 +802,7 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
#ifdef __SHADOW_RECORD_ALL__
ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
IntegratorShadowStateCPU *state,
IntegratorShadowState state,
ccl_private const Ray *ray,
uint visibility,
uint max_hits,
@ -702,7 +812,15 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
# if EMBREE_MAJOR_VERSION >= 4
CCLShadowContext ctx;
rtcInitRayQueryContext(&ctx);
# ifdef __KERNEL_ONEAPI__
/* NOTE(sirgienko): Cycles GPU back-ends passes NULL to KernelGlobals and
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
* as a class context (Metal, oneAPI). So we need to pass this context here
* in order to have an access to it later in Embree filter functions on GPU. */
ctx.kg = (KernelGlobals)this;
# else
ctx.kg = kg;
# endif
# else
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
rtcInitIntersectContext(&ctx);
@ -718,7 +836,8 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
# if EMBREE_MAJOR_VERSION >= 4
RTCOccludedArguments args;
rtcInitOccludedArguments(&args);
args.filter = (RTCFilterFunctionN)kernel_embree_filter_occluded_shadow_all_func;
args.filter = reinterpret_cast<RTCFilterFunctionN>(
kernel_embree_filter_occluded_shadow_all_func);
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
@ -736,19 +855,31 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
ccl_private const Ray *ray,
ccl_private Intersection *isect,
# ifdef __VOLUME_RECORD_ALL__
const uint max_hits,
# endif
const uint visibility)
{
# if EMBREE_MAJOR_VERSION >= 4
CCLVolumeContext ctx;
rtcInitRayQueryContext(&ctx);
# ifdef __KERNEL_ONEAPI__
/* NOTE(sirgienko) Cycles GPU back-ends passes NULL to KernelGlobals and
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
* as a class context (Metal, oneAPI). So we need to pass this context here
* in order to have an access to it later in Embree filter functions on GPU. */
ctx.kg = (KernelGlobals)this;
# else
ctx.kg = kg;
# endif
# else
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
rtcInitIntersectContext(&ctx);
# endif
ctx.vol_isect = isect;
# ifdef __VOLUME_RECORD_ALL__
ctx.max_hits = numhit_t(max_hits);
# endif
ctx.num_hits = numhit_t(0);
ctx.ray = ray;
RTCRay rtc_ray;
@ -756,7 +887,8 @@ ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
# if EMBREE_MAJOR_VERSION >= 4
RTCOccludedArguments args;
rtcInitOccludedArguments(&args);
args.filter = (RTCFilterFunctionN)kernel_embree_filter_occluded_volume_all_func;
args.filter = reinterpret_cast<RTCFilterFunctionN>(
kernel_embree_filter_occluded_volume_all_func);
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);

View File

@ -128,6 +128,12 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
ccl_gpu_kernel_postfix
/* Intersection kernels need access to the kernel handler for specialization constants to work
* properly. */
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_begin.h"
#endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_intersect_closest,
ccl_global const int *path_index_array,
@ -185,6 +191,10 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
ccl_gpu_kernel_postfix
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_end.h"
#endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_background,
ccl_global const int *path_index_array,
@ -249,6 +259,12 @@ ccl_gpu_kernel_postfix
constant int __dummy_constant [[function_constant(Kernel_DummyConstant)]];
#endif
/* Kernels using intersections need access to the kernel handler for specialization constants to
* work properly. */
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_begin.h"
#endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_surface_raytrace,
ccl_global const int *path_index_array,
@ -287,6 +303,9 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
}
ccl_gpu_kernel_postfix
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_end.h"
#endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_volume,

View File

@ -224,10 +224,13 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
}
int blas_index = metal_ancillaries->blas_userID_to_index_lookUp[local_object];
// transform the ray into object's local space
Transform itfm = kernel_data_fetch(objects, local_object).itfm;
r.origin = transform_point(&itfm, r.origin);
r.direction = transform_direction(&itfm, r.direction);
if (!(kernel_data_fetch(object_flag, local_object) & SD_OBJECT_TRANSFORM_APPLIED)) {
// transform the ray into object's local space
Transform itfm = kernel_data_fetch(objects, local_object).itfm;
r.origin = transform_point(&itfm, r.origin);
r.direction = transform_direction(&itfm, r.direction);
}
intersection = metalrt_intersect.intersect(
r,

View File

@ -5,6 +5,11 @@
#define __KERNEL_GPU__
#define __KERNEL_ONEAPI__
#define __KERNEL_64_BIT__
#ifdef WITH_EMBREE_GPU
# define __KERNEL_GPU_RAYTRACING__
#endif
#define CCL_NAMESPACE_BEGIN
#define CCL_NAMESPACE_END
@ -57,17 +62,19 @@
#define ccl_gpu_kernel_threads(block_num_threads)
#ifndef WITH_ONEAPI_SYCL_HOST_TASK
# define ccl_gpu_kernel_signature(name, ...) \
# define __ccl_gpu_kernel_signature(name, ...) \
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
size_t kernel_global_size, \
size_t kernel_local_size, \
sycl::handler &cgh, \
__VA_ARGS__) { \
(kg); \
cgh.parallel_for<class kernel_##name>( \
cgh.parallel_for( \
sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
[=](sycl::nd_item<1> item) {
# define ccl_gpu_kernel_signature __ccl_gpu_kernel_signature
# define ccl_gpu_kernel_postfix \
}); \
}

View File

@ -0,0 +1,18 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2023 Intel Corporation */
#if !defined(WITH_ONEAPI_SYCL_HOST_TASK) && defined(WITH_EMBREE_GPU)
# undef ccl_gpu_kernel_signature
# define ccl_gpu_kernel_signature(name, ...) \
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
size_t kernel_global_size, \
size_t kernel_local_size, \
sycl::handler &cgh, \
__VA_ARGS__) \
{ \
(kg); \
cgh.parallel_for( \
sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
[=](sycl::nd_item<1> item, sycl::kernel_handler oneapi_kernel_handler) { \
((ONEAPIKernelContext*)kg)->kernel_handler = oneapi_kernel_handler;
#endif

View File

@ -0,0 +1,7 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2023 Intel Corporation */
#if !defined(WITH_ONEAPI_SYCL_HOST_TASK) && defined(WITH_EMBREE_GPU)
# undef ccl_gpu_kernel_signature
# define ccl_gpu_kernel_signature __ccl_gpu_kernel_signature
#endif

View File

@ -31,6 +31,8 @@ typedef struct KernelGlobalsGPU {
size_t nd_item_group_range_0;
size_t nd_item_global_id_0;
size_t nd_item_global_range_0;
#else
sycl::kernel_handler kernel_handler;
#endif
} KernelGlobalsGPU;

View File

@ -16,9 +16,22 @@
# include "kernel/device/gpu/kernel.h"
# include "device/kernel.cpp"
static OneAPIErrorCallback s_error_cb = nullptr;
static void *s_error_user_ptr = nullptr;
# ifdef WITH_EMBREE_GPU
static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_BASIC_FEATURES =
(const RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE |
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS |
RTC_FEATURE_FLAG_POINT | RTC_FEATURE_FLAG_MOTION_BLUR);
static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_ALL_FEATURES =
(const RTCFeatureFlags)(CYCLES_ONEAPI_EMBREE_BASIC_FEATURES |
RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE |
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE);
# endif
void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr)
{
s_error_cb = cb;
@ -142,15 +155,99 @@ size_t oneapi_kernel_preferred_local_size(SyclQueue *queue,
return std::min(limit_work_group_size, preferred_work_group_size);
}
bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
bool oneapi_kernel_is_required_for_features(const std::string &kernel_name,
const uint kernel_features)
{
if ((kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0 &&
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE)) !=
std::string::npos)
return false;
if ((kernel_features & KERNEL_FEATURE_MNEE) == 0 &&
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE)) !=
std::string::npos)
return false;
if ((kernel_features & KERNEL_FEATURE_VOLUME) == 0 &&
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK)) !=
std::string::npos)
return false;
return true;
}
bool oneapi_kernel_is_raytrace_or_mnee(const std::string &kernel_name)
{
return (kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE)) !=
std::string::npos) ||
(kernel_name.find(device_kernel_as_string(
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE)) != std::string::npos);
}
bool oneapi_kernel_is_using_embree(const std::string &kernel_name)
{
# ifdef WITH_EMBREE_GPU
/* MNEE and Ray-trace kernels aren't yet enabled to use Embree. */
for (int i = 0; i < (int)DEVICE_KERNEL_NUM; i++) {
DeviceKernel kernel = (DeviceKernel)i;
if (device_kernel_has_intersection(kernel)) {
if (kernel_name.find(device_kernel_as_string(kernel)) != std::string::npos) {
return !oneapi_kernel_is_raytrace_or_mnee(kernel_name);
}
}
}
# endif
return false;
}
bool oneapi_load_kernels(SyclQueue *queue_,
const uint kernel_features,
bool use_hardware_raytracing)
{
# ifdef SYCL_SKIP_KERNELS_PRELOAD
(void)queue_;
(void)requested_features;
# else
assert(queue_);
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
# ifdef WITH_EMBREE_GPU
/* For best performance, we always JIT compile the kernels that are using Embree. */
if (use_hardware_raytracing) {
try {
sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
{queue->get_device()});
for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
const std::string &kernel_name = kernel_id.get_name();
if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
!oneapi_kernel_is_using_embree(kernel_name)) {
continue;
}
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
/* Hair requires embree curves support. */
if (kernel_features & KERNEL_FEATURE_HAIR) {
one_kernel_bundle_input
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
CYCLES_ONEAPI_EMBREE_ALL_FEATURES);
sycl::build(one_kernel_bundle_input);
}
else {
one_kernel_bundle_input
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
CYCLES_ONEAPI_EMBREE_BASIC_FEATURES);
sycl::build(one_kernel_bundle_input);
}
}
}
catch (sycl::exception const &e) {
if (s_error_cb) {
s_error_cb(e.what(), s_error_user_ptr);
}
return false;
}
}
# endif
try {
sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
@ -159,27 +256,29 @@ bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
const std::string &kernel_name = kernel_id.get_name();
/* NOTE(@nsirgien): Names in this conditions below should match names from
* oneapi_call macro in oneapi_enqueue_kernel below */
if (((requested_features & KERNEL_FEATURE_VOLUME) == 0) &&
kernel_name.find("oneapi_kernel_integrator_shade_volume") != std::string::npos) {
/* In case HWRT is on, compilation of kernels using Embree is already handled in previous
* block. */
if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
(use_hardware_raytracing && oneapi_kernel_is_using_embree(kernel_name))) {
continue;
}
if (((requested_features & KERNEL_FEATURE_MNEE) == 0) &&
kernel_name.find("oneapi_kernel_integrator_shade_surface_mnee") != std::string::npos) {
# ifdef WITH_EMBREE_GPU
if (oneapi_kernel_is_using_embree(kernel_name) ||
oneapi_kernel_is_raytrace_or_mnee(kernel_name)) {
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
one_kernel_bundle_input
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
RTC_FEATURE_FLAG_NONE);
sycl::build(one_kernel_bundle_input);
continue;
}
if (((requested_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0) &&
kernel_name.find("oneapi_kernel_integrator_shade_surface_raytrace") !=
std::string::npos) {
continue;
}
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle =
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
sycl::build(one_kernel_bundle);
# endif
/* This call will ensure that AoT or cached JIT binaries are available
* for execution. It will trigger compilation if it is not already the case. */
(void)sycl::get_kernel_bundle<sycl::bundle_state::executable>(queue->get_context(),
{kernel_id});
}
}
catch (sycl::exception const &e) {
@ -188,13 +287,14 @@ bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
}
return false;
}
# endif
return true;
}
bool oneapi_enqueue_kernel(KernelContext *kernel_context,
int kernel,
size_t global_size,
const uint kernel_features,
bool use_hardware_raytracing,
void **args)
{
bool success = true;
@ -248,6 +348,21 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
try {
queue->submit([&](sycl::handler &cgh) {
# ifdef WITH_EMBREE_GPU
/* Spec says it has no effect if the called kernel doesn't support the below specialization
* constant but it can still trigger a recompilation, so we set it only if needed. */
if (device_kernel_has_intersection(device_kernel)) {
const RTCFeatureFlags used_embree_features = !use_hardware_raytracing ?
RTC_FEATURE_FLAG_NONE :
!(kernel_features & KERNEL_FEATURE_HAIR) ?
CYCLES_ONEAPI_EMBREE_BASIC_FEATURES :
CYCLES_ONEAPI_EMBREE_ALL_FEATURES;
cgh.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
used_embree_features);
}
# else
(void)kernel_features;
# endif
switch (device_kernel) {
case DEVICE_KERNEL_INTEGRATOR_RESET: {
oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_reset);
@ -549,4 +664,5 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
# endif
return success;
}
#endif /* WITH_ONEAPI */

View File

@ -47,10 +47,14 @@ CYCLES_KERNEL_ONEAPI_EXPORT size_t oneapi_kernel_preferred_local_size(
CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_enqueue_kernel(KernelContext *context,
int kernel,
size_t global_size,
const unsigned int kernel_features,
bool use_hardware_raytracing,
void **args);
CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_load_kernels(SyclQueue *queue,
const unsigned int requested_features);
const unsigned int kernel_features,
bool use_hardware_raytracing);
# ifdef __cplusplus
}
# endif
#endif /* WITH_ONEAPI */

View File

@ -342,7 +342,7 @@ ccl_device_forceinline void area_light_update_position(const ccl_global KernelLi
ls->D = normalize_len(ls->P - P, &ls->t);
ls->pdf = invarea;
if (klight->area.tan_half_spread > 0) {
if (klight->area.normalize_spread > 0) {
ls->eval_fac = 0.25f * invarea;
ls->eval_fac *= area_light_spread_attenuation(
ls->D, ls->Ng, klight->area.tan_half_spread, klight->area.normalize_spread);

View File

@ -3,8 +3,9 @@
#pragma once
#if !defined(__KERNEL_GPU__) && defined(WITH_EMBREE)
# if EMBREE_MAJOR_VERSION >= 4
#if (!defined(__KERNEL_GPU__) || (defined(__KERNEL_ONEAPI__) && defined(WITH_EMBREE_GPU))) && \
defined(WITH_EMBREE)
# if EMBREE_MAJOR_VERSION == 4
# include <embree4/rtcore.h>
# include <embree4/rtcore_scene.h>
# else
@ -78,9 +79,8 @@ CCL_NAMESPACE_BEGIN
#define __VISIBILITY_FLAG__
#define __VOLUME__
/* TODO: solve internal compiler errors and enable light tree on HIP. */
/* TODO: solve internal compiler perf issue and enable light tree on Metal/AMD. */
#if defined(__KERNEL_HIP__) || defined(__KERNEL_METAL_AMD__)
#if defined(__KERNEL_METAL_AMD__)
# undef __LIGHT_TREE__
#endif

View File

@ -15,8 +15,12 @@ set(SRC
camera.cpp
colorspace.cpp
constant_fold.cpp
devicescene.cpp
film.cpp
geometry.cpp
geometry_attributes.cpp
geometry_bvh.cpp
geometry_mesh.cpp
hair.cpp
image.cpp
image_oiio.cpp
@ -55,6 +59,7 @@ set(SRC_HEADERS
camera.h
colorspace.h
constant_fold.h
devicescene.h
film.h
geometry.h
hair.h

View File

@ -0,0 +1,64 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#include "scene/devicescene.h"
#include "device/device.h"
#include "device/memory.h"
CCL_NAMESPACE_BEGIN
DeviceScene::DeviceScene(Device *device)
: bvh_nodes(device, "bvh_nodes", MEM_GLOBAL),
bvh_leaf_nodes(device, "bvh_leaf_nodes", MEM_GLOBAL),
object_node(device, "object_node", MEM_GLOBAL),
prim_type(device, "prim_type", MEM_GLOBAL),
prim_visibility(device, "prim_visibility", MEM_GLOBAL),
prim_index(device, "prim_index", MEM_GLOBAL),
prim_object(device, "prim_object", MEM_GLOBAL),
prim_time(device, "prim_time", MEM_GLOBAL),
tri_verts(device, "tri_verts", MEM_GLOBAL),
tri_shader(device, "tri_shader", MEM_GLOBAL),
tri_vnormal(device, "tri_vnormal", MEM_GLOBAL),
tri_vindex(device, "tri_vindex", MEM_GLOBAL),
tri_patch(device, "tri_patch", MEM_GLOBAL),
tri_patch_uv(device, "tri_patch_uv", MEM_GLOBAL),
curves(device, "curves", MEM_GLOBAL),
curve_keys(device, "curve_keys", MEM_GLOBAL),
curve_segments(device, "curve_segments", MEM_GLOBAL),
patches(device, "patches", MEM_GLOBAL),
points(device, "points", MEM_GLOBAL),
points_shader(device, "points_shader", MEM_GLOBAL),
objects(device, "objects", MEM_GLOBAL),
object_motion_pass(device, "object_motion_pass", MEM_GLOBAL),
object_motion(device, "object_motion", MEM_GLOBAL),
object_flag(device, "object_flag", MEM_GLOBAL),
object_volume_step(device, "object_volume_step", MEM_GLOBAL),
object_prim_offset(device, "object_prim_offset", MEM_GLOBAL),
camera_motion(device, "camera_motion", MEM_GLOBAL),
attributes_map(device, "attributes_map", MEM_GLOBAL),
attributes_float(device, "attributes_float", MEM_GLOBAL),
attributes_float2(device, "attributes_float2", MEM_GLOBAL),
attributes_float3(device, "attributes_float3", MEM_GLOBAL),
attributes_float4(device, "attributes_float4", MEM_GLOBAL),
attributes_uchar4(device, "attributes_uchar4", MEM_GLOBAL),
light_distribution(device, "light_distribution", MEM_GLOBAL),
lights(device, "lights", MEM_GLOBAL),
light_background_marginal_cdf(device, "light_background_marginal_cdf", MEM_GLOBAL),
light_background_conditional_cdf(device, "light_background_conditional_cdf", MEM_GLOBAL),
light_tree_nodes(device, "light_tree_nodes", MEM_GLOBAL),
light_tree_emitters(device, "light_tree_emitters", MEM_GLOBAL),
light_to_tree(device, "light_to_tree", MEM_GLOBAL),
object_to_tree(device, "object_to_tree", MEM_GLOBAL),
object_lookup_offset(device, "object_lookup_offset", MEM_GLOBAL),
triangle_to_tree(device, "triangle_to_tree", MEM_GLOBAL),
particles(device, "particles", MEM_GLOBAL),
svm_nodes(device, "svm_nodes", MEM_GLOBAL),
shaders(device, "shaders", MEM_GLOBAL),
lookup_table(device, "lookup_table", MEM_GLOBAL),
sample_pattern_lut(device, "sample_pattern_lut", MEM_GLOBAL),
ies_lights(device, "ies", MEM_GLOBAL)
{
memset((void *)&data, 0, sizeof(data));
}
CCL_NAMESPACE_END

View File

@ -0,0 +1,101 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#ifndef __DEVICESCENE_H__
#define __DEVICESCENE_H__
#include "device/device.h"
#include "device/memory.h"
#include "util/types.h"
#include "util/vector.h"
CCL_NAMESPACE_BEGIN
class DeviceScene {
public:
/* BVH */
device_vector<int4> bvh_nodes;
device_vector<int4> bvh_leaf_nodes;
device_vector<int> object_node;
device_vector<int> prim_type;
device_vector<uint> prim_visibility;
device_vector<int> prim_index;
device_vector<int> prim_object;
device_vector<float2> prim_time;
/* mesh */
device_vector<packed_float3> tri_verts;
device_vector<uint> tri_shader;
device_vector<packed_float3> tri_vnormal;
device_vector<packed_uint3> tri_vindex;
device_vector<uint> tri_patch;
device_vector<float2> tri_patch_uv;
device_vector<KernelCurve> curves;
device_vector<float4> curve_keys;
device_vector<KernelCurveSegment> curve_segments;
device_vector<uint> patches;
/* point-cloud */
device_vector<float4> points;
device_vector<uint> points_shader;
/* objects */
device_vector<KernelObject> objects;
device_vector<Transform> object_motion_pass;
device_vector<DecomposedTransform> object_motion;
device_vector<uint> object_flag;
device_vector<float> object_volume_step;
device_vector<uint> object_prim_offset;
/* cameras */
device_vector<DecomposedTransform> camera_motion;
/* attributes */
device_vector<AttributeMap> attributes_map;
device_vector<float> attributes_float;
device_vector<float2> attributes_float2;
device_vector<packed_float3> attributes_float3;
device_vector<float4> attributes_float4;
device_vector<uchar4> attributes_uchar4;
/* lights */
device_vector<KernelLightDistribution> light_distribution;
device_vector<KernelLight> lights;
device_vector<float2> light_background_marginal_cdf;
device_vector<float2> light_background_conditional_cdf;
/* light tree */
device_vector<KernelLightTreeNode> light_tree_nodes;
device_vector<KernelLightTreeEmitter> light_tree_emitters;
device_vector<uint> light_to_tree;
device_vector<uint> object_to_tree;
device_vector<uint> object_lookup_offset;
device_vector<uint> triangle_to_tree;
/* particles */
device_vector<KernelParticle> particles;
/* shaders */
device_vector<int4> svm_nodes;
device_vector<KernelShader> shaders;
/* lookup tables */
device_vector<float> lookup_table;
/* integrator */
device_vector<float> sample_pattern_lut;
/* IES lights */
device_vector<float> ies_lights;
KernelData data;
DeviceScene(Device *device);
};
CCL_NAMESPACE_END
#endif /* __DEVICESCENE_H__ */

File diff suppressed because it is too large Load Diff

View File

@ -30,6 +30,38 @@ class Shader;
class Volume;
struct PackedBVH;
/* Set of flags used to help determining what data has been modified or needs reallocation, so we
* can decide which device data to free or update. */
enum {
DEVICE_CURVE_DATA_MODIFIED = (1 << 0),
DEVICE_MESH_DATA_MODIFIED = (1 << 1),
DEVICE_POINT_DATA_MODIFIED = (1 << 2),
ATTR_FLOAT_MODIFIED = (1 << 3),
ATTR_FLOAT2_MODIFIED = (1 << 4),
ATTR_FLOAT3_MODIFIED = (1 << 5),
ATTR_FLOAT4_MODIFIED = (1 << 6),
ATTR_UCHAR4_MODIFIED = (1 << 7),
CURVE_DATA_NEED_REALLOC = (1 << 8),
MESH_DATA_NEED_REALLOC = (1 << 9),
POINT_DATA_NEED_REALLOC = (1 << 10),
ATTR_FLOAT_NEEDS_REALLOC = (1 << 11),
ATTR_FLOAT2_NEEDS_REALLOC = (1 << 12),
ATTR_FLOAT3_NEEDS_REALLOC = (1 << 13),
ATTR_FLOAT4_NEEDS_REALLOC = (1 << 14),
ATTR_UCHAR4_NEEDS_REALLOC = (1 << 15),
ATTRS_NEED_REALLOC = (ATTR_FLOAT_NEEDS_REALLOC | ATTR_FLOAT2_NEEDS_REALLOC |
ATTR_FLOAT3_NEEDS_REALLOC | ATTR_FLOAT4_NEEDS_REALLOC |
ATTR_UCHAR4_NEEDS_REALLOC),
DEVICE_MESH_DATA_NEEDS_REALLOC = (MESH_DATA_NEED_REALLOC | ATTRS_NEED_REALLOC),
DEVICE_POINT_DATA_NEEDS_REALLOC = (POINT_DATA_NEED_REALLOC | ATTRS_NEED_REALLOC),
DEVICE_CURVE_DATA_NEEDS_REALLOC = (CURVE_DATA_NEED_REALLOC | ATTRS_NEED_REALLOC),
};
/* Geometry
*
* Base class for geometric types like Mesh and Hair. */

View File

@ -0,0 +1,722 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#include "bvh/bvh.h"
#include "bvh/bvh2.h"
#include "device/device.h"
#include "scene/attribute.h"
#include "scene/camera.h"
#include "scene/geometry.h"
#include "scene/hair.h"
#include "scene/light.h"
#include "scene/mesh.h"
#include "scene/object.h"
#include "scene/pointcloud.h"
#include "scene/scene.h"
#include "scene/shader.h"
#include "scene/shader_nodes.h"
#include "scene/stats.h"
#include "scene/volume.h"
#include "subd/patch_table.h"
#include "subd/split.h"
#include "kernel/osl/globals.h"
#include "util/foreach.h"
#include "util/log.h"
#include "util/progress.h"
#include "util/task.h"
CCL_NAMESPACE_BEGIN
bool Geometry::need_attribute(Scene *scene, AttributeStandard std)
{
if (std == ATTR_STD_NONE)
return false;
if (scene->need_global_attribute(std))
return true;
foreach (Node *node, used_shaders) {
Shader *shader = static_cast<Shader *>(node);
if (shader->attributes.find(std))
return true;
}
return false;
}
bool Geometry::need_attribute(Scene * /*scene*/, ustring name)
{
if (name == ustring())
return false;
foreach (Node *node, used_shaders) {
Shader *shader = static_cast<Shader *>(node);
if (shader->attributes.find(name))
return true;
}
return false;
}
AttributeRequestSet Geometry::needed_attributes()
{
AttributeRequestSet result;
foreach (Node *node, used_shaders) {
Shader *shader = static_cast<Shader *>(node);
result.add(shader->attributes);
}
return result;
}
bool Geometry::has_voxel_attributes() const
{
foreach (const Attribute &attr, attributes.attributes) {
if (attr.element == ATTR_ELEMENT_VOXEL) {
return true;
}
}
return false;
}
/* Generate a normal attribute map entry from an attribute descriptor. */
static void emit_attribute_map_entry(AttributeMap *attr_map,
size_t index,
uint64_t id,
TypeDesc type,
const AttributeDescriptor &desc)
{
attr_map[index].id = id;
attr_map[index].element = desc.element;
attr_map[index].offset = as_uint(desc.offset);
if (type == TypeDesc::TypeFloat)
attr_map[index].type = NODE_ATTR_FLOAT;
else if (type == TypeDesc::TypeMatrix)
attr_map[index].type = NODE_ATTR_MATRIX;
else if (type == TypeFloat2)
attr_map[index].type = NODE_ATTR_FLOAT2;
else if (type == TypeFloat4)
attr_map[index].type = NODE_ATTR_FLOAT4;
else if (type == TypeRGBA)
attr_map[index].type = NODE_ATTR_RGBA;
else
attr_map[index].type = NODE_ATTR_FLOAT3;
attr_map[index].flags = desc.flags;
}
/* Generate an attribute map end marker, optionally including a link to another map.
* Links are used to connect object attribute maps to mesh attribute maps. */
static void emit_attribute_map_terminator(AttributeMap *attr_map,
size_t index,
bool chain,
uint chain_link)
{
for (int j = 0; j < ATTR_PRIM_TYPES; j++) {
attr_map[index + j].id = ATTR_STD_NONE;
attr_map[index + j].element = chain; /* link is valid flag */
attr_map[index + j].offset = chain ? chain_link + j : 0; /* link to the correct sub-entry */
attr_map[index + j].type = 0;
attr_map[index + j].flags = 0;
}
}
/* Generate all necessary attribute map entries from the attribute request. */
static void emit_attribute_mapping(
AttributeMap *attr_map, size_t index, uint64_t id, AttributeRequest &req, Geometry *geom)
{
emit_attribute_map_entry(attr_map, index, id, req.type, req.desc);
if (geom->is_mesh()) {
Mesh *mesh = static_cast<Mesh *>(geom);
if (mesh->get_num_subd_faces()) {
emit_attribute_map_entry(attr_map, index + 1, id, req.subd_type, req.subd_desc);
}
}
}
void GeometryManager::update_svm_attributes(Device *,
DeviceScene *dscene,
Scene *scene,
vector<AttributeRequestSet> &geom_attributes,
vector<AttributeRequestSet> &object_attributes)
{
/* for SVM, the attributes_map table is used to lookup the offset of an
* attribute, based on a unique shader attribute id. */
/* compute array stride */
size_t attr_map_size = 0;
for (size_t i = 0; i < scene->geometry.size(); i++) {
Geometry *geom = scene->geometry[i];
geom->attr_map_offset = attr_map_size;
#ifdef WITH_OSL
size_t attr_count = 0;
foreach (AttributeRequest &req, geom_attributes[i].requests) {
if (req.std != ATTR_STD_NONE &&
scene->shader_manager->get_attribute_id(req.std) != (uint64_t)req.std)
attr_count += 2;
else
attr_count += 1;
}
#else
const size_t attr_count = geom_attributes[i].size();
#endif
attr_map_size += (attr_count + 1) * ATTR_PRIM_TYPES;
}
for (size_t i = 0; i < scene->objects.size(); i++) {
Object *object = scene->objects[i];
/* only allocate a table for the object if it actually has attributes */
if (object_attributes[i].size() == 0) {
object->attr_map_offset = 0;
}
else {
object->attr_map_offset = attr_map_size;
attr_map_size += (object_attributes[i].size() + 1) * ATTR_PRIM_TYPES;
}
}
if (attr_map_size == 0)
return;
if (!dscene->attributes_map.need_realloc()) {
return;
}
/* create attribute map */
AttributeMap *attr_map = dscene->attributes_map.alloc(attr_map_size);
memset(attr_map, 0, dscene->attributes_map.size() * sizeof(*attr_map));
for (size_t i = 0; i < scene->geometry.size(); i++) {
Geometry *geom = scene->geometry[i];
AttributeRequestSet &attributes = geom_attributes[i];
/* set geometry attributes */
size_t index = geom->attr_map_offset;
foreach (AttributeRequest &req, attributes.requests) {
uint64_t id;
if (req.std == ATTR_STD_NONE)
id = scene->shader_manager->get_attribute_id(req.name);
else
id = scene->shader_manager->get_attribute_id(req.std);
emit_attribute_mapping(attr_map, index, id, req, geom);
index += ATTR_PRIM_TYPES;
#ifdef WITH_OSL
/* Some standard attributes are explicitly referenced via their standard ID, so add those
* again in case they were added under a different attribute ID. */
if (req.std != ATTR_STD_NONE && id != (uint64_t)req.std) {
emit_attribute_mapping(attr_map, index, (uint64_t)req.std, req, geom);
index += ATTR_PRIM_TYPES;
}
#endif
}
emit_attribute_map_terminator(attr_map, index, false, 0);
}
for (size_t i = 0; i < scene->objects.size(); i++) {
Object *object = scene->objects[i];
AttributeRequestSet &attributes = object_attributes[i];
/* set object attributes */
if (attributes.size() > 0) {
size_t index = object->attr_map_offset;
foreach (AttributeRequest &req, attributes.requests) {
uint64_t id;
if (req.std == ATTR_STD_NONE)
id = scene->shader_manager->get_attribute_id(req.name);
else
id = scene->shader_manager->get_attribute_id(req.std);
emit_attribute_mapping(attr_map, index, id, req, object->geometry);
index += ATTR_PRIM_TYPES;
}
emit_attribute_map_terminator(attr_map, index, true, object->geometry->attr_map_offset);
}
}
/* copy to device */
dscene->attributes_map.copy_to_device();
}
void GeometryManager::update_attribute_element_offset(Geometry *geom,
device_vector<float> &attr_float,
size_t &attr_float_offset,
device_vector<float2> &attr_float2,
size_t &attr_float2_offset,
device_vector<packed_float3> &attr_float3,
size_t &attr_float3_offset,
device_vector<float4> &attr_float4,
size_t &attr_float4_offset,
device_vector<uchar4> &attr_uchar4,
size_t &attr_uchar4_offset,
Attribute *mattr,
AttributePrimitive prim,
TypeDesc &type,
AttributeDescriptor &desc)
{
if (mattr) {
/* store element and type */
desc.element = mattr->element;
desc.flags = mattr->flags;
type = mattr->type;
/* store attribute data in arrays */
size_t size = mattr->element_size(geom, prim);
AttributeElement &element = desc.element;
int &offset = desc.offset;
if (mattr->element == ATTR_ELEMENT_VOXEL) {
/* store slot in offset value */
ImageHandle &handle = mattr->data_voxel();
offset = handle.svm_slot();
}
else if (mattr->element == ATTR_ELEMENT_CORNER_BYTE) {
uchar4 *data = mattr->data_uchar4();
offset = attr_uchar4_offset;
assert(attr_uchar4.size() >= offset + size);
if (mattr->modified) {
for (size_t k = 0; k < size; k++) {
attr_uchar4[offset + k] = data[k];
}
attr_uchar4.tag_modified();
}
attr_uchar4_offset += size;
}
else if (mattr->type == TypeDesc::TypeFloat) {
float *data = mattr->data_float();
offset = attr_float_offset;
assert(attr_float.size() >= offset + size);
if (mattr->modified) {
for (size_t k = 0; k < size; k++) {
attr_float[offset + k] = data[k];
}
attr_float.tag_modified();
}
attr_float_offset += size;
}
else if (mattr->type == TypeFloat2) {
float2 *data = mattr->data_float2();
offset = attr_float2_offset;
assert(attr_float2.size() >= offset + size);
if (mattr->modified) {
for (size_t k = 0; k < size; k++) {
attr_float2[offset + k] = data[k];
}
attr_float2.tag_modified();
}
attr_float2_offset += size;
}
else if (mattr->type == TypeDesc::TypeMatrix) {
Transform *tfm = mattr->data_transform();
offset = attr_float4_offset;
assert(attr_float4.size() >= offset + size * 3);
if (mattr->modified) {
for (size_t k = 0; k < size * 3; k++) {
attr_float4[offset + k] = (&tfm->x)[k];
}
attr_float4.tag_modified();
}
attr_float4_offset += size * 3;
}
else if (mattr->type == TypeFloat4 || mattr->type == TypeRGBA) {
float4 *data = mattr->data_float4();
offset = attr_float4_offset;
assert(attr_float4.size() >= offset + size);
if (mattr->modified) {
for (size_t k = 0; k < size; k++) {
attr_float4[offset + k] = data[k];
}
attr_float4.tag_modified();
}
attr_float4_offset += size;
}
else {
float3 *data = mattr->data_float3();
offset = attr_float3_offset;
assert(attr_float3.size() >= offset + size);
if (mattr->modified) {
for (size_t k = 0; k < size; k++) {
attr_float3[offset + k] = data[k];
}
attr_float3.tag_modified();
}
attr_float3_offset += size;
}
/* mesh vertex/curve index is global, not per object, so we sneak
* a correction for that in here */
if (geom->is_mesh()) {
Mesh *mesh = static_cast<Mesh *>(geom);
if (mesh->subdivision_type == Mesh::SUBDIVISION_CATMULL_CLARK &&
desc.flags & ATTR_SUBDIVIDED) {
/* Indices for subdivided attributes are retrieved
* from patch table so no need for correction here. */
}
else if (element == ATTR_ELEMENT_VERTEX)
offset -= mesh->vert_offset;
else if (element == ATTR_ELEMENT_VERTEX_MOTION)
offset -= mesh->vert_offset;
else if (element == ATTR_ELEMENT_FACE) {
if (prim == ATTR_PRIM_GEOMETRY)
offset -= mesh->prim_offset;
else
offset -= mesh->face_offset;
}
else if (element == ATTR_ELEMENT_CORNER || element == ATTR_ELEMENT_CORNER_BYTE) {
if (prim == ATTR_PRIM_GEOMETRY)
offset -= 3 * mesh->prim_offset;
else
offset -= mesh->corner_offset;
}
}
else if (geom->is_hair()) {
Hair *hair = static_cast<Hair *>(geom);
if (element == ATTR_ELEMENT_CURVE)
offset -= hair->prim_offset;
else if (element == ATTR_ELEMENT_CURVE_KEY)
offset -= hair->curve_key_offset;
else if (element == ATTR_ELEMENT_CURVE_KEY_MOTION)
offset -= hair->curve_key_offset;
}
else if (geom->is_pointcloud()) {
if (element == ATTR_ELEMENT_VERTEX)
offset -= geom->prim_offset;
else if (element == ATTR_ELEMENT_VERTEX_MOTION)
offset -= geom->prim_offset;
}
}
else {
/* attribute not found */
desc.element = ATTR_ELEMENT_NONE;
desc.offset = 0;
}
}
static void update_attribute_element_size(Geometry *geom,
Attribute *mattr,
AttributePrimitive prim,
size_t *attr_float_size,
size_t *attr_float2_size,
size_t *attr_float3_size,
size_t *attr_float4_size,
size_t *attr_uchar4_size)
{
if (mattr) {
size_t size = mattr->element_size(geom, prim);
if (mattr->element == ATTR_ELEMENT_VOXEL) {
/* pass */
}
else if (mattr->element == ATTR_ELEMENT_CORNER_BYTE) {
*attr_uchar4_size += size;
}
else if (mattr->type == TypeDesc::TypeFloat) {
*attr_float_size += size;
}
else if (mattr->type == TypeFloat2) {
*attr_float2_size += size;
}
else if (mattr->type == TypeDesc::TypeMatrix) {
*attr_float4_size += size * 4;
}
else if (mattr->type == TypeFloat4 || mattr->type == TypeRGBA) {
*attr_float4_size += size;
}
else {
*attr_float3_size += size;
}
}
}
void GeometryManager::device_update_attributes(Device *device,
DeviceScene *dscene,
Scene *scene,
Progress &progress)
{
progress.set_status("Updating Mesh", "Computing attributes");
/* gather per mesh requested attributes. as meshes may have multiple
* shaders assigned, this merges the requested attributes that have
* been set per shader by the shader manager */
vector<AttributeRequestSet> geom_attributes(scene->geometry.size());
for (size_t i = 0; i < scene->geometry.size(); i++) {
Geometry *geom = scene->geometry[i];
geom->index = i;
scene->need_global_attributes(geom_attributes[i]);
foreach (Node *node, geom->get_used_shaders()) {
Shader *shader = static_cast<Shader *>(node);
geom_attributes[i].add(shader->attributes);
}
if (geom->is_hair() && static_cast<Hair *>(geom)->need_shadow_transparency()) {
geom_attributes[i].add(ATTR_STD_SHADOW_TRANSPARENCY);
}
}
/* convert object attributes to use the same data structures as geometry ones */
vector<AttributeRequestSet> object_attributes(scene->objects.size());
vector<AttributeSet> object_attribute_values;
object_attribute_values.reserve(scene->objects.size());
for (size_t i = 0; i < scene->objects.size(); i++) {
Object *object = scene->objects[i];
Geometry *geom = object->geometry;
size_t geom_idx = geom->index;
assert(geom_idx < scene->geometry.size() && scene->geometry[geom_idx] == geom);
object_attribute_values.push_back(AttributeSet(geom, ATTR_PRIM_GEOMETRY));
AttributeRequestSet &geom_requests = geom_attributes[geom_idx];
AttributeRequestSet &attributes = object_attributes[i];
AttributeSet &values = object_attribute_values[i];
for (size_t j = 0; j < object->attributes.size(); j++) {
ParamValue &param = object->attributes[j];
/* add attributes that are requested and not already handled by the mesh */
if (geom_requests.find(param.name()) && !geom->attributes.find(param.name())) {
attributes.add(param.name());
Attribute *attr = values.add(param.name(), param.type(), ATTR_ELEMENT_OBJECT);
assert(param.datasize() == attr->buffer.size());
memcpy(attr->buffer.data(), param.data(), param.datasize());
}
}
}
/* mesh attribute are stored in a single array per data type. here we fill
* those arrays, and set the offset and element type to create attribute
* maps next */
/* Pre-allocate attributes to avoid arrays re-allocation which would
* take 2x of overall attribute memory usage.
*/
size_t attr_float_size = 0;
size_t attr_float2_size = 0;
size_t attr_float3_size = 0;
size_t attr_float4_size = 0;
size_t attr_uchar4_size = 0;
for (size_t i = 0; i < scene->geometry.size(); i++) {
Geometry *geom = scene->geometry[i];
AttributeRequestSet &attributes = geom_attributes[i];
foreach (AttributeRequest &req, attributes.requests) {
Attribute *attr = geom->attributes.find(req);
update_attribute_element_size(geom,
attr,
ATTR_PRIM_GEOMETRY,
&attr_float_size,
&attr_float2_size,
&attr_float3_size,
&attr_float4_size,
&attr_uchar4_size);
if (geom->is_mesh()) {
Mesh *mesh = static_cast<Mesh *>(geom);
Attribute *subd_attr = mesh->subd_attributes.find(req);
update_attribute_element_size(mesh,
subd_attr,
ATTR_PRIM_SUBD,
&attr_float_size,
&attr_float2_size,
&attr_float3_size,
&attr_float4_size,
&attr_uchar4_size);
}
}
}
for (size_t i = 0; i < scene->objects.size(); i++) {
Object *object = scene->objects[i];
foreach (Attribute &attr, object_attribute_values[i].attributes) {
update_attribute_element_size(object->geometry,
&attr,
ATTR_PRIM_GEOMETRY,
&attr_float_size,
&attr_float2_size,
&attr_float3_size,
&attr_float4_size,
&attr_uchar4_size);
}
}
dscene->attributes_float.alloc(attr_float_size);
dscene->attributes_float2.alloc(attr_float2_size);
dscene->attributes_float3.alloc(attr_float3_size);
dscene->attributes_float4.alloc(attr_float4_size);
dscene->attributes_uchar4.alloc(attr_uchar4_size);
/* The order of those flags needs to match that of AttrKernelDataType. */
const bool attributes_need_realloc[AttrKernelDataType::NUM] = {
dscene->attributes_float.need_realloc(),
dscene->attributes_float2.need_realloc(),
dscene->attributes_float3.need_realloc(),
dscene->attributes_float4.need_realloc(),
dscene->attributes_uchar4.need_realloc(),
};
size_t attr_float_offset = 0;
size_t attr_float2_offset = 0;
size_t attr_float3_offset = 0;
size_t attr_float4_offset = 0;
size_t attr_uchar4_offset = 0;
/* Fill in attributes. */
for (size_t i = 0; i < scene->geometry.size(); i++) {
Geometry *geom = scene->geometry[i];
AttributeRequestSet &attributes = geom_attributes[i];
/* todo: we now store std and name attributes from requests even if
* they actually refer to the same mesh attributes, optimize */
foreach (AttributeRequest &req, attributes.requests) {
Attribute *attr = geom->attributes.find(req);
if (attr) {
/* force a copy if we need to reallocate all the data */
attr->modified |= attributes_need_realloc[Attribute::kernel_type(*attr)];
}
update_attribute_element_offset(geom,
dscene->attributes_float,
attr_float_offset,
dscene->attributes_float2,
attr_float2_offset,
dscene->attributes_float3,
attr_float3_offset,
dscene->attributes_float4,
attr_float4_offset,
dscene->attributes_uchar4,
attr_uchar4_offset,
attr,
ATTR_PRIM_GEOMETRY,
req.type,
req.desc);
if (geom->is_mesh()) {
Mesh *mesh = static_cast<Mesh *>(geom);
Attribute *subd_attr = mesh->subd_attributes.find(req);
if (subd_attr) {
/* force a copy if we need to reallocate all the data */
subd_attr->modified |= attributes_need_realloc[Attribute::kernel_type(*subd_attr)];
}
update_attribute_element_offset(mesh,
dscene->attributes_float,
attr_float_offset,
dscene->attributes_float2,
attr_float2_offset,
dscene->attributes_float3,
attr_float3_offset,
dscene->attributes_float4,
attr_float4_offset,
dscene->attributes_uchar4,
attr_uchar4_offset,
subd_attr,
ATTR_PRIM_SUBD,
req.subd_type,
req.subd_desc);
}
if (progress.get_cancel())
return;
}
}
for (size_t i = 0; i < scene->objects.size(); i++) {
Object *object = scene->objects[i];
AttributeRequestSet &attributes = object_attributes[i];
AttributeSet &values = object_attribute_values[i];
foreach (AttributeRequest &req, attributes.requests) {
Attribute *attr = values.find(req);
if (attr) {
attr->modified |= attributes_need_realloc[Attribute::kernel_type(*attr)];
}
update_attribute_element_offset(object->geometry,
dscene->attributes_float,
attr_float_offset,
dscene->attributes_float2,
attr_float2_offset,
dscene->attributes_float3,
attr_float3_offset,
dscene->attributes_float4,
attr_float4_offset,
dscene->attributes_uchar4,
attr_uchar4_offset,
attr,
ATTR_PRIM_GEOMETRY,
req.type,
req.desc);
/* object attributes don't care about subdivision */
req.subd_type = req.type;
req.subd_desc = req.desc;
if (progress.get_cancel())
return;
}
}
/* create attribute lookup maps */
if (scene->shader_manager->use_osl())
update_osl_globals(device, scene);
update_svm_attributes(device, dscene, scene, geom_attributes, object_attributes);
if (progress.get_cancel())
return;
/* copy to device */
progress.set_status("Updating Mesh", "Copying Attributes to device");
dscene->attributes_float.copy_to_device_if_modified();
dscene->attributes_float2.copy_to_device_if_modified();
dscene->attributes_float3.copy_to_device_if_modified();
dscene->attributes_float4.copy_to_device_if_modified();
dscene->attributes_uchar4.copy_to_device_if_modified();
if (progress.get_cancel())
return;
/* After mesh attributes and patch tables have been copied to device memory,
* we need to update offsets in the objects. */
scene->object_manager->device_update_geom_offsets(device, dscene, scene);
}
CCL_NAMESPACE_END

View File

@ -0,0 +1,196 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#include "bvh/bvh.h"
#include "bvh/bvh2.h"
#include "device/device.h"
#include "scene/attribute.h"
#include "scene/camera.h"
#include "scene/geometry.h"
#include "scene/hair.h"
#include "scene/light.h"
#include "scene/mesh.h"
#include "scene/object.h"
#include "scene/pointcloud.h"
#include "scene/scene.h"
#include "scene/shader.h"
#include "scene/shader_nodes.h"
#include "scene/stats.h"
#include "scene/volume.h"
#include "subd/patch_table.h"
#include "subd/split.h"
#include "kernel/osl/globals.h"
#include "util/foreach.h"
#include "util/log.h"
#include "util/progress.h"
#include "util/task.h"
CCL_NAMESPACE_BEGIN
void Geometry::compute_bvh(Device *device,
DeviceScene *dscene,
SceneParams *params,
Progress *progress,
size_t n,
size_t total)
{
if (progress->get_cancel())
return;
compute_bounds();
const BVHLayout bvh_layout = BVHParams::best_bvh_layout(
params->bvh_layout, device->get_bvh_layout_mask(dscene->data.kernel_features));
if (need_build_bvh(bvh_layout)) {
string msg = "Updating Geometry BVH ";
if (name.empty())
msg += string_printf("%u/%u", (uint)(n + 1), (uint)total);
else
msg += string_printf("%s %u/%u", name.c_str(), (uint)(n + 1), (uint)total);
Object object;
/* Ensure all visibility bits are set at the geometry level BVH. In
* the object level BVH is where actual visibility is tested. */
object.set_is_shadow_catcher(true);
object.set_visibility(~0);
object.set_geometry(this);
vector<Geometry *> geometry;
geometry.push_back(this);
vector<Object *> objects;
objects.push_back(&object);
if (bvh && !need_update_rebuild) {
progress->set_status(msg, "Refitting BVH");
bvh->replace_geometry(geometry, objects);
device->build_bvh(bvh, *progress, true);
}
else {
progress->set_status(msg, "Building BVH");
BVHParams bparams;
bparams.use_spatial_split = params->use_bvh_spatial_split;
bparams.use_compact_structure = params->use_bvh_compact_structure;
bparams.bvh_layout = bvh_layout;
bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
params->use_bvh_unaligned_nodes;
bparams.num_motion_triangle_steps = params->num_bvh_time_steps;
bparams.num_motion_curve_steps = params->num_bvh_time_steps;
bparams.num_motion_point_steps = params->num_bvh_time_steps;
bparams.bvh_type = params->bvh_type;
bparams.curve_subdivisions = params->curve_subdivisions();
delete bvh;
bvh = BVH::create(bparams, geometry, objects, device);
MEM_GUARDED_CALL(progress, device->build_bvh, bvh, *progress, false);
}
}
need_update_rebuild = false;
need_update_bvh_for_offset = false;
}
void GeometryManager::device_update_bvh(Device *device,
DeviceScene *dscene,
Scene *scene,
Progress &progress)
{
/* bvh build */
progress.set_status("Updating Scene BVH", "Building");
BVHParams bparams;
bparams.top_level = true;
bparams.bvh_layout = BVHParams::best_bvh_layout(
scene->params.bvh_layout, device->get_bvh_layout_mask(dscene->data.kernel_features));
bparams.use_spatial_split = scene->params.use_bvh_spatial_split;
bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
scene->params.use_bvh_unaligned_nodes;
bparams.num_motion_triangle_steps = scene->params.num_bvh_time_steps;
bparams.num_motion_curve_steps = scene->params.num_bvh_time_steps;
bparams.num_motion_point_steps = scene->params.num_bvh_time_steps;
bparams.bvh_type = scene->params.bvh_type;
bparams.curve_subdivisions = scene->params.curve_subdivisions();
VLOG_INFO << "Using " << bvh_layout_name(bparams.bvh_layout) << " layout.";
const bool can_refit = scene->bvh != nullptr &&
(bparams.bvh_layout == BVHLayout::BVH_LAYOUT_OPTIX ||
bparams.bvh_layout == BVHLayout::BVH_LAYOUT_METAL);
BVH *bvh = scene->bvh;
if (!scene->bvh) {
bvh = scene->bvh = BVH::create(bparams, scene->geometry, scene->objects, device);
}
device->build_bvh(bvh, progress, can_refit);
if (progress.get_cancel()) {
return;
}
const bool has_bvh2_layout = (bparams.bvh_layout == BVH_LAYOUT_BVH2);
PackedBVH pack;
if (has_bvh2_layout) {
pack = std::move(static_cast<BVH2 *>(bvh)->pack);
}
else {
pack.root_index = -1;
}
/* copy to device */
progress.set_status("Updating Scene BVH", "Copying BVH to device");
/* When using BVH2, we always have to copy/update the data as its layout is dependent on the
* BVH's leaf nodes which may be different when the objects or vertices move. */
if (pack.nodes.size()) {
dscene->bvh_nodes.steal_data(pack.nodes);
dscene->bvh_nodes.copy_to_device();
}
if (pack.leaf_nodes.size()) {
dscene->bvh_leaf_nodes.steal_data(pack.leaf_nodes);
dscene->bvh_leaf_nodes.copy_to_device();
}
if (pack.object_node.size()) {
dscene->object_node.steal_data(pack.object_node);
dscene->object_node.copy_to_device();
}
if (pack.prim_type.size()) {
dscene->prim_type.steal_data(pack.prim_type);
dscene->prim_type.copy_to_device();
}
if (pack.prim_visibility.size()) {
dscene->prim_visibility.steal_data(pack.prim_visibility);
dscene->prim_visibility.copy_to_device();
}
if (pack.prim_index.size()) {
dscene->prim_index.steal_data(pack.prim_index);
dscene->prim_index.copy_to_device();
}
if (pack.prim_object.size()) {
dscene->prim_object.steal_data(pack.prim_object);
dscene->prim_object.copy_to_device();
}
if (pack.prim_time.size()) {
dscene->prim_time.steal_data(pack.prim_time);
dscene->prim_time.copy_to_device();
}
dscene->data.bvh.root = pack.root_index;
dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0);
dscene->data.bvh.curve_subdivisions = scene->params.curve_subdivisions();
/* The scene handle is set in 'CPUDevice::const_copy_to' and 'OptiXDevice::const_copy_to' */
dscene->data.device_bvh = 0;
}
CCL_NAMESPACE_END

View File

@ -0,0 +1,223 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#include "bvh/bvh.h"
#include "bvh/bvh2.h"
#include "device/device.h"
#include "scene/attribute.h"
#include "scene/camera.h"
#include "scene/geometry.h"
#include "scene/hair.h"
#include "scene/light.h"
#include "scene/mesh.h"
#include "scene/object.h"
#include "scene/osl.h"
#include "scene/pointcloud.h"
#include "scene/scene.h"
#include "scene/shader.h"
#include "scene/shader_nodes.h"
#include "scene/stats.h"
#include "scene/volume.h"
#include "subd/patch_table.h"
#include "subd/split.h"
#ifdef WITH_OSL
# include "kernel/osl/globals.h"
#endif
#include "util/foreach.h"
#include "util/log.h"
#include "util/progress.h"
#include "util/task.h"
CCL_NAMESPACE_BEGIN
void GeometryManager::device_update_mesh(Device *,
DeviceScene *dscene,
Scene *scene,
Progress &progress)
{
/* Count. */
size_t vert_size = 0;
size_t tri_size = 0;
size_t curve_key_size = 0;
size_t curve_size = 0;
size_t curve_segment_size = 0;
size_t point_size = 0;
size_t patch_size = 0;
foreach (Geometry *geom, scene->geometry) {
if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) {
Mesh *mesh = static_cast<Mesh *>(geom);
vert_size += mesh->verts.size();
tri_size += mesh->num_triangles();
if (mesh->get_num_subd_faces()) {
Mesh::SubdFace last = mesh->get_subd_face(mesh->get_num_subd_faces() - 1);
patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8;
/* patch tables are stored in same array so include them in patch_size */
if (mesh->patch_table) {
mesh->patch_table_offset = patch_size;
patch_size += mesh->patch_table->total_size();
}
}
}
else if (geom->is_hair()) {
Hair *hair = static_cast<Hair *>(geom);
curve_key_size += hair->get_curve_keys().size();
curve_size += hair->num_curves();
curve_segment_size += hair->num_segments();
}
else if (geom->is_pointcloud()) {
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
point_size += pointcloud->num_points();
}
}
/* Fill in all the arrays. */
if (tri_size != 0) {
/* normals */
progress.set_status("Updating Mesh", "Computing normals");
packed_float3 *tri_verts = dscene->tri_verts.alloc(vert_size);
uint *tri_shader = dscene->tri_shader.alloc(tri_size);
packed_float3 *vnormal = dscene->tri_vnormal.alloc(vert_size);
packed_uint3 *tri_vindex = dscene->tri_vindex.alloc(tri_size);
uint *tri_patch = dscene->tri_patch.alloc(tri_size);
float2 *tri_patch_uv = dscene->tri_patch_uv.alloc(vert_size);
const bool copy_all_data = dscene->tri_shader.need_realloc() ||
dscene->tri_vindex.need_realloc() ||
dscene->tri_vnormal.need_realloc() ||
dscene->tri_patch.need_realloc() ||
dscene->tri_patch_uv.need_realloc();
foreach (Geometry *geom, scene->geometry) {
if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) {
Mesh *mesh = static_cast<Mesh *>(geom);
if (mesh->shader_is_modified() || mesh->smooth_is_modified() ||
mesh->triangles_is_modified() || copy_all_data) {
mesh->pack_shaders(scene, &tri_shader[mesh->prim_offset]);
}
if (mesh->verts_is_modified() || copy_all_data) {
mesh->pack_normals(&vnormal[mesh->vert_offset]);
}
if (mesh->verts_is_modified() || mesh->triangles_is_modified() ||
mesh->vert_patch_uv_is_modified() || copy_all_data) {
mesh->pack_verts(&tri_verts[mesh->vert_offset],
&tri_vindex[mesh->prim_offset],
&tri_patch[mesh->prim_offset],
&tri_patch_uv[mesh->vert_offset]);
}
if (progress.get_cancel())
return;
}
}
/* vertex coordinates */
progress.set_status("Updating Mesh", "Copying Mesh to device");
dscene->tri_verts.copy_to_device_if_modified();
dscene->tri_shader.copy_to_device_if_modified();
dscene->tri_vnormal.copy_to_device_if_modified();
dscene->tri_vindex.copy_to_device_if_modified();
dscene->tri_patch.copy_to_device_if_modified();
dscene->tri_patch_uv.copy_to_device_if_modified();
}
if (curve_segment_size != 0) {
progress.set_status("Updating Mesh", "Copying Curves to device");
float4 *curve_keys = dscene->curve_keys.alloc(curve_key_size);
KernelCurve *curves = dscene->curves.alloc(curve_size);
KernelCurveSegment *curve_segments = dscene->curve_segments.alloc(curve_segment_size);
const bool copy_all_data = dscene->curve_keys.need_realloc() ||
dscene->curves.need_realloc() ||
dscene->curve_segments.need_realloc();
foreach (Geometry *geom, scene->geometry) {
if (geom->is_hair()) {
Hair *hair = static_cast<Hair *>(geom);
bool curve_keys_co_modified = hair->curve_radius_is_modified() ||
hair->curve_keys_is_modified();
bool curve_data_modified = hair->curve_shader_is_modified() ||
hair->curve_first_key_is_modified();
if (!curve_keys_co_modified && !curve_data_modified && !copy_all_data) {
continue;
}
hair->pack_curves(scene,
&curve_keys[hair->curve_key_offset],
&curves[hair->prim_offset],
&curve_segments[hair->curve_segment_offset]);
if (progress.get_cancel())
return;
}
}
dscene->curve_keys.copy_to_device_if_modified();
dscene->curves.copy_to_device_if_modified();
dscene->curve_segments.copy_to_device_if_modified();
}
if (point_size != 0) {
progress.set_status("Updating Mesh", "Copying Point clouds to device");
float4 *points = dscene->points.alloc(point_size);
uint *points_shader = dscene->points_shader.alloc(point_size);
foreach (Geometry *geom, scene->geometry) {
if (geom->is_pointcloud()) {
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
pointcloud->pack(
scene, &points[pointcloud->prim_offset], &points_shader[pointcloud->prim_offset]);
if (progress.get_cancel())
return;
}
}
dscene->points.copy_to_device();
dscene->points_shader.copy_to_device();
}
if (patch_size != 0 && dscene->patches.need_realloc()) {
progress.set_status("Updating Mesh", "Copying Patches to device");
uint *patch_data = dscene->patches.alloc(patch_size);
foreach (Geometry *geom, scene->geometry) {
if (geom->is_mesh()) {
Mesh *mesh = static_cast<Mesh *>(geom);
mesh->pack_patches(&patch_data[mesh->patch_offset]);
if (mesh->patch_table) {
mesh->patch_table->copy_adjusting_offsets(&patch_data[mesh->patch_table_offset],
mesh->patch_table_offset);
}
if (progress.get_cancel())
return;
}
}
dscene->patches.copy_to_device();
}
}
CCL_NAMESPACE_END

View File

@ -450,6 +450,9 @@ void LightManager::device_update_tree(Device *,
* More benchmarking is needed to determine what number works best. */
LightTree light_tree(scene, dscene, progress, 8);
LightTreeNode *root = light_tree.build(scene, dscene);
if (progress.get_cancel()) {
return;
}
/* We want to create separate arrays corresponding to triangles and lights,
* which will be used to index back into the light tree for PDF calculations. */

View File

@ -595,7 +595,7 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
void ObjectManager::device_update_prim_offsets(Device *device, DeviceScene *dscene, Scene *scene)
{
if (!scene->integrator->get_use_light_tree()) {
BVHLayoutMask layout_mask = device->get_bvh_layout_mask();
BVHLayoutMask layout_mask = device->get_bvh_layout_mask(dscene->data.kernel_features);
if (layout_mask != BVH_LAYOUT_METAL && layout_mask != BVH_LAYOUT_MULTI_METAL &&
layout_mask != BVH_LAYOUT_MULTI_METAL_EMBREE) {
return;

View File

@ -24,6 +24,7 @@
#include "scene/svm.h"
#include "scene/tables.h"
#include "scene/volume.h"
#include "scene/devicescene.h"
#include "session/session.h"
#include "util/foreach.h"
@ -33,59 +34,7 @@
CCL_NAMESPACE_BEGIN
DeviceScene::DeviceScene(Device *device)
: bvh_nodes(device, "bvh_nodes", MEM_GLOBAL),
bvh_leaf_nodes(device, "bvh_leaf_nodes", MEM_GLOBAL),
object_node(device, "object_node", MEM_GLOBAL),
prim_type(device, "prim_type", MEM_GLOBAL),
prim_visibility(device, "prim_visibility", MEM_GLOBAL),
prim_index(device, "prim_index", MEM_GLOBAL),
prim_object(device, "prim_object", MEM_GLOBAL),
prim_time(device, "prim_time", MEM_GLOBAL),
tri_verts(device, "tri_verts", MEM_GLOBAL),
tri_shader(device, "tri_shader", MEM_GLOBAL),
tri_vnormal(device, "tri_vnormal", MEM_GLOBAL),
tri_vindex(device, "tri_vindex", MEM_GLOBAL),
tri_patch(device, "tri_patch", MEM_GLOBAL),
tri_patch_uv(device, "tri_patch_uv", MEM_GLOBAL),
curves(device, "curves", MEM_GLOBAL),
curve_keys(device, "curve_keys", MEM_GLOBAL),
curve_segments(device, "curve_segments", MEM_GLOBAL),
patches(device, "patches", MEM_GLOBAL),
points(device, "points", MEM_GLOBAL),
points_shader(device, "points_shader", MEM_GLOBAL),
objects(device, "objects", MEM_GLOBAL),
object_motion_pass(device, "object_motion_pass", MEM_GLOBAL),
object_motion(device, "object_motion", MEM_GLOBAL),
object_flag(device, "object_flag", MEM_GLOBAL),
object_volume_step(device, "object_volume_step", MEM_GLOBAL),
object_prim_offset(device, "object_prim_offset", MEM_GLOBAL),
camera_motion(device, "camera_motion", MEM_GLOBAL),
attributes_map(device, "attributes_map", MEM_GLOBAL),
attributes_float(device, "attributes_float", MEM_GLOBAL),
attributes_float2(device, "attributes_float2", MEM_GLOBAL),
attributes_float3(device, "attributes_float3", MEM_GLOBAL),
attributes_float4(device, "attributes_float4", MEM_GLOBAL),
attributes_uchar4(device, "attributes_uchar4", MEM_GLOBAL),
light_distribution(device, "light_distribution", MEM_GLOBAL),
lights(device, "lights", MEM_GLOBAL),
light_background_marginal_cdf(device, "light_background_marginal_cdf", MEM_GLOBAL),
light_background_conditional_cdf(device, "light_background_conditional_cdf", MEM_GLOBAL),
light_tree_nodes(device, "light_tree_nodes", MEM_GLOBAL),
light_tree_emitters(device, "light_tree_emitters", MEM_GLOBAL),
light_to_tree(device, "light_to_tree", MEM_GLOBAL),
object_to_tree(device, "object_to_tree", MEM_GLOBAL),
object_lookup_offset(device, "object_lookup_offset", MEM_GLOBAL),
triangle_to_tree(device, "triangle_to_tree", MEM_GLOBAL),
particles(device, "particles", MEM_GLOBAL),
svm_nodes(device, "svm_nodes", MEM_GLOBAL),
shaders(device, "shaders", MEM_GLOBAL),
lookup_table(device, "lookup_table", MEM_GLOBAL),
sample_pattern_lut(device, "sample_pattern_lut", MEM_GLOBAL),
ies_lights(device, "ies", MEM_GLOBAL)
{
memset((void *)&data, 0, sizeof(data));
}
Scene::Scene(const SceneParams &params_, Device *device)
: name("Scene"),
@ -601,7 +550,7 @@ static void log_kernel_features(const uint features)
<< "\n";
VLOG_INFO << "Use Shader Raytrace " << string_from_bool(features & KERNEL_FEATURE_NODE_RAYTRACE)
<< "\n";
VLOG_INFO << "Use MNEE" << string_from_bool(features & KERNEL_FEATURE_MNEE) << "\n";
VLOG_INFO << "Use MNEE " << string_from_bool(features & KERNEL_FEATURE_MNEE) << "\n";
VLOG_INFO << "Use Transparent " << string_from_bool(features & KERNEL_FEATURE_TRANSPARENT)
<< "\n";
VLOG_INFO << "Use Denoising " << string_from_bool(features & KERNEL_FEATURE_DENOISING) << "\n";

View File

@ -6,20 +6,16 @@
#include "bvh/params.h"
#include "scene/devicescene.h"
#include "scene/film.h"
#include "scene/image.h"
#include "scene/shader.h"
#include "device/device.h"
#include "device/memory.h"
#include "util/param.h"
#include "util/string.h"
#include "util/system.h"
#include "util/texture.h"
#include "util/thread.h"
#include "util/types.h"
#include "util/vector.h"
CCL_NAMESPACE_BEGIN
@ -54,92 +50,6 @@ class RenderStats;
class SceneUpdateStats;
class Volume;
/* Scene Device Data */
class DeviceScene {
public:
/* BVH */
device_vector<int4> bvh_nodes;
device_vector<int4> bvh_leaf_nodes;
device_vector<int> object_node;
device_vector<int> prim_type;
device_vector<uint> prim_visibility;
device_vector<int> prim_index;
device_vector<int> prim_object;
device_vector<float2> prim_time;
/* mesh */
device_vector<packed_float3> tri_verts;
device_vector<uint> tri_shader;
device_vector<packed_float3> tri_vnormal;
device_vector<packed_uint3> tri_vindex;
device_vector<uint> tri_patch;
device_vector<float2> tri_patch_uv;
device_vector<KernelCurve> curves;
device_vector<float4> curve_keys;
device_vector<KernelCurveSegment> curve_segments;
device_vector<uint> patches;
/* point-cloud */
device_vector<float4> points;
device_vector<uint> points_shader;
/* objects */
device_vector<KernelObject> objects;
device_vector<Transform> object_motion_pass;
device_vector<DecomposedTransform> object_motion;
device_vector<uint> object_flag;
device_vector<float> object_volume_step;
device_vector<uint> object_prim_offset;
/* cameras */
device_vector<DecomposedTransform> camera_motion;
/* attributes */
device_vector<AttributeMap> attributes_map;
device_vector<float> attributes_float;
device_vector<float2> attributes_float2;
device_vector<packed_float3> attributes_float3;
device_vector<float4> attributes_float4;
device_vector<uchar4> attributes_uchar4;
/* lights */
device_vector<KernelLightDistribution> light_distribution;
device_vector<KernelLight> lights;
device_vector<float2> light_background_marginal_cdf;
device_vector<float2> light_background_conditional_cdf;
/* light tree */
device_vector<KernelLightTreeNode> light_tree_nodes;
device_vector<KernelLightTreeEmitter> light_tree_emitters;
device_vector<uint> light_to_tree;
device_vector<uint> object_to_tree;
device_vector<uint> object_lookup_offset;
device_vector<uint> triangle_to_tree;
/* particles */
device_vector<KernelParticle> particles;
/* shaders */
device_vector<int4> svm_nodes;
device_vector<KernelShader> shaders;
/* lookup tables */
device_vector<float> lookup_table;
/* integrator */
device_vector<float> sample_pattern_lut;
/* IES lights */
device_vector<float> ies_lights;
KernelData data;
DeviceScene(Device *device);
};
/* Scene Parameters */
class SceneParams {

View File

@ -621,12 +621,12 @@ void Session::set_pause(bool pause)
void Session::set_output_driver(unique_ptr<OutputDriver> driver)
{
path_trace_->set_output_driver(move(driver));
path_trace_->set_output_driver(std::move(driver));
}
void Session::set_display_driver(unique_ptr<DisplayDriver> driver)
{
path_trace_->set_display_driver(move(driver));
path_trace_->set_display_driver(std::move(driver));
}
double Session::get_estimated_remaining_time() const

View File

@ -285,7 +285,7 @@ static bool configure_image_spec_from_buffer(ImageSpec *image_spec,
*image_spec = ImageSpec(
buffer_params.width, buffer_params.height, num_channels, TypeDesc::FLOAT);
image_spec->channelnames = move(channel_names);
image_spec->channelnames = std::move(channel_names);
if (!buffer_params_to_image_spec_atttributes(image_spec, buffer_params)) {
return false;

View File

@ -1,7 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2011-2022 Blender Foundation
if(WITH_GTESTS)
if(WITH_GTESTS AND WITH_CYCLES_LOGGING)
Include(GTestTesting)
# Otherwise we get warnings here that we can't fix in external projects
@ -53,6 +53,6 @@ if(NOT APPLE)
endif()
endif()
if(WITH_GTESTS)
if(WITH_GTESTS AND WITH_CYCLES_LOGGING)
blender_src_gtest(cycles "${SRC}" "${LIB}")
endif()

View File

@ -36,8 +36,7 @@ class LogMessageVoidify {
# define CHECK(expression) LOG_SUPPRESS()
# define CHECK_NOTNULL(expression) LOG_SUPPRESS()
# define CHECK_NULL(expression) LOG_SUPPRESS()
# define CHECK_NOTNULL(expression) (expression)
# define CHECK_NEAR(actual, expected, eps) LOG_SUPPRESS()
@ -50,8 +49,7 @@ class LogMessageVoidify {
# define DCHECK(expression) LOG_SUPPRESS()
# define DCHECK_NOTNULL(expression) LOG_SUPPRESS()
# define DCHECK_NULL(expression) LOG_SUPPRESS()
# define DCHECK_NOTNULL(expression) (expression)
# define DCHECK_NEAR(actual, expected, eps) LOG_SUPPRESS()

View File

@ -809,7 +809,7 @@ static string path_source_replace_includes_recursive(const string &_source,
const size_t source_length = source.length();
size_t index = 0;
/* Information about where we are in the source. */
size_t line_number = 0, column_number = 1;
size_t column_number = 1;
/* Currently gathered non-preprocessor token.
* Store as start/length rather than token itself to avoid overhead of
* memory re-allocations on each character concatenation.
@ -842,7 +842,6 @@ static string path_source_replace_includes_recursive(const string &_source,
preprocessor_line = "";
}
column_number = 0;
++line_number;
}
else if (ch == '#' && column_number == 1 && !inside_preprocessor) {
/* Append all possible non-preprocessor token to the result. */

View File

@ -4,7 +4,6 @@
#ifndef __UTIL_VECTOR_H__
#define __UTIL_VECTOR_H__
#include <cassert>
#include <cstring>
#include <vector>

View File

@ -897,12 +897,10 @@ void Octree::printPath(PathElement *path)
void Octree::printPaths(PathList *path)
{
PathList *iter = path;
int i = 0;
while (iter != NULL) {
dc_printf("Path %d:\n", i);
printPath(iter);
iter = iter->next;
i++;
}
}
@ -1256,7 +1254,6 @@ Node *Octree::connectFace(
updateParent(&newnode->internal, len, st);
int flag = 0;
// Add the cells to the rings and fill in the patch
PathElement *newEleN;
if (curEleN->pos[0] != stN[0] || curEleN->pos[1] != stN[1] || curEleN->pos[2] != stN[2]) {
@ -1286,7 +1283,6 @@ Node *Octree::connectFace(
alpha);
curEleN = newEleN;
flag++;
}
PathElement *newEleP;
@ -1316,7 +1312,6 @@ Node *Octree::connectFace(
alpha);
curEleP = newEleP;
flag++;
}
/*
@ -1543,6 +1538,8 @@ void Octree::getFacePoint(PathElement *leaf, int dir, int &x, int &y, float &p,
float off[3];
int num = 0, num2 = 0;
(void)num2; // Unused in release builds.
LeafNode *leafnode = locateLeaf(leaf->pos);
for (int i = 0; i < 4; i++) {
int edgeind = faceMap[dir * 2][i];

View File

@ -72,7 +72,7 @@ class GHOST_ISystemPaths {
/**
* Add the file to the operating system most recently used files
*/
virtual void addToSystemRecentFiles(const char *filename) const = 0;
virtual void addToSystemRecentFiles(const char *filepath) const = 0;
private:
/** The one and only system paths. */

View File

@ -61,7 +61,7 @@ extern const char *GHOST_getBinaryDir(void);
/**
* Add the file to the operating system most recently used files
*/
extern void GHOST_addToSystemRecentFiles(const char *filename);
extern void GHOST_addToSystemRecentFiles(const char *filepath);
#ifdef __cplusplus
}

View File

@ -10,6 +10,9 @@
#include "GHOST_Context.hh"
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
# include <epoxy/wgl.h>
# include <tchar.h>
#
@ -18,6 +21,8 @@
# endif
#endif
#include <epoxy/gl.h>
#include <cstdio>
#include <cstring>

View File

@ -11,8 +11,6 @@
#include "GHOST_IContext.hh"
#include "GHOST_Types.h"
#include <epoxy/gl.h>
#include <cstdlib> // for NULL
class GHOST_Context : public GHOST_IContext {

View File

@ -19,6 +19,8 @@
#include <Metal/Metal.h>
#include <QuartzCore/QuartzCore.h>
#include <epoxy/gl.h>
#include <cassert>
#include <vector>

View File

@ -903,13 +903,14 @@ GHOST_TSuccess GHOST_ContextVK::initializeDrawingContext()
auto extensions_available = getExtensionsAvailable();
vector<const char *> layers_enabled;
if (m_debug) {
enableLayer(layers_available, layers_enabled, VkLayer::KHRONOS_validation, m_debug);
}
vector<const char *> extensions_device;
vector<const char *> extensions_enabled;
if (m_debug) {
enableLayer(layers_available, layers_enabled, VkLayer::KHRONOS_validation, m_debug);
requireExtension(extensions_available, extensions_enabled, VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
if (use_window_surface) {
const char *native_surface_extension_name = getPlatformSpecificSurfaceExtension();

View File

@ -57,4 +57,4 @@ class GHOST_IXrGraphicsBinding {
};
std::unique_ptr<GHOST_IXrGraphicsBinding> GHOST_XrGraphicsBindingCreateFromType(
GHOST_TXrGraphicsBinding type, GHOST_Context &ghost_ctx);
GHOST_TXrGraphicsBinding type, GHOST_Context &context);

View File

@ -761,7 +761,7 @@ GHOST_IContext *GHOST_SystemCocoa::createOffscreenContext(GHOST_GLSettings glSet
#ifdef WITH_VULKAN_BACKEND
if (glSettings.context_type == GHOST_kDrawingContextTypeVulkan) {
const bool debug_context = (glSettings.flags & GHOST_glDebugContext) != 0;
GHOST_Context *context = new GHOST_ContextVK(false, NULL, 1, 0, debug_context);
GHOST_Context *context = new GHOST_ContextVK(false, NULL, 1, 2, debug_context);
if (!context->initializeDrawingContext()) {
delete context;
return NULL;
@ -900,12 +900,13 @@ GHOST_TSuccess GHOST_SystemCocoa::getButtons(GHOST_Buttons &buttons) const
GHOST_TCapabilityFlag GHOST_SystemCocoa::getCapabilities() const
{
return GHOST_TCapabilityFlag(GHOST_CAPABILITY_FLAG_ALL &
~(
/* Cocoa has no support for a primary selection clipboard. */
GHOST_kCapabilityPrimaryClipboard |
/* This Cocoa back-end has not yet implemented image copy/paste. */
GHOST_kCapabilityClipboardImages));
return GHOST_TCapabilityFlag(
GHOST_CAPABILITY_FLAG_ALL &
~(
/* Cocoa has no support for a primary selection clipboard. */
GHOST_kCapabilityPrimaryClipboard |
/* This Cocoa back-end has not yet implemented image copy/paste. */
GHOST_kCapabilityClipboardImages));
}
#pragma mark Event handlers

View File

@ -47,5 +47,5 @@ class GHOST_SystemPaths : public GHOST_ISystemPaths {
/**
* Add the file to the operating system most recently used files
*/
virtual void addToSystemRecentFiles(const char *filename) const = 0;
virtual void addToSystemRecentFiles(const char *filepath) const = 0;
};

View File

@ -54,5 +54,5 @@ class GHOST_SystemPathsCocoa : public GHOST_SystemPaths {
/**
* Add the file to the operating system most recently used files
*/
void addToSystemRecentFiles(const char *filename) const;
void addToSystemRecentFiles(const char *filepath) const;
};

Some files were not shown because too many files have changed in this diff Show More