Attributes: Integrate implicit sharing with the attribute API #107059
|
@ -90,28 +90,26 @@ include(cmake/haru.cmake)
|
|||
# Boost needs to be included after `python.cmake` due to the PYTHON_BINARY variable being needed.
|
||||
include(cmake/boost.cmake)
|
||||
include(cmake/pugixml.cmake)
|
||||
include(cmake/ispc.cmake)
|
||||
include(cmake/openimagedenoise.cmake)
|
||||
include(cmake/embree.cmake)
|
||||
include(cmake/openpgl.cmake)
|
||||
include(cmake/fmt.cmake)
|
||||
include(cmake/robinmap.cmake)
|
||||
include(cmake/xml2.cmake)
|
||||
|
||||
include(cmake/fribidi.cmake)
|
||||
include(cmake/harfbuzz.cmake)
|
||||
if(NOT APPLE)
|
||||
include(cmake/xr_openxr.cmake)
|
||||
if(NOT WIN32 OR BUILD_MODE STREQUAL Release)
|
||||
include(cmake/dpcpp.cmake)
|
||||
include(cmake/dpcpp_deps.cmake)
|
||||
endif()
|
||||
include(cmake/dpcpp.cmake)
|
||||
include(cmake/dpcpp_deps.cmake)
|
||||
if(NOT WIN32)
|
||||
include(cmake/igc.cmake)
|
||||
include(cmake/gmmlib.cmake)
|
||||
include(cmake/ocloc.cmake)
|
||||
endif()
|
||||
endif()
|
||||
include(cmake/ispc.cmake)
|
||||
include(cmake/openimagedenoise.cmake)
|
||||
# Embree needs to be included after dpcpp as it uses it for compiling with GPU support
|
||||
include(cmake/embree.cmake)
|
||||
include(cmake/openpgl.cmake)
|
||||
include(cmake/fmt.cmake)
|
||||
include(cmake/robinmap.cmake)
|
||||
include(cmake/xml2.cmake)
|
||||
|
||||
# OpenColorIO and dependencies.
|
||||
include(cmake/expat.cmake)
|
||||
|
|
|
@ -156,6 +156,7 @@ download_source(OPENCLHEADERS)
|
|||
download_source(ICDLOADER)
|
||||
download_source(MP11)
|
||||
download_source(SPIRV_HEADERS)
|
||||
download_source(UNIFIED_RUNTIME)
|
||||
download_source(IGC)
|
||||
download_source(IGC_LLVM)
|
||||
download_source(IGC_OPENCL_CLANG)
|
||||
|
|
|
@ -5,6 +5,9 @@
|
|||
# for now.
|
||||
string(REPLACE "-DCMAKE_CXX_STANDARD=17" " " DPCPP_CMAKE_FLAGS "${DEFAULT_CMAKE_FLAGS}")
|
||||
|
||||
# DPCPP already generates debug libs, there isn't much point in compiling it in debug mode itself.
|
||||
string(REPLACE "-DCMAKE_BUILD_TYPE=Debug" "-DCMAKE_BUILD_TYPE=Release" DPCPP_CMAKE_FLAGS "${DPCPP_CMAKE_FLAGS}")
|
||||
|
||||
if(WIN32)
|
||||
set(LLVM_GENERATOR "Ninja")
|
||||
else()
|
||||
|
@ -38,17 +41,18 @@ set(DPCPP_EXTRA_ARGS
|
|||
-DLEVEL_ZERO_LIBRARY=${LIBDIR}/level-zero/lib/${LIBPREFIX}ze_loader${SHAREDLIBEXT}
|
||||
-DLEVEL_ZERO_INCLUDE_DIR=${LIBDIR}/level-zero/include
|
||||
-DLLVM_EXTERNAL_SPIRV_HEADERS_SOURCE_DIR=${BUILD_DIR}/spirvheaders/src/external_spirvheaders/
|
||||
-DUNIFIED_RUNTIME_SOURCE_DIR=${BUILD_DIR}/unifiedruntime/src/external_unifiedruntime/
|
||||
# Below here is copied from an invocation of buildbot/config.py
|
||||
-DLLVM_ENABLE_ASSERTIONS=ON
|
||||
-DLLVM_TARGETS_TO_BUILD=X86
|
||||
-DLLVM_EXTERNAL_PROJECTS=sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
|
||||
-DLLVM_EXTERNAL_PROJECTS=sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw^^lld
|
||||
-DLLVM_EXTERNAL_SYCL_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/sycl
|
||||
-DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/llvm-spirv
|
||||
-DLLVM_EXTERNAL_XPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
|
||||
-DXPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
|
||||
-DLLVM_EXTERNAL_XPTIFW_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xptifw
|
||||
-DLLVM_EXTERNAL_LIBDEVICE_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/libdevice
|
||||
-DLLVM_ENABLE_PROJECTS=clang^^sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
|
||||
-DLLVM_ENABLE_PROJECTS=clang^^sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw^^lld
|
||||
-DLIBCLC_TARGETS_TO_BUILD=
|
||||
-DLIBCLC_GENERATE_REMANGLED_VARIANTS=OFF
|
||||
-DSYCL_BUILD_PI_HIP_PLATFORM=AMD
|
||||
|
@ -104,13 +108,13 @@ add_dependencies(
|
|||
external_mp11
|
||||
external_level-zero
|
||||
external_spirvheaders
|
||||
external_unifiedruntime
|
||||
)
|
||||
|
||||
if(BUILD_MODE STREQUAL Release AND WIN32)
|
||||
ExternalProject_Add_Step(external_dpcpp after_install
|
||||
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cl.exe
|
||||
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cpp.exe
|
||||
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang.exe
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/dpcpp ${HARVEST_TARGET}/dpcpp
|
||||
)
|
||||
endif()
|
||||
|
|
|
@ -59,3 +59,13 @@ ExternalProject_Add(external_spirvheaders
|
|||
BUILD_COMMAND echo .
|
||||
INSTALL_COMMAND echo .
|
||||
)
|
||||
|
||||
ExternalProject_Add(external_unifiedruntime
|
||||
URL file://${PACKAGE_DIR}/${UNIFIED_RUNTIME_FILE}
|
||||
URL_HASH ${UNIFIED_RUNTIME_HASH_TYPE}=${UNIFIED_RUNTIME_HASH}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
PREFIX ${BUILD_DIR}/unifiedruntime
|
||||
CONFIGURE_COMMAND echo .
|
||||
BUILD_COMMAND echo .
|
||||
INSTALL_COMMAND echo .
|
||||
)
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
# Note the utility apps may use png/tiff/gif system libraries, but the
|
||||
# library itself does not depend on them, so should give no problems.
|
||||
|
||||
set(EMBREE_CMAKE_FLAGS ${DEFAULT_CMAKE_FLAGS})
|
||||
|
||||
set(EMBREE_EXTRA_ARGS
|
||||
-DEMBREE_ISPC_SUPPORT=OFF
|
||||
-DEMBREE_TUTORIALS=OFF
|
||||
|
@ -31,6 +33,43 @@ if(NOT BLENDER_PLATFORM_ARM)
|
|||
)
|
||||
endif()
|
||||
|
||||
if(NOT APPLE)
|
||||
if(WIN32)
|
||||
# Levels below -O2 don't work well for Embree+SYCL.
|
||||
string(REGEX REPLACE "-O[A-Za-z0-9]" "" EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG ${BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG})
|
||||
string(APPEND EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG " -O2")
|
||||
string(REGEX REPLACE "-O[A-Za-z0-9]" "" EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG ${BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG})
|
||||
string(APPEND EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG " -O2")
|
||||
set(EMBREE_CMAKE_FLAGS
|
||||
-DCMAKE_BUILD_TYPE=${BUILD_MODE}
|
||||
-DCMAKE_CXX_FLAGS_RELEASE=${BLENDER_CLANG_CMAKE_CXX_FLAGS_RELEASE}
|
||||
-DCMAKE_CXX_FLAGS_MINSIZEREL=${BLENDER_CLANG_CMAKE_CXX_FLAGS_MINSIZEREL}
|
||||
-DCMAKE_CXX_FLAGS_RELWITHDEBINFO=${BLENDER_CLANG_CMAKE_CXX_FLAGS_RELWITHDEBINFO}
|
||||
-DCMAKE_CXX_FLAGS_DEBUG=${EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG}
|
||||
-DCMAKE_C_FLAGS_RELEASE=${BLENDER_CLANG_CMAKE_C_FLAGS_RELEASE}
|
||||
-DCMAKE_C_FLAGS_MINSIZEREL=${BLENDER_CLANG_CMAKE_C_FLAGS_MINSIZEREL}
|
||||
-DCMAKE_C_FLAGS_RELWITHDEBINFO=${BLENDER_CLANG_CMAKE_C_FLAGS_RELWITHDEBINFO}
|
||||
-DCMAKE_C_FLAGS_DEBUG=${EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG}
|
||||
-DCMAKE_CXX_STANDARD=17
|
||||
)
|
||||
set(EMBREE_EXTRA_ARGS
|
||||
-DCMAKE_CXX_COMPILER=${LIBDIR}/dpcpp/bin/clang++.exe
|
||||
-DCMAKE_C_COMPILER=${LIBDIR}/dpcpp/bin/clang.exe
|
||||
-DCMAKE_SHARED_LINKER_FLAGS=-L"${LIBDIR}/dpcpp/lib"
|
||||
-DEMBREE_SYCL_SUPPORT=ON
|
||||
${EMBREE_EXTRA_ARGS}
|
||||
)
|
||||
else()
|
||||
set(EMBREE_EXTRA_ARGS
|
||||
-DCMAKE_CXX_COMPILER=${LIBDIR}/dpcpp/bin/clang++
|
||||
-DCMAKE_C_COMPILER=${LIBDIR}/dpcpp/bin/clang
|
||||
-DCMAKE_SHARED_LINKER_FLAGS=-L"${LIBDIR}/dpcpp/lib"
|
||||
-DEMBREE_SYCL_SUPPORT=ON
|
||||
${EMBREE_EXTRA_ARGS}
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(TBB_STATIC_LIBRARY)
|
||||
set(EMBREE_EXTRA_ARGS
|
||||
${EMBREE_EXTRA_ARGS}
|
||||
|
@ -42,16 +81,25 @@ ExternalProject_Add(external_embree
|
|||
URL file://${PACKAGE_DIR}/${EMBREE_FILE}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
URL_HASH ${EMBREE_HASH_TYPE}=${EMBREE_HASH}
|
||||
CMAKE_GENERATOR ${PLATFORM_ALT_GENERATOR}
|
||||
PREFIX ${BUILD_DIR}/embree
|
||||
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/embree/src/external_embree < ${PATCH_DIR}/embree.diff
|
||||
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/embree ${DEFAULT_CMAKE_FLAGS} ${EMBREE_EXTRA_ARGS}
|
||||
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/embree ${EMBREE_CMAKE_FLAGS} ${EMBREE_EXTRA_ARGS}
|
||||
INSTALL_DIR ${LIBDIR}/embree
|
||||
)
|
||||
|
||||
add_dependencies(
|
||||
external_embree
|
||||
external_tbb
|
||||
)
|
||||
if(NOT APPLE)
|
||||
add_dependencies(
|
||||
external_embree
|
||||
external_tbb
|
||||
external_dpcpp
|
||||
)
|
||||
else()
|
||||
add_dependencies(
|
||||
external_embree
|
||||
external_tbb
|
||||
)
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
if(BUILD_MODE STREQUAL Release)
|
||||
|
@ -66,6 +114,7 @@ if(WIN32)
|
|||
ExternalProject_Add_Step(external_embree after_install
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/bin/embree4_d.dll ${HARVEST_TARGET}/embree/bin/embree4_d.dll
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree4_d.lib ${HARVEST_TARGET}/embree/lib/embree4_d.lib
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree4_sycl_d.lib ${HARVEST_TARGET}/embree/lib/embree4_sycl_d.lib
|
||||
DEPENDEES install
|
||||
)
|
||||
endif()
|
||||
|
|
|
@ -74,6 +74,27 @@ if(WIN32)
|
|||
set(BLENDER_CMAKE_CXX_FLAGS_RELEASE "/MD ${COMMON_MSVC_FLAGS} /D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS /O2 /Ob2 /D NDEBUG /D PLATFORM_WINDOWS /DPSAPI_VERSION=2 /DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
set(BLENDER_CMAKE_CXX_FLAGS_RELWITHDEBINFO "/MD ${COMMON_MSVC_FLAGS} /D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS /Zi /O2 /Ob1 /D NDEBUG /D PLATFORM_WINDOWS /DPSAPI_VERSION=2 /DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
|
||||
# Set similar flags for CLANG compilation.
|
||||
set(COMMON_CLANG_FLAGS "-D_DLL -D_MT") # Equivalent to MSVC /MD
|
||||
|
||||
if(WITH_OPTIMIZED_DEBUG)
|
||||
set(BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -O2 -D_DEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
else()
|
||||
set(BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -g -D_DEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
endif()
|
||||
set(BLENDER_CLANG_CMAKE_C_FLAGS_MINSIZEREL "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -Os -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
set(BLENDER_CLANG_CMAKE_C_FLAGS_RELEASE "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -O2 -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
set(BLENDER_CLANG_CMAKE_C_FLAGS_RELWITHDEBINFO "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -g -O2 -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
|
||||
if(WITH_OPTIMIZED_DEBUG)
|
||||
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -D_DEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS -DBOOST_DEBUG_PYTHON -DBOOST_ALL_NO_LIB")
|
||||
else()
|
||||
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_DEBUG "${COMMON_CLANG_FLAG} -Xclang --dependent-lib=msvcrtd -D_DEBUG -DPLATFORM_WINDOWS -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -g -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS -DBOOST_DEBUG_PYTHON -DBOOST_ALL_NO_LIB")
|
||||
endif()
|
||||
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_MINSIZEREL "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_RELEASE "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_RELWITHDEBINFO "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -g -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
|
||||
|
||||
set(PLATFORM_FLAGS)
|
||||
set(PLATFORM_CXX_FLAGS)
|
||||
set(PLATFORM_CMAKE_FLAGS)
|
||||
|
|
|
@ -599,15 +599,15 @@ set(OPENPGL_HASH db63f5dac5cfa8c110ede241f0c413f00db0c4748697381c4fa23e0f9e82a75
|
|||
set(OPENPGL_HASH_TYPE SHA256)
|
||||
set(OPENPGL_FILE openpgl-${OPENPGL_VERSION}.tar.gz)
|
||||
|
||||
set(LEVEL_ZERO_VERSION v1.8.5)
|
||||
set(LEVEL_ZERO_VERSION v1.8.8)
|
||||
set(LEVEL_ZERO_URI https://github.com/oneapi-src/level-zero/archive/refs/tags/${LEVEL_ZERO_VERSION}.tar.gz)
|
||||
set(LEVEL_ZERO_HASH b6e9663bbcc53c148d32376998298bec6f7c434ef2218c61fa708963e3a09394)
|
||||
set(LEVEL_ZERO_HASH 3553ae8fa0d2d69c4210a8f3428bd6612bd8bb8a627faf52c3658a01851e66d2)
|
||||
set(LEVEL_ZERO_HASH_TYPE SHA256)
|
||||
set(LEVEL_ZERO_FILE level-zero-${LEVEL_ZERO_VERSION}.tar.gz)
|
||||
|
||||
set(DPCPP_VERSION 20221019)
|
||||
set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/sycl-nightly/${DPCPP_VERSION}.tar.gz)
|
||||
set(DPCPP_HASH 2f533946e91ce3829431758ea17b0b834b960c1a796e9e4563c86e03eb9603a2)
|
||||
set(DPCPP_VERSION 2022-12)
|
||||
set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/${DPCPP_VERSION}.tar.gz)
|
||||
set(DPCPP_HASH 13151d5ae79f7c9c4a9b072a0c486ae7b3c4993e301bb1268c92214451025790)
|
||||
set(DPCPP_HASH_TYPE SHA256)
|
||||
set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)
|
||||
|
||||
|
@ -620,9 +620,9 @@ set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)
|
|||
# will take care of building them, unpack is being done in dpcpp_deps.cmake
|
||||
|
||||
# Source llvm/lib/SYCLLowerIR/CMakeLists.txt
|
||||
set(VCINTRINSICS_VERSION abce9184b7a3a7fe1b02289b9285610d9dc45465)
|
||||
set(VCINTRINSICS_VERSION 782fbf7301dc73acaa049a4324c976ad94f587f7)
|
||||
set(VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/${VCINTRINSICS_VERSION}.tar.gz)
|
||||
set(VCINTRINSICS_HASH 3e9fd471246b87633b26f7e15e17ab7733d357458c53d5c5881c03929d6c551f)
|
||||
set(VCINTRINSICS_HASH f4c0ccad8c1f77760364c551c65e8e1cf194d058889fa46d3b1b2d19ec4dc33f)
|
||||
set(VCINTRINSICS_HASH_TYPE SHA256)
|
||||
set(VCINTRINSICS_FILE vc-intrinsics-${VCINTRINSICS_VERSION}.tar.gz)
|
||||
|
||||
|
@ -657,6 +657,13 @@ set(SPIRV_HEADERS_HASH ec8ecb471a62672697846c436501638ab25447ae9d4a6761e0bfe8a9a
|
|||
set(SPIRV_HEADERS_HASH_TYPE SHA256)
|
||||
set(SPIRV_HEADERS_FILE SPIR-V-Headers-${SPIRV_HEADERS_VERSION}.tar.gz)
|
||||
|
||||
# Source llvm/sycl/plugins/unified_runtime/CMakeLists.txt
|
||||
set(UNIFIED_RUNTIME_VERSION fd711c920acc4434cb52ff18b078c082d9d7f44d)
|
||||
set(UNIFIED_RUNTIME_URI https://github.com/oneapi-src/unified-runtime/archive/${UNIFIED_RUNTIME_VERSION}.tar.gz)
|
||||
set(UNIFIED_RUNTIME_HASH 535ca2ee78f68c5e7e62b10f1bbabd909179488885566e6d9b1fc50e8a1be65f)
|
||||
set(UNIFIED_RUNTIME_HASH_TYPE SHA256)
|
||||
set(UNIFIED_RUNTIME_FILE unified-runtime-${UNIFIED_RUNTIME_VERSION}.tar.gz)
|
||||
|
||||
######################
|
||||
### DPCPP DEPS END ###
|
||||
######################
|
||||
|
@ -730,9 +737,9 @@ set(GMMLIB_HASH c1f33e1519edfc527127baeb0436b783430dfd256c643130169a3a71dc86aff9
|
|||
set(GMMLIB_HASH_TYPE SHA256)
|
||||
set(GMMLIB_FILE ${GMMLIB_VERSION}.tar.gz)
|
||||
|
||||
set(OCLOC_VERSION 22.49.25018.21)
|
||||
set(OCLOC_VERSION 23.05.25593.18)
|
||||
set(OCLOC_URI https://github.com/intel/compute-runtime/archive/refs/tags/${OCLOC_VERSION}.tar.gz)
|
||||
set(OCLOC_HASH 92362dae08b503a34e5d3820ed284198c452bcd5e7504d90eb69887b20492c06)
|
||||
set(OCLOC_HASH 122415028e631922ae999c996954dfd98ce9a32decd564d5484c31476ec9306e)
|
||||
set(OCLOC_HASH_TYPE SHA256)
|
||||
set(OCLOC_FILE ocloc-${OCLOC_VERSION}.tar.gz)
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@ graph[autosize = false, size = "25.7,8.3!", resolution = 300];
|
|||
external_dpcpp -- external_mp11;
|
||||
external_dpcpp -- external_level_zero;
|
||||
external_dpcpp -- external_spirvheaders;
|
||||
external_dpcpp -- external_unifiedruntime;
|
||||
external_embree -- external_tbb;
|
||||
external_ffmpeg -- external_zlib;
|
||||
external_ffmpeg -- external_openjpeg;
|
||||
|
|
|
@ -34,3 +34,156 @@ diff -Naur llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice
|
|||
libsycldevice-obj
|
||||
libsycldevice-spv)
|
||||
|
||||
diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp
|
||||
index 17eeaafae194..09e6d2217aaa 100644
|
||||
--- a/sycl/source/detail/program_manager/program_manager.cpp
|
||||
+++ b/sycl/source/detail/program_manager/program_manager.cpp
|
||||
@@ -1647,46 +1647,120 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState(
|
||||
}
|
||||
assert(BinImages.size() > 0 && "Expected to find at least one device image");
|
||||
|
||||
+ // Ignore images with incompatible state. Image is considered compatible
|
||||
+ // with a target state if an image is already in the target state or can
|
||||
+ // be brought to target state by compiling/linking/building.
|
||||
+ //
|
||||
+ // Example: an image in "executable" state is not compatible with
|
||||
+ // "input" target state - there is no operation to convert the image it
|
||||
+ // to "input" state. An image in "input" state is compatible with
|
||||
+ // "executable" target state because it can be built to get into
|
||||
+ // "executable" state.
|
||||
+ for (auto It = BinImages.begin(); It != BinImages.end();) {
|
||||
+ if (getBinImageState(*It) > TargetState)
|
||||
+ It = BinImages.erase(It);
|
||||
+ else
|
||||
+ ++It;
|
||||
+ }
|
||||
+
|
||||
std::vector<device_image_plain> SYCLDeviceImages;
|
||||
- for (RTDeviceBinaryImage *BinImage : BinImages) {
|
||||
- const bundle_state ImgState = getBinImageState(BinImage);
|
||||
-
|
||||
- // Ignore images with incompatible state. Image is considered compatible
|
||||
- // with a target state if an image is already in the target state or can
|
||||
- // be brought to target state by compiling/linking/building.
|
||||
- //
|
||||
- // Example: an image in "executable" state is not compatible with
|
||||
- // "input" target state - there is no operation to convert the image it
|
||||
- // to "input" state. An image in "input" state is compatible with
|
||||
- // "executable" target state because it can be built to get into
|
||||
- // "executable" state.
|
||||
- if (ImgState > TargetState)
|
||||
- continue;
|
||||
|
||||
- for (const sycl::device &Dev : Devs) {
|
||||
+ // If a non-input state is requested, we can filter out some compatible
|
||||
+ // images and return only those with the highest compatible state for each
|
||||
+ // device-kernel pair. This map tracks how many kernel-device pairs need each
|
||||
+ // image, so that any unneeded ones are skipped.
|
||||
+ // TODO this has no effect if the requested state is input, consider having
|
||||
+ // a separate branch for that case to avoid unnecessary tracking work.
|
||||
+ struct DeviceBinaryImageInfo {
|
||||
+ std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
|
||||
+ bundle_state State = bundle_state::input;
|
||||
+ int RequirementCounter = 0;
|
||||
+ };
|
||||
+ std::unordered_map<RTDeviceBinaryImage *, DeviceBinaryImageInfo> ImageInfoMap;
|
||||
+
|
||||
+ for (const sycl::device &Dev : Devs) {
|
||||
+ // Track the highest image state for each requested kernel.
|
||||
+ using StateImagesPairT =
|
||||
+ std::pair<bundle_state, std::vector<RTDeviceBinaryImage *>>;
|
||||
+ using KernelImageMapT =
|
||||
+ std::map<kernel_id, StateImagesPairT, LessByNameComp>;
|
||||
+ KernelImageMapT KernelImageMap;
|
||||
+ if (!KernelIDs.empty())
|
||||
+ for (const kernel_id &KernelID : KernelIDs)
|
||||
+ KernelImageMap.insert({KernelID, {}});
|
||||
+
|
||||
+ for (RTDeviceBinaryImage *BinImage : BinImages) {
|
||||
if (!compatibleWithDevice(BinImage, Dev) ||
|
||||
!doesDevSupportImgAspects(Dev, *BinImage))
|
||||
continue;
|
||||
|
||||
- std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
|
||||
- // Collect kernel names for the image
|
||||
- {
|
||||
- std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
|
||||
- KernelIDs = m_BinImg2KernelIDs[BinImage];
|
||||
- // If the image does not contain any non-service kernels we can skip it.
|
||||
- if (!KernelIDs || KernelIDs->empty())
|
||||
- continue;
|
||||
+ auto InsertRes = ImageInfoMap.insert({BinImage, {}});
|
||||
+ DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second;
|
||||
+ if (InsertRes.second) {
|
||||
+ ImgInfo.State = getBinImageState(BinImage);
|
||||
+ // Collect kernel names for the image
|
||||
+ {
|
||||
+ std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
|
||||
+ ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
|
||||
+ }
|
||||
}
|
||||
+ const bundle_state ImgState = ImgInfo.State;
|
||||
+ const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
|
||||
+ ImgInfo.KernelIDs;
|
||||
+ int &ImgRequirementCounter = ImgInfo.RequirementCounter;
|
||||
|
||||
- DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
|
||||
- BinImage, Ctx, Devs, ImgState, KernelIDs, /*PIProgram=*/nullptr);
|
||||
+ // If the image does not contain any non-service kernels we can skip it.
|
||||
+ if (!ImageKernelIDs || ImageKernelIDs->empty())
|
||||
+ continue;
|
||||
|
||||
- SYCLDeviceImages.push_back(
|
||||
- createSyclObjFromImpl<device_image_plain>(Impl));
|
||||
- break;
|
||||
+ // Update tracked information.
|
||||
+ for (kernel_id &KernelID : *ImageKernelIDs) {
|
||||
+ StateImagesPairT *StateImagesPair;
|
||||
+ // If only specific kernels are requested, ignore the rest.
|
||||
+ if (!KernelIDs.empty()) {
|
||||
+ auto It = KernelImageMap.find(KernelID);
|
||||
+ if (It == KernelImageMap.end())
|
||||
+ continue;
|
||||
+ StateImagesPair = &It->second;
|
||||
+ } else
|
||||
+ StateImagesPair = &KernelImageMap[KernelID];
|
||||
+
|
||||
+ auto &[KernelImagesState, KernelImages] = *StateImagesPair;
|
||||
+
|
||||
+ if (KernelImages.empty()) {
|
||||
+ KernelImagesState = ImgState;
|
||||
+ KernelImages.push_back(BinImage);
|
||||
+ ++ImgRequirementCounter;
|
||||
+ } else if (KernelImagesState < ImgState) {
|
||||
+ for (RTDeviceBinaryImage *Img : KernelImages) {
|
||||
+ auto It = ImageInfoMap.find(Img);
|
||||
+ assert(It != ImageInfoMap.end());
|
||||
+ assert(It->second.RequirementCounter > 0);
|
||||
+ --(It->second.RequirementCounter);
|
||||
+ }
|
||||
+ KernelImages.clear();
|
||||
+ KernelImages.push_back(BinImage);
|
||||
+ KernelImagesState = ImgState;
|
||||
+ ++ImgRequirementCounter;
|
||||
+ } else if (KernelImagesState == ImgState) {
|
||||
+ KernelImages.push_back(BinImage);
|
||||
+ ++ImgRequirementCounter;
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
+ for (const auto &ImgInfoPair : ImageInfoMap) {
|
||||
+ if (ImgInfoPair.second.RequirementCounter == 0)
|
||||
+ continue;
|
||||
+
|
||||
+ DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
|
||||
+ ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State,
|
||||
+ ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr);
|
||||
+
|
||||
+ SYCLDeviceImages.push_back(createSyclObjFromImpl<device_image_plain>(Impl));
|
||||
+ }
|
||||
+
|
||||
return SYCLDeviceImages;
|
||||
}
|
||||
|
||||
|
|
|
@ -149,3 +149,19 @@ index 074f910a2..30f490818 100644
|
|||
return is_hit_first | is_hit_second;
|
||||
}
|
||||
};
|
||||
diff -ruN a/kernels/sycl/rthwif_embree_builder.cpp b/kernels/sycl/rthwif_embree_builder.cpp
|
||||
--- a/kernels/sycl/rthwif_embree_builder.cpp 2023-03-28 17:23:06.429190200 +0200
|
||||
+++ b/kernels/sycl/rthwif_embree_builder.cpp 2023-03-28 17:35:01.291938600 +0200
|
||||
@@ -540,7 +540,12 @@
|
||||
assert(offset <= geomDescrData.size());
|
||||
}
|
||||
|
||||
+ /* Force running BVH building sequentially from the calling thread if using TBB < 2021, as it otherwise leads to runtime issues. */
|
||||
+#if TBB_VERSION_MAJOR<2021
|
||||
+ RTHWIF_PARALLEL_OPERATION parallelOperation = nullptr;
|
||||
+#else
|
||||
RTHWIF_PARALLEL_OPERATION parallelOperation = rthwifNewParallelOperation();
|
||||
+#endif
|
||||
|
||||
/* estimate static accel size */
|
||||
BBox1f time_range(0,1);
|
||||
|
|
|
@ -281,6 +281,9 @@ endif()
|
|||
|
||||
if(WITH_CYCLES_EMBREE)
|
||||
add_definitions(-DWITH_EMBREE)
|
||||
if(WITH_CYCLES_DEVICE_ONEAPI AND EMBREE_SYCL_SUPPORT)
|
||||
add_definitions(-DWITH_EMBREE_GPU)
|
||||
endif()
|
||||
add_definitions(-DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION})
|
||||
include_directories(
|
||||
SYSTEM
|
||||
|
|
|
@ -1544,6 +1544,13 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
|||
default=False,
|
||||
)
|
||||
|
||||
use_oneapirt: BoolProperty(
|
||||
name="Embree on GPU (Experimental)",
|
||||
description="Embree GPU execution will allow to use hardware ray tracing on Intel GPUs, which will provide better performance. "
|
||||
"However this support is experimental and some scenes may render incorrectly",
|
||||
default=False,
|
||||
)
|
||||
|
||||
kernel_optimization_level: EnumProperty(
|
||||
name="Kernel Optimization",
|
||||
description="Kernels can be optimized based on scene content. Optimized kernels are requested at the start of a render. "
|
||||
|
@ -1763,6 +1770,11 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
|||
col.prop(self, "kernel_optimization_level")
|
||||
col.prop(self, "use_metalrt")
|
||||
|
||||
if compute_device_type == 'ONEAPI' and _cycles.with_embree_gpu:
|
||||
row = layout.row()
|
||||
row.use_property_split = True
|
||||
row.prop(self, "use_oneapirt")
|
||||
|
||||
def draw(self, context):
|
||||
self.draw_impl(self.layout, context)
|
||||
|
||||
|
|
|
@ -112,9 +112,26 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences,
|
|||
device.has_peer_memory = false;
|
||||
}
|
||||
|
||||
if (get_boolean(cpreferences, "use_metalrt")) {
|
||||
device.use_metalrt = true;
|
||||
bool accumulated_use_hardware_raytracing = false;
|
||||
foreach (
|
||||
DeviceInfo &info,
|
||||
(device.multi_devices.size() != 0 ? device.multi_devices : vector<DeviceInfo>({device}))) {
|
||||
if (info.type == DEVICE_METAL && !get_boolean(cpreferences, "use_metalrt")) {
|
||||
info.use_hardware_raytracing = false;
|
||||
}
|
||||
|
||||
if (info.type == DEVICE_ONEAPI && !get_boolean(cpreferences, "use_oneapirt")) {
|
||||
info.use_hardware_raytracing = false;
|
||||
}
|
||||
|
||||
/* There is an accumulative logic here, because Multidevices are support only for
|
||||
* the same backend + CPU in Blender right now, and both oneAPI and Metal have a
|
||||
* global boolean backend setting (see above) for enabling/disabling HW RT,
|
||||
* so all subdevices in the multidevice should enable (or disable) HW RT
|
||||
* simultaneously (and CPU device are expected to ignore "use_hardware_raytracing" setting) */
|
||||
accumulated_use_hardware_raytracing |= info.use_hardware_raytracing;
|
||||
}
|
||||
device.use_hardware_raytracing = accumulated_use_hardware_raytracing;
|
||||
|
||||
if (preview) {
|
||||
/* Disable specialization for preview renders. */
|
||||
|
|
|
@ -1034,6 +1034,14 @@ void *CCL_python_module_init()
|
|||
Py_INCREF(Py_False);
|
||||
#endif /* WITH_EMBREE */
|
||||
|
||||
#ifdef WITH_EMBREE_GPU
|
||||
PyModule_AddObject(mod, "with_embree_gpu", Py_True);
|
||||
Py_INCREF(Py_True);
|
||||
#else /* WITH_EMBREE_GPU */
|
||||
PyModule_AddObject(mod, "with_embree_gpu", Py_False);
|
||||
Py_INCREF(Py_False);
|
||||
#endif /* WITH_EMBREE_GPU */
|
||||
|
||||
if (ccl::openimagedenoise_supported()) {
|
||||
PyModule_AddObject(mod, "with_openimagedenoise", Py_True);
|
||||
Py_INCREF(Py_True);
|
||||
|
|
|
@ -84,7 +84,7 @@ CPUDevice::~CPUDevice()
|
|||
texture_info.free();
|
||||
}
|
||||
|
||||
BVHLayoutMask CPUDevice::get_bvh_layout_mask() const
|
||||
BVHLayoutMask CPUDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
|
||||
{
|
||||
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
|
||||
#ifdef WITH_EMBREE
|
||||
|
|
|
@ -56,7 +56,7 @@ class CPUDevice : public Device {
|
|||
CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_);
|
||||
~CPUDevice();
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
|
||||
|
||||
/* Returns true if the texture info was copied to the device (meaning, some more
|
||||
* re-initialization might be needed). */
|
||||
|
|
|
@ -35,7 +35,7 @@ bool CUDADevice::have_precompiled_kernels()
|
|||
return path_exists(cubins_path);
|
||||
}
|
||||
|
||||
BVHLayoutMask CUDADevice::get_bvh_layout_mask() const
|
||||
BVHLayoutMask CUDADevice::get_bvh_layout_mask(uint /*kernel_features*/) const
|
||||
{
|
||||
return BVH_LAYOUT_BVH2;
|
||||
}
|
||||
|
|
|
@ -38,7 +38,7 @@ class CUDADevice : public GPUDevice {
|
|||
|
||||
static bool have_precompiled_kernels();
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
|
||||
|
||||
void set_error(const string &error) override;
|
||||
|
||||
|
|
|
@ -354,7 +354,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
|
|||
info.has_guiding = true;
|
||||
info.has_profiling = true;
|
||||
info.has_peer_memory = false;
|
||||
info.use_metalrt = false;
|
||||
info.use_hardware_raytracing = false;
|
||||
info.denoisers = DENOISER_ALL;
|
||||
|
||||
foreach (const DeviceInfo &device, subdevices) {
|
||||
|
@ -403,7 +403,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
|
|||
info.has_guiding &= device.has_guiding;
|
||||
info.has_profiling &= device.has_profiling;
|
||||
info.has_peer_memory |= device.has_peer_memory;
|
||||
info.use_metalrt |= device.use_metalrt;
|
||||
info.use_hardware_raytracing |= device.use_hardware_raytracing;
|
||||
info.denoisers &= device.denoisers;
|
||||
}
|
||||
|
||||
|
|
|
@ -71,15 +71,16 @@ class DeviceInfo {
|
|||
string description;
|
||||
string id; /* used for user preferences, should stay fixed with changing hardware config */
|
||||
int num;
|
||||
bool display_device; /* GPU is used as a display device. */
|
||||
bool has_nanovdb; /* Support NanoVDB volumes. */
|
||||
bool has_light_tree; /* Support light tree. */
|
||||
bool has_osl; /* Support Open Shading Language. */
|
||||
bool has_guiding; /* Support path guiding. */
|
||||
bool has_profiling; /* Supports runtime collection of profiling info. */
|
||||
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
|
||||
bool has_gpu_queue; /* Device supports GPU queue. */
|
||||
bool use_metalrt; /* Use MetalRT to accelerate ray queries (Metal only). */
|
||||
bool display_device; /* GPU is used as a display device. */
|
||||
bool has_nanovdb; /* Support NanoVDB volumes. */
|
||||
bool has_light_tree; /* Support light tree. */
|
||||
bool has_osl; /* Support Open Shading Language. */
|
||||
bool has_guiding; /* Support path guiding. */
|
||||
bool has_profiling; /* Supports runtime collection of profiling info. */
|
||||
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
|
||||
bool has_gpu_queue; /* Device supports GPU queue. */
|
||||
bool use_hardware_raytracing; /* Use hardware ray tracing to accelerate ray queries in a backend.
|
||||
*/
|
||||
KernelOptimizationLevel kernel_optimization_level; /* Optimization level applied to path tracing
|
||||
* kernels (Metal only). */
|
||||
DenoiserTypeMask denoisers; /* Supported denoiser types. */
|
||||
|
@ -101,7 +102,7 @@ class DeviceInfo {
|
|||
has_profiling = false;
|
||||
has_peer_memory = false;
|
||||
has_gpu_queue = false;
|
||||
use_metalrt = false;
|
||||
use_hardware_raytracing = false;
|
||||
denoisers = DENOISER_NONE;
|
||||
}
|
||||
|
||||
|
@ -157,7 +158,7 @@ class Device {
|
|||
fprintf(stderr, "%s\n", error.c_str());
|
||||
fflush(stderr);
|
||||
}
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const = 0;
|
||||
virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const = 0;
|
||||
|
||||
/* statistics */
|
||||
Stats &stats;
|
||||
|
|
|
@ -20,7 +20,7 @@ class DummyDevice : public Device {
|
|||
|
||||
~DummyDevice() {}
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override
|
||||
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ bool HIPDevice::have_precompiled_kernels()
|
|||
return path_exists(fatbins_path);
|
||||
}
|
||||
|
||||
BVHLayoutMask HIPDevice::get_bvh_layout_mask() const
|
||||
BVHLayoutMask HIPDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
|
||||
{
|
||||
return BVH_LAYOUT_BVH2;
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ class HIPDevice : public GPUDevice {
|
|||
|
||||
static bool have_precompiled_kernels();
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
|
||||
|
||||
void set_error(const string &error) override;
|
||||
|
||||
|
|
|
@ -3,7 +3,9 @@
|
|||
|
||||
#include "device/kernel.h"
|
||||
|
||||
#include "util/log.h"
|
||||
#ifndef __KERNEL_ONEAPI__
|
||||
# include "util/log.h"
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -153,10 +155,13 @@ const char *device_kernel_as_string(DeviceKernel kernel)
|
|||
case DEVICE_KERNEL_NUM:
|
||||
break;
|
||||
};
|
||||
#ifndef __KERNEL_ONEAPI__
|
||||
LOG(FATAL) << "Unhandled kernel " << static_cast<int>(kernel) << ", should never happen.";
|
||||
#endif
|
||||
return "UNKNOWN";
|
||||
}
|
||||
|
||||
#ifndef __KERNEL_ONEAPI__
|
||||
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel)
|
||||
{
|
||||
os << device_kernel_as_string(kernel);
|
||||
|
@ -178,5 +183,6 @@ string device_kernel_mask_as_string(DeviceKernelMask mask)
|
|||
|
||||
return str;
|
||||
}
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -3,11 +3,13 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "kernel/types.h"
|
||||
#ifndef __KERNEL_ONEAPI__
|
||||
# include "kernel/types.h"
|
||||
|
||||
#include "util/string.h"
|
||||
# include "util/string.h"
|
||||
|
||||
#include <ostream> // NOLINT
|
||||
# include <ostream> // NOLINT
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -15,9 +17,12 @@ bool device_kernel_has_shading(DeviceKernel kernel);
|
|||
bool device_kernel_has_intersection(DeviceKernel kernel);
|
||||
|
||||
const char *device_kernel_as_string(DeviceKernel kernel);
|
||||
|
||||
#ifndef __KERNEL_ONEAPI__
|
||||
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel);
|
||||
|
||||
typedef uint64_t DeviceKernelMask;
|
||||
string device_kernel_mask_as_string(DeviceKernelMask mask);
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -100,7 +100,7 @@ class MetalDevice : public Device {
|
|||
|
||||
virtual void cancel() override;
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
|
||||
|
||||
void set_error(const string &error) override;
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ bool MetalDevice::is_device_cancelled(int ID)
|
|||
return get_device_by_ID(ID, lock) == nullptr;
|
||||
}
|
||||
|
||||
BVHLayoutMask MetalDevice::get_bvh_layout_mask() const
|
||||
BVHLayoutMask MetalDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
|
||||
{
|
||||
return use_metalrt ? BVH_LAYOUT_METAL : BVH_LAYOUT_BVH2;
|
||||
}
|
||||
|
@ -100,12 +100,12 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
|||
}
|
||||
case METAL_GPU_AMD: {
|
||||
max_threads_per_threadgroup = 128;
|
||||
use_metalrt = info.use_metalrt;
|
||||
use_metalrt = info.use_hardware_raytracing;
|
||||
break;
|
||||
}
|
||||
case METAL_GPU_APPLE: {
|
||||
max_threads_per_threadgroup = 512;
|
||||
use_metalrt = info.use_metalrt;
|
||||
use_metalrt = info.use_hardware_raytracing;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -96,12 +96,13 @@ class MultiDevice : public Device {
|
|||
return error_msg;
|
||||
}
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override
|
||||
virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const override
|
||||
{
|
||||
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
|
||||
BVHLayoutMask bvh_layout_mask_all = BVH_LAYOUT_NONE;
|
||||
foreach (const SubDevice &sub_device, devices) {
|
||||
BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask();
|
||||
BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask(
|
||||
kernel_features);
|
||||
bvh_layout_mask &= device_bvh_layout_mask;
|
||||
bvh_layout_mask_all |= device_bvh_layout_mask;
|
||||
}
|
||||
|
|
|
@ -40,12 +40,12 @@ bool device_oneapi_init()
|
|||
if (getenv("SYCL_CACHE_TRESHOLD") == nullptr) {
|
||||
_putenv_s("SYCL_CACHE_THRESHOLD", "0");
|
||||
}
|
||||
if (getenv("SYCL_DEVICE_FILTER") == nullptr) {
|
||||
if (getenv("ONEAPI_DEVICE_SELECTOR") == nullptr) {
|
||||
if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
|
||||
_putenv_s("SYCL_DEVICE_FILTER", "level_zero");
|
||||
_putenv_s("ONEAPI_DEVICE_SELECTOR", "level_zero:*");
|
||||
}
|
||||
else {
|
||||
_putenv_s("SYCL_DEVICE_FILTER", "level_zero,cuda,hip");
|
||||
_putenv_s("ONEAPI_DEVICE_SELECTOR", "!opencl:*");
|
||||
}
|
||||
}
|
||||
if (getenv("SYCL_ENABLE_PCI") == nullptr) {
|
||||
|
@ -58,10 +58,10 @@ bool device_oneapi_init()
|
|||
setenv("SYCL_CACHE_PERSISTENT", "1", false);
|
||||
setenv("SYCL_CACHE_THRESHOLD", "0", false);
|
||||
if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
|
||||
setenv("SYCL_DEVICE_FILTER", "level_zero", false);
|
||||
setenv("ONEAPI_DEVICE_SELECTOR", "level_zero:*", false);
|
||||
}
|
||||
else {
|
||||
setenv("SYCL_DEVICE_FILTER", "level_zero,cuda,hip", false);
|
||||
setenv("ONEAPI_DEVICE_SELECTOR", "!opencl:*", false);
|
||||
}
|
||||
setenv("SYCL_ENABLE_PCI", "1", false);
|
||||
setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false);
|
||||
|
@ -87,7 +87,8 @@ Device *device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &pro
|
|||
}
|
||||
|
||||
#ifdef WITH_ONEAPI
|
||||
static void device_iterator_cb(const char *id, const char *name, int num, void *user_ptr)
|
||||
static void device_iterator_cb(
|
||||
const char *id, const char *name, int num, bool hwrt_support, void *user_ptr)
|
||||
{
|
||||
vector<DeviceInfo> *devices = (vector<DeviceInfo> *)user_ptr;
|
||||
|
||||
|
@ -112,6 +113,13 @@ static void device_iterator_cb(const char *id, const char *name, int num, void *
|
|||
/* NOTE(@nsirgien): Seems not possible to know from SYCL/oneAPI or Level0. */
|
||||
info.display_device = false;
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
info.use_hardware_raytracing = hwrt_support;
|
||||
# else
|
||||
info.use_hardware_raytracing = false;
|
||||
(void)hwrt_support;
|
||||
# endif
|
||||
|
||||
devices->push_back(info);
|
||||
VLOG_INFO << "Added device \"" << name << "\" with id \"" << info.id << "\".";
|
||||
}
|
||||
|
|
|
@ -8,7 +8,19 @@
|
|||
# include "util/debug.h"
|
||||
# include "util/log.h"
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
# include "bvh/embree.h"
|
||||
# endif
|
||||
|
||||
# include "kernel/device/oneapi/globals.h"
|
||||
# include "kernel/device/oneapi/kernel.h"
|
||||
|
||||
# if defined(WITH_EMBREE_GPU) && defined(EMBREE_SYCL_SUPPORT) && !defined(SYCL_LANGUAGE_VERSION)
|
||||
/* These declarations are missing from embree headers when compiling from a compiler that doesn't
|
||||
* support SYCL. */
|
||||
extern "C" RTCDevice rtcNewSYCLDevice(sycl::context context, const char *config);
|
||||
extern "C" bool rtcIsSYCLDeviceSupported(const sycl::device sycl_device);
|
||||
# endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -22,16 +34,29 @@ static void queue_error_cb(const char *message, void *user_ptr)
|
|||
OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
: Device(info, stats, profiler),
|
||||
device_queue_(nullptr),
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
embree_device(nullptr),
|
||||
embree_scene(nullptr),
|
||||
# endif
|
||||
texture_info_(this, "texture_info", MEM_GLOBAL),
|
||||
kg_memory_(nullptr),
|
||||
kg_memory_device_(nullptr),
|
||||
kg_memory_size_(0)
|
||||
{
|
||||
need_texture_info_ = false;
|
||||
use_hardware_raytracing = info.use_hardware_raytracing;
|
||||
|
||||
oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);
|
||||
|
||||
bool is_finished_ok = create_queue(device_queue_, info.num);
|
||||
bool is_finished_ok = create_queue(device_queue_,
|
||||
info.num,
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
use_hardware_raytracing ? &embree_device : nullptr
|
||||
# else
|
||||
nullptr
|
||||
# endif
|
||||
);
|
||||
|
||||
if (is_finished_ok == false) {
|
||||
set_error("oneAPI queue initialization error: got runtime exception \"" +
|
||||
oneapi_error_string_ + "\"");
|
||||
|
@ -42,6 +67,16 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi
|
|||
assert(device_queue_);
|
||||
}
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
use_hardware_raytracing = use_hardware_raytracing && (embree_device != nullptr);
|
||||
# else
|
||||
use_hardware_raytracing = false;
|
||||
# endif
|
||||
|
||||
if (use_hardware_raytracing) {
|
||||
VLOG_INFO << "oneAPI will use hardware ray tracing for intersection acceleration.";
|
||||
}
|
||||
|
||||
size_t globals_segment_size;
|
||||
is_finished_ok = kernel_globals_size(globals_segment_size);
|
||||
if (is_finished_ok == false) {
|
||||
|
@ -64,6 +99,11 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi
|
|||
|
||||
OneapiDevice::~OneapiDevice()
|
||||
{
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
if (embree_device)
|
||||
rtcReleaseDevice(embree_device);
|
||||
# endif
|
||||
|
||||
texture_info_.free();
|
||||
usm_free(device_queue_, kg_memory_);
|
||||
usm_free(device_queue_, kg_memory_device_);
|
||||
|
@ -80,15 +120,47 @@ bool OneapiDevice::check_peer_access(Device * /*peer_device*/)
|
|||
return false;
|
||||
}
|
||||
|
||||
BVHLayoutMask OneapiDevice::get_bvh_layout_mask() const
|
||||
bool OneapiDevice::can_use_hardware_raytracing_for_features(uint requested_features) const
|
||||
{
|
||||
return BVH_LAYOUT_BVH2;
|
||||
/* MNEE and Raytrace kernels currently don't work correctly with HWRT. */
|
||||
return !(requested_features & (KERNEL_FEATURE_MNEE | KERNEL_FEATURE_NODE_RAYTRACE));
|
||||
}
|
||||
|
||||
BVHLayoutMask OneapiDevice::get_bvh_layout_mask(uint requested_features) const
|
||||
{
|
||||
return (use_hardware_raytracing &&
|
||||
can_use_hardware_raytracing_for_features(requested_features)) ?
|
||||
BVH_LAYOUT_EMBREE :
|
||||
BVH_LAYOUT_BVH2;
|
||||
}
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
void OneapiDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
{
|
||||
if (embree_device && bvh->params.bvh_layout == BVH_LAYOUT_EMBREE) {
|
||||
BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
|
||||
if (refit) {
|
||||
bvh_embree->refit(progress);
|
||||
}
|
||||
else {
|
||||
bvh_embree->build(progress, &stats, embree_device);
|
||||
}
|
||||
if (bvh->params.top_level) {
|
||||
embree_scene = bvh_embree->scene;
|
||||
}
|
||||
}
|
||||
else {
|
||||
Device::build_bvh(bvh, progress, refit);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
bool OneapiDevice::load_kernels(const uint requested_features)
|
||||
{
|
||||
assert(device_queue_);
|
||||
|
||||
kernel_features = requested_features;
|
||||
|
||||
bool is_finished_ok = oneapi_run_test_kernel(device_queue_);
|
||||
if (is_finished_ok == false) {
|
||||
set_error("oneAPI test kernel execution: got a runtime exception \"" + oneapi_error_string_ +
|
||||
|
@ -100,7 +172,14 @@ bool OneapiDevice::load_kernels(const uint requested_features)
|
|||
assert(device_queue_);
|
||||
}
|
||||
|
||||
is_finished_ok = oneapi_load_kernels(device_queue_, (const unsigned int)requested_features);
|
||||
if (use_hardware_raytracing && !can_use_hardware_raytracing_for_features(requested_features)) {
|
||||
VLOG_INFO
|
||||
<< "Hardware ray tracing disabled, not supported yet by oneAPI for requested features.";
|
||||
use_hardware_raytracing = false;
|
||||
}
|
||||
|
||||
is_finished_ok = oneapi_load_kernels(
|
||||
device_queue_, (const unsigned int)requested_features, use_hardware_raytracing);
|
||||
if (is_finished_ok == false) {
|
||||
set_error("oneAPI kernels loading: got a runtime exception \"" + oneapi_error_string_ + "\"");
|
||||
}
|
||||
|
@ -327,6 +406,16 @@ void OneapiDevice::const_copy_to(const char *name, void *host, size_t size)
|
|||
<< string_human_readable_number(size) << " bytes. ("
|
||||
<< string_human_readable_size(size) << ")";
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
if (strcmp(name, "data") == 0) {
|
||||
assert(size <= sizeof(KernelData));
|
||||
|
||||
/* Update scene handle(since it is different for each device on multi devices) */
|
||||
KernelData *const data = (KernelData *)host;
|
||||
data->device_bvh = embree_scene;
|
||||
}
|
||||
# endif
|
||||
|
||||
ConstMemMap::iterator i = const_mem_map_.find(name);
|
||||
device_vector<uchar> *data;
|
||||
|
||||
|
@ -446,7 +535,9 @@ void OneapiDevice::check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_
|
|||
# endif
|
||||
}
|
||||
|
||||
bool OneapiDevice::create_queue(SyclQueue *&external_queue, int device_index)
|
||||
bool OneapiDevice::create_queue(SyclQueue *&external_queue,
|
||||
int device_index,
|
||||
void *embree_device_pointer)
|
||||
{
|
||||
bool finished_correct = true;
|
||||
try {
|
||||
|
@ -457,6 +548,13 @@ bool OneapiDevice::create_queue(SyclQueue *&external_queue, int device_index)
|
|||
sycl::queue *created_queue = new sycl::queue(devices[device_index],
|
||||
sycl::property::queue::in_order());
|
||||
external_queue = reinterpret_cast<SyclQueue *>(created_queue);
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
if (embree_device_pointer) {
|
||||
*((RTCDevice *)embree_device_pointer) = rtcNewSYCLDevice(created_queue->get_context(), "");
|
||||
}
|
||||
# else
|
||||
(void)embree_device_pointer;
|
||||
# endif
|
||||
}
|
||||
catch (sycl::exception const &e) {
|
||||
finished_correct = false;
|
||||
|
@ -625,7 +723,8 @@ bool OneapiDevice::enqueue_kernel(KernelContext *kernel_context,
|
|||
size_t global_size,
|
||||
void **args)
|
||||
{
|
||||
return oneapi_enqueue_kernel(kernel_context, kernel, global_size, args);
|
||||
return oneapi_enqueue_kernel(
|
||||
kernel_context, kernel, global_size, kernel_features, use_hardware_raytracing, args);
|
||||
}
|
||||
|
||||
/* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows
|
||||
|
@ -830,12 +929,17 @@ void OneapiDevice::iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_p
|
|||
std::string name = device.get_info<sycl::info::device::name>();
|
||||
# else
|
||||
std::string name = "SYCL Host Task (Debug)";
|
||||
# endif
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
bool hwrt_support = rtcIsSYCLDeviceSupported(device);
|
||||
# else
|
||||
bool hwrt_support = false;
|
||||
# endif
|
||||
std::string id = "ONEAPI_" + platform_name + "_" + name;
|
||||
if (device.has(sycl::aspect::ext_intel_pci_address)) {
|
||||
id.append("_" + device.get_info<sycl::ext::intel::info::device::pci_address>());
|
||||
}
|
||||
(cb)(id.c_str(), name.c_str(), num, user_ptr);
|
||||
(cb)(id.c_str(), name.c_str(), num, hwrt_support, user_ptr);
|
||||
num++;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,15 +16,16 @@ CCL_NAMESPACE_BEGIN
|
|||
|
||||
class DeviceQueue;
|
||||
|
||||
typedef void (*OneAPIDeviceIteratorCallback)(const char *id,
|
||||
const char *name,
|
||||
int num,
|
||||
void *user_ptr);
|
||||
typedef void (*OneAPIDeviceIteratorCallback)(
|
||||
const char *id, const char *name, int num, bool hwrt_support, void *user_ptr);
|
||||
|
||||
class OneapiDevice : public Device {
|
||||
private:
|
||||
SyclQueue *device_queue_;
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
RTCDevice embree_device;
|
||||
RTCScene embree_scene;
|
||||
# endif
|
||||
using ConstMemMap = map<string, device_vector<uchar> *>;
|
||||
ConstMemMap const_mem_map_;
|
||||
device_vector<TextureInfo> texture_info_;
|
||||
|
@ -34,17 +35,21 @@ class OneapiDevice : public Device {
|
|||
size_t kg_memory_size_ = (size_t)0;
|
||||
size_t max_memory_on_device_ = (size_t)0;
|
||||
std::string oneapi_error_string_;
|
||||
bool use_hardware_raytracing = false;
|
||||
unsigned int kernel_features = 0;
|
||||
|
||||
public:
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const override;
|
||||
|
||||
OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
|
||||
|
||||
virtual ~OneapiDevice();
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
|
||||
# endif
|
||||
bool check_peer_access(Device *peer_device) override;
|
||||
|
||||
bool load_kernels(const uint requested_features) override;
|
||||
bool load_kernels(const uint kernel_features) override;
|
||||
|
||||
void load_texture_info();
|
||||
|
||||
|
@ -113,8 +118,9 @@ class OneapiDevice : public Device {
|
|||
SyclQueue *sycl_queue();
|
||||
|
||||
protected:
|
||||
bool can_use_hardware_raytracing_for_features(uint kernel_features) const;
|
||||
void check_usm(SyclQueue *queue, const void *usm_ptr, bool allow_host);
|
||||
bool create_queue(SyclQueue *&external_queue, int device_index);
|
||||
bool create_queue(SyclQueue *&external_queue, int device_index, void *embree_device);
|
||||
void free_queue(SyclQueue *queue);
|
||||
void *usm_aligned_alloc_host(SyclQueue *queue, size_t memory_size, size_t alignment);
|
||||
void *usm_alloc_device(SyclQueue *queue, size_t memory_size);
|
||||
|
|
|
@ -151,7 +151,7 @@ unique_ptr<DeviceQueue> OptiXDevice::gpu_queue_create()
|
|||
return make_unique<OptiXDeviceQueue>(this);
|
||||
}
|
||||
|
||||
BVHLayoutMask OptiXDevice::get_bvh_layout_mask() const
|
||||
BVHLayoutMask OptiXDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
|
||||
{
|
||||
/* OptiX has its own internal acceleration structure format. */
|
||||
return BVH_LAYOUT_OPTIX;
|
||||
|
|
|
@ -88,7 +88,7 @@ class OptiXDevice : public CUDADevice {
|
|||
OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
|
||||
~OptiXDevice();
|
||||
|
||||
BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
|
||||
|
||||
string compile_kernel_get_common_cflags(const uint kernel_features);
|
||||
|
||||
|
|
|
@ -299,8 +299,8 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
|
|||
* become busy after adding new tiles). This is especially important for the shadow catcher which
|
||||
* schedules work in halves of available number of paths. */
|
||||
work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8);
|
||||
work_tile_scheduler_.set_accelerated_rt((device_->get_bvh_layout_mask() & BVH_LAYOUT_OPTIX) !=
|
||||
0);
|
||||
work_tile_scheduler_.set_accelerated_rt(
|
||||
(device_->get_bvh_layout_mask(device_scene_->data.kernel_features) & BVH_LAYOUT_OPTIX) != 0);
|
||||
work_tile_scheduler_.reset(effective_buffer_params_,
|
||||
start_sample,
|
||||
samples_num,
|
||||
|
|
|
@ -96,10 +96,13 @@ set(SRC_KERNEL_DEVICE_ONEAPI_HEADERS
|
|||
device/oneapi/compat.h
|
||||
device/oneapi/context_begin.h
|
||||
device/oneapi/context_end.h
|
||||
device/oneapi/context_intersect_begin.h
|
||||
device/oneapi/context_intersect_end.h
|
||||
device/oneapi/globals.h
|
||||
device/oneapi/image.h
|
||||
device/oneapi/kernel.h
|
||||
device/oneapi/kernel_templates.h
|
||||
device/cpu/bvh.h
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_CLOSURE_HEADERS
|
||||
|
@ -764,7 +767,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
|
|||
|
||||
# Set defaults for spir64 and spir64_gen options
|
||||
if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64)
|
||||
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-large-register-file -ze-opt-regular-grf-kernel integrator_intersect'")
|
||||
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-regular-grf-kernel integrator_intersect -ze-opt-large-grf-kernel shade -ze-opt-no-local-to-generic'")
|
||||
endif()
|
||||
if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen)
|
||||
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}" CACHE STRING "Extra build options for spir64_gen target")
|
||||
|
@ -776,7 +779,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
|
|||
# Host execution won't use GPU binaries, no need to compile them.
|
||||
if(WITH_CYCLES_ONEAPI_BINARIES AND NOT WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
|
||||
# AoT binaries aren't currently reused when calling sycl::build.
|
||||
list(APPEND sycl_compiler_flags -DSYCL_SKIP_KERNELS_PRELOAD)
|
||||
list(APPEND sycl_compiler_flags -DWITH_CYCLES_ONEAPI_BINARIES)
|
||||
# Iterate over all targest and their options
|
||||
list(JOIN CYCLES_ONEAPI_SYCL_TARGETS "," targets_string)
|
||||
list(APPEND sycl_compiler_flags -fsycl-targets=${targets_string})
|
||||
|
@ -798,6 +801,59 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
|
|||
-I"${NANOVDB_INCLUDE_DIR}")
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_EMBREE AND EMBREE_SYCL_SUPPORT)
|
||||
list(APPEND sycl_compiler_flags
|
||||
-DWITH_EMBREE
|
||||
-DWITH_EMBREE_GPU
|
||||
-DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION}
|
||||
-I"${EMBREE_INCLUDE_DIRS}")
|
||||
|
||||
if(WIN32)
|
||||
list(APPEND sycl_compiler_flags
|
||||
-ladvapi32.lib
|
||||
)
|
||||
endif()
|
||||
|
||||
set(next_library_mode "")
|
||||
foreach(library ${EMBREE_LIBRARIES})
|
||||
string(TOLOWER "${library}" library_lower)
|
||||
if(("${library_lower}" STREQUAL "optimized") OR
|
||||
("${library_lower}" STREQUAL "debug"))
|
||||
set(next_library_mode "${library_lower}")
|
||||
else()
|
||||
if(next_library_mode STREQUAL "")
|
||||
list(APPEND EMBREE_TBB_LIBRARIES_optimized ${library})
|
||||
list(APPEND EMBREE_TBB_LIBRARIES_debug ${library})
|
||||
else()
|
||||
list(APPEND EMBREE_TBB_LIBRARIES_${next_library_mode} ${library})
|
||||
endif()
|
||||
set(next_library_mode "")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
foreach(library ${TBB_LIBRARIES})
|
||||
string(TOLOWER "${library}" library_lower)
|
||||
if(("${library_lower}" STREQUAL "optimized") OR
|
||||
("${library_lower}" STREQUAL "debug"))
|
||||
set(next_library_mode "${library_lower}")
|
||||
else()
|
||||
if(next_library_mode STREQUAL "")
|
||||
list(APPEND EMBREE_TBB_LIBRARIES_optimized ${library})
|
||||
list(APPEND EMBREE_TBB_LIBRARIES_debug ${library})
|
||||
else()
|
||||
list(APPEND EMBREE_TBB_LIBRARIES_${next_library_mode} ${library})
|
||||
endif()
|
||||
set(next_library_mode "")
|
||||
endif()
|
||||
endforeach()
|
||||
list(APPEND sycl_compiler_flags
|
||||
"$<$<CONFIG:Release>:${EMBREE_TBB_LIBRARIES_optimized}>"
|
||||
"$<$<CONFIG:RelWithDebInfo>:${EMBREE_TBB_LIBRARIES_optimized}>"
|
||||
"$<$<CONFIG:MinSizeRel>:${EMBREE_TBB_LIBRARIES_optimized}>"
|
||||
"$<$<CONFIG:Debug>:${EMBREE_TBB_LIBRARIES_debug}>"
|
||||
)
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_DEBUG)
|
||||
list(APPEND sycl_compiler_flags -DWITH_CYCLES_DEBUG)
|
||||
endif()
|
||||
|
|
|
@ -21,6 +21,28 @@
|
|||
# define __BVH2__
|
||||
#endif
|
||||
|
||||
#if defined(__KERNEL_ONEAPI__) && defined(WITH_EMBREE_GPU)
|
||||
/* bool is apparently not tested for specialization constants:
|
||||
* https://github.com/intel/llvm/blob/39d1c65272a786b2b13a6f094facfddf9408406d/sycl/test/basic_tests/SYCL-2020-spec-constants.cpp#L25-L27
|
||||
* Instead of adding one more bool specialization constant, we reuse existing embree_features one
|
||||
* and use RTC_FEATURE_FLAG_NONE as value to test for avoiding to call Embree on GPU.
|
||||
*/
|
||||
/* We set it to RTC_FEATURE_FLAG_NONE by default so AoT binaries contain MNE and raytrace kernels
|
||||
* precompiled without Embree.
|
||||
* Changing this default value would require updating the logic in oneapi_load_kernels(). */
|
||||
static constexpr sycl::specialization_id<RTCFeatureFlags> oneapi_embree_features{
|
||||
RTC_FEATURE_FLAG_NONE};
|
||||
# define IF_USING_EMBREE \
|
||||
if (kernel_handler.get_specialization_constant<oneapi_embree_features>() != \
|
||||
RTC_FEATURE_FLAG_NONE)
|
||||
# define IF_NOT_USING_EMBREE \
|
||||
if (kernel_handler.get_specialization_constant<oneapi_embree_features>() == \
|
||||
RTC_FEATURE_FLAG_NONE)
|
||||
#else
|
||||
# define IF_USING_EMBREE
|
||||
# define IF_NOT_USING_EMBREE
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef __BVH2__
|
||||
|
@ -74,30 +96,39 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
|
|||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect(kg, ray, visibility, isect);
|
||||
IF_USING_EMBREE
|
||||
{
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect(kg, ray, visibility, isect);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
IF_NOT_USING_EMBREE
|
||||
{
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_hair_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_hair_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
return bvh_intersect_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_hair(kg, ray, isect, visibility);
|
||||
}
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_hair(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect(kg, ray, isect, visibility);
|
||||
return bvh_intersect(kg, ray, isect, visibility);
|
||||
}
|
||||
|
||||
kernel_assert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Single object BVH traversal, for SSS/AO/bevel. */
|
||||
|
@ -129,17 +160,27 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
|||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
IF_USING_EMBREE
|
||||
{
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_local(
|
||||
kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
IF_NOT_USING_EMBREE
|
||||
{
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
}
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
}
|
||||
|
||||
kernel_assert(false);
|
||||
return false;
|
||||
}
|
||||
# endif
|
||||
|
||||
|
@ -184,35 +225,44 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
|||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_shadow_all(
|
||||
kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
|
||||
IF_USING_EMBREE
|
||||
{
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_shadow_all(
|
||||
kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
IF_NOT_USING_EMBREE
|
||||
{
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_shadow_all_hair_motion(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_shadow_all_hair_motion(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect_shadow_all_motion(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
return bvh_intersect_shadow_all_motion(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_shadow_all_hair(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_shadow_all_hair(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect_shadow_all(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
return bvh_intersect_shadow_all(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
|
||||
kernel_assert(false);
|
||||
return false;
|
||||
}
|
||||
# endif /* __SHADOW_RECORD_ALL__ */
|
||||
|
||||
|
@ -239,13 +289,19 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
|
|||
return false;
|
||||
}
|
||||
|
||||
IF_NOT_USING_EMBREE
|
||||
{
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_volume_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_volume_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
return bvh_intersect_volume(kg, ray, isect, visibility);
|
||||
return bvh_intersect_volume(kg, ray, isect, visibility);
|
||||
}
|
||||
|
||||
kernel_assert(false);
|
||||
return false;
|
||||
}
|
||||
# endif /* defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__) */
|
||||
|
||||
|
@ -275,18 +331,27 @@ ccl_device_intersect uint scene_intersect_volume(KernelGlobals kg,
|
|||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
|
||||
IF_USING_EMBREE
|
||||
{
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
IF_NOT_USING_EMBREE
|
||||
{
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
|
||||
}
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
|
||||
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
|
||||
}
|
||||
|
||||
kernel_assert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
# endif /* defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__) */
|
||||
|
|
|
@ -13,8 +13,13 @@
|
|||
# include <embree3/rtcore_scene.h>
|
||||
#endif
|
||||
|
||||
#include "kernel/device/cpu/compat.h"
|
||||
#include "kernel/device/cpu/globals.h"
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
# include "kernel/device/oneapi/compat.h"
|
||||
# include "kernel/device/oneapi/globals.h"
|
||||
#else
|
||||
# include "kernel/device/cpu/compat.h"
|
||||
# include "kernel/device/cpu/globals.h"
|
||||
#endif
|
||||
|
||||
#include "kernel/bvh/types.h"
|
||||
#include "kernel/bvh/util.h"
|
||||
|
@ -33,11 +38,16 @@ using numhit_t = uint8_t;
|
|||
using numhit_t = uint32_t;
|
||||
#endif
|
||||
|
||||
#define CYCLES_EMBREE_USED_FEATURES \
|
||||
(RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE | \
|
||||
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT | \
|
||||
RTC_FEATURE_FLAG_MOTION_BLUR | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE | \
|
||||
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE)
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
# define CYCLES_EMBREE_USED_FEATURES \
|
||||
(kernel_handler.get_specialization_constant<oneapi_embree_features>())
|
||||
#else
|
||||
# define CYCLES_EMBREE_USED_FEATURES \
|
||||
(RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE | \
|
||||
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT | \
|
||||
RTC_FEATURE_FLAG_MOTION_BLUR | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE | \
|
||||
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE)
|
||||
#endif
|
||||
|
||||
#define EMBREE_IS_HAIR(x) (x & 1)
|
||||
|
||||
|
@ -252,7 +262,8 @@ ccl_device_inline void kernel_embree_convert_sss_hit(KernelGlobals kg,
|
|||
* Things like recording subsurface or shadow hits for later evaluation
|
||||
* as well as filtering for volume objects happen here.
|
||||
* Cycles' own BVH does that directly inside the traversal calls. */
|
||||
ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNArguments *args)
|
||||
ccl_device_forceinline void kernel_embree_filter_intersection_func_impl(
|
||||
const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
||||
assert(args->N == 1);
|
||||
|
@ -263,7 +274,11 @@ ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNA
|
|||
#else
|
||||
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
|
||||
#endif
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
KernelGlobalsGPU *kg = nullptr;
|
||||
#else
|
||||
const KernelGlobalsCPU *kg = ctx->kg;
|
||||
#endif
|
||||
const Ray *cray = ctx->ray;
|
||||
|
||||
if (kernel_embree_is_self_intersection(
|
||||
|
@ -277,7 +292,7 @@ ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNA
|
|||
* as well as filtering for volume objects happen here.
|
||||
* Cycles' own BVH does that directly inside the traversal calls.
|
||||
*/
|
||||
ccl_device void kernel_embree_filter_occluded_shadow_all_func(
|
||||
ccl_device_forceinline void kernel_embree_filter_occluded_shadow_all_func_impl(
|
||||
const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
||||
|
@ -290,7 +305,11 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
|
|||
#else
|
||||
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
|
||||
#endif
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
KernelGlobalsGPU *kg = nullptr;
|
||||
#else
|
||||
const KernelGlobalsCPU *kg = ctx->kg;
|
||||
#endif
|
||||
const Ray *cray = ctx->ray;
|
||||
|
||||
Intersection current_isect;
|
||||
|
@ -326,7 +345,7 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
|
|||
}
|
||||
|
||||
/* Test if we need to record this transparent intersection. */
|
||||
const numhit_t max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
|
||||
const numhit_t max_record_hits = min(ctx->max_hits, numhit_t(INTEGRATOR_SHADOW_ISECT_SIZE));
|
||||
if (ctx->num_recorded_hits < max_record_hits) {
|
||||
/* If maximum number of hits was reached, replace the intersection with the
|
||||
* highest distance. We want to find the N closest intersections. */
|
||||
|
@ -363,7 +382,7 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
|
|||
*args->valid = 0;
|
||||
}
|
||||
|
||||
ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
|
||||
ccl_device_forceinline void kernel_embree_filter_occluded_local_func_impl(
|
||||
const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
||||
|
@ -376,7 +395,11 @@ ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
|
|||
#else
|
||||
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
|
||||
#endif
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
KernelGlobalsGPU *kg = nullptr;
|
||||
#else
|
||||
const KernelGlobalsCPU *kg = ctx->kg;
|
||||
#endif
|
||||
const Ray *cray = ctx->ray;
|
||||
|
||||
/* Check if it's hitting the correct object. */
|
||||
|
@ -462,7 +485,7 @@ ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
|
|||
*args->valid = 0;
|
||||
}
|
||||
|
||||
ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
|
||||
ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func_impl(
|
||||
const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
||||
|
@ -475,7 +498,11 @@ ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
|
|||
#else
|
||||
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
|
||||
#endif
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
KernelGlobalsGPU *kg = nullptr;
|
||||
#else
|
||||
const KernelGlobalsCPU *kg = ctx->kg;
|
||||
#endif
|
||||
const Ray *cray = ctx->ray;
|
||||
|
||||
/* Append the intersection to the end of the array. */
|
||||
|
@ -513,14 +540,14 @@ ccl_device_forceinline void kernel_embree_filter_occluded_func(
|
|||
|
||||
switch (ctx->type) {
|
||||
case CCLIntersectContext::RAY_SHADOW_ALL:
|
||||
kernel_embree_filter_occluded_shadow_all_func(args);
|
||||
kernel_embree_filter_occluded_shadow_all_func_impl(args);
|
||||
break;
|
||||
case CCLIntersectContext::RAY_LOCAL:
|
||||
case CCLIntersectContext::RAY_SSS:
|
||||
kernel_embree_filter_occluded_local_func(args);
|
||||
kernel_embree_filter_occluded_local_func_impl(args);
|
||||
break;
|
||||
case CCLIntersectContext::RAY_VOLUME_ALL:
|
||||
kernel_embree_filter_occluded_volume_all_func(args);
|
||||
kernel_embree_filter_occluded_volume_all_func_impl(args);
|
||||
break;
|
||||
|
||||
case CCLIntersectContext::RAY_REGULAR:
|
||||
|
@ -569,7 +596,63 @@ ccl_device void kernel_embree_filter_occluded_func_backface_cull(
|
|||
|
||||
kernel_embree_filter_occluded_func(args);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
/* Static wrappers so we can call the callbacks from out side the ONEAPIKernelContext class */
|
||||
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
|
||||
kernel_embree_filter_intersection_func_static(const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
CCLFirstHitContext *ctx = (CCLFirstHitContext *)(args->context);
|
||||
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
|
||||
context->kernel_embree_filter_intersection_func_impl(args);
|
||||
}
|
||||
|
||||
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
|
||||
kernel_embree_filter_occluded_shadow_all_func_static(const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
CCLShadowContext *ctx = (CCLShadowContext *)(args->context);
|
||||
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
|
||||
context->kernel_embree_filter_occluded_shadow_all_func_impl(args);
|
||||
}
|
||||
|
||||
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
|
||||
kernel_embree_filter_occluded_local_func_static(const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
CCLLocalContext *ctx = (CCLLocalContext *)(args->context);
|
||||
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
|
||||
context->kernel_embree_filter_occluded_local_func_impl(args);
|
||||
}
|
||||
|
||||
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
|
||||
kernel_embree_filter_occluded_volume_all_func_static(const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
CCLVolumeContext *ctx = (CCLVolumeContext *)(args->context);
|
||||
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
|
||||
context->kernel_embree_filter_occluded_volume_all_func_impl(args);
|
||||
}
|
||||
|
||||
# define kernel_embree_filter_intersection_func \
|
||||
ONEAPIKernelContext::kernel_embree_filter_intersection_func_static
|
||||
# define kernel_embree_filter_occluded_shadow_all_func \
|
||||
ONEAPIKernelContext::kernel_embree_filter_occluded_shadow_all_func_static
|
||||
# define kernel_embree_filter_occluded_local_func \
|
||||
ONEAPIKernelContext::kernel_embree_filter_occluded_local_func_static
|
||||
# define kernel_embree_filter_occluded_volume_all_func \
|
||||
ONEAPIKernelContext::kernel_embree_filter_occluded_volume_all_func_static
|
||||
#else
|
||||
# define kernel_embree_filter_intersection_func kernel_embree_filter_intersection_func_impl
|
||||
# if EMBREE_MAJOR_VERSION >= 4
|
||||
# define kernel_embree_filter_occluded_shadow_all_func \
|
||||
kernel_embree_filter_occluded_shadow_all_func_impl
|
||||
# define kernel_embree_filter_occluded_local_func kernel_embree_filter_occluded_local_func_impl
|
||||
# define kernel_embree_filter_occluded_volume_all_func \
|
||||
kernel_embree_filter_occluded_volume_all_func_impl
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Scene intersection. */
|
||||
|
@ -583,7 +666,15 @@ ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
|
|||
#if EMBREE_MAJOR_VERSION >= 4
|
||||
CCLFirstHitContext ctx;
|
||||
rtcInitRayQueryContext(&ctx);
|
||||
# ifdef __KERNEL_ONEAPI__
|
||||
/* NOTE(sirgienko) Cycles GPU backends passes NULL to KernelGlobals and
|
||||
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
|
||||
* as a class context (Metal, oneAPI). So we need to pass this context here
|
||||
* in order to have an access to it later in Embree filter functions on GPU. */
|
||||
ctx.kg = (KernelGlobals)this;
|
||||
# else
|
||||
ctx.kg = kg;
|
||||
# endif
|
||||
#else
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
|
||||
rtcInitIntersectContext(&ctx);
|
||||
|
@ -596,7 +687,7 @@ ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
|
|||
#if EMBREE_MAJOR_VERSION >= 4
|
||||
RTCIntersectArguments args;
|
||||
rtcInitIntersectArguments(&args);
|
||||
args.filter = (RTCFilterFunctionN)kernel_embree_filter_intersection_func;
|
||||
args.filter = reinterpret_cast<RTCFilterFunctionN>(kernel_embree_filter_intersection_func);
|
||||
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
|
||||
args.context = &ctx;
|
||||
rtcIntersect1(kernel_data.device_bvh, &ray_hit, &args);
|
||||
|
@ -625,7 +716,15 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
|
|||
# if EMBREE_MAJOR_VERSION >= 4
|
||||
CCLLocalContext ctx;
|
||||
rtcInitRayQueryContext(&ctx);
|
||||
# ifdef __KERNEL_ONEAPI__
|
||||
/* NOTE(sirgienko) Cycles GPU backends passes NULL to KernelGlobals and
|
||||
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
|
||||
* as a class context (Metal, oneAPI). So we need to pass this context here
|
||||
* in order to have an access to it later in Embree filter functions on GPU. */
|
||||
ctx.kg = (KernelGlobals)this;
|
||||
# else
|
||||
ctx.kg = kg;
|
||||
# endif
|
||||
# else
|
||||
CCLIntersectContext ctx(kg,
|
||||
has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL);
|
||||
|
@ -646,7 +745,7 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
|
|||
# if EMBREE_MAJOR_VERSION >= 4
|
||||
RTCOccludedArguments args;
|
||||
rtcInitOccludedArguments(&args);
|
||||
args.filter = (RTCFilterFunctionN)(kernel_embree_filter_occluded_local_func);
|
||||
args.filter = reinterpret_cast<RTCFilterFunctionN>(kernel_embree_filter_occluded_local_func);
|
||||
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
|
||||
args.context = &ctx;
|
||||
# endif
|
||||
|
@ -692,7 +791,7 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
|
|||
|
||||
#ifdef __SHADOW_RECORD_ALL__
|
||||
ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
|
||||
IntegratorShadowStateCPU *state,
|
||||
IntegratorShadowState state,
|
||||
ccl_private const Ray *ray,
|
||||
uint visibility,
|
||||
uint max_hits,
|
||||
|
@ -702,7 +801,15 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
|
|||
# if EMBREE_MAJOR_VERSION >= 4
|
||||
CCLShadowContext ctx;
|
||||
rtcInitRayQueryContext(&ctx);
|
||||
# ifdef __KERNEL_ONEAPI__
|
||||
/* NOTE(sirgienko) Cycles GPU backends passes NULL to KernelGlobals and
|
||||
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
|
||||
* as a class context (Metal, oneAPI). So we need to pass this context here
|
||||
* in order to have an access to it later in Embree filter functions on GPU. */
|
||||
ctx.kg = (KernelGlobals)this;
|
||||
# else
|
||||
ctx.kg = kg;
|
||||
# endif
|
||||
# else
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
|
||||
rtcInitIntersectContext(&ctx);
|
||||
|
@ -718,7 +825,8 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
|
|||
# if EMBREE_MAJOR_VERSION >= 4
|
||||
RTCOccludedArguments args;
|
||||
rtcInitOccludedArguments(&args);
|
||||
args.filter = (RTCFilterFunctionN)kernel_embree_filter_occluded_shadow_all_func;
|
||||
args.filter = reinterpret_cast<RTCFilterFunctionN>(
|
||||
kernel_embree_filter_occluded_shadow_all_func);
|
||||
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
|
||||
args.context = &ctx;
|
||||
rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
|
||||
|
@ -742,7 +850,15 @@ ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
|
|||
# if EMBREE_MAJOR_VERSION >= 4
|
||||
CCLVolumeContext ctx;
|
||||
rtcInitRayQueryContext(&ctx);
|
||||
# ifdef __KERNEL_ONEAPI__
|
||||
/* NOTE(sirgienko) Cycles GPU backends passes NULL to KernelGlobals and
|
||||
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
|
||||
* as a class context (Metal, oneAPI). So we need to pass this context here
|
||||
* in order to have an access to it later in Embree filter functions on GPU. */
|
||||
ctx.kg = (KernelGlobals)this;
|
||||
# else
|
||||
ctx.kg = kg;
|
||||
# endif
|
||||
# else
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
|
||||
rtcInitIntersectContext(&ctx);
|
||||
|
@ -756,7 +872,8 @@ ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
|
|||
# if EMBREE_MAJOR_VERSION >= 4
|
||||
RTCOccludedArguments args;
|
||||
rtcInitOccludedArguments(&args);
|
||||
args.filter = (RTCFilterFunctionN)kernel_embree_filter_occluded_volume_all_func;
|
||||
args.filter = reinterpret_cast<RTCFilterFunctionN>(
|
||||
kernel_embree_filter_occluded_volume_all_func);
|
||||
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
|
||||
args.context = &ctx;
|
||||
rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
|
||||
|
|
|
@ -128,6 +128,12 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|||
}
|
||||
ccl_gpu_kernel_postfix
|
||||
|
||||
/* Intersection kernels need access to the kernel handler for specialization constants to work
|
||||
* properly. */
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
# include "kernel/device/oneapi/context_intersect_begin.h"
|
||||
#endif
|
||||
|
||||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
ccl_gpu_kernel_signature(integrator_intersect_closest,
|
||||
ccl_global const int *path_index_array,
|
||||
|
@ -185,6 +191,10 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|||
}
|
||||
ccl_gpu_kernel_postfix
|
||||
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
# include "kernel/device/oneapi/context_intersect_end.h"
|
||||
#endif
|
||||
|
||||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
ccl_gpu_kernel_signature(integrator_shade_background,
|
||||
ccl_global const int *path_index_array,
|
||||
|
@ -249,6 +259,12 @@ ccl_gpu_kernel_postfix
|
|||
constant int __dummy_constant [[function_constant(Kernel_DummyConstant)]];
|
||||
#endif
|
||||
|
||||
/* Kernels using intersections need access to the kernel handler for specialization constants to
|
||||
* work properly. */
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
# include "kernel/device/oneapi/context_intersect_begin.h"
|
||||
#endif
|
||||
|
||||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
ccl_gpu_kernel_signature(integrator_shade_surface_raytrace,
|
||||
ccl_global const int *path_index_array,
|
||||
|
@ -287,6 +303,9 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|||
}
|
||||
}
|
||||
ccl_gpu_kernel_postfix
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
# include "kernel/device/oneapi/context_intersect_end.h"
|
||||
#endif
|
||||
|
||||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
ccl_gpu_kernel_signature(integrator_shade_volume,
|
||||
|
|
|
@ -5,6 +5,11 @@
|
|||
|
||||
#define __KERNEL_GPU__
|
||||
#define __KERNEL_ONEAPI__
|
||||
#define __KERNEL_64_BIT__
|
||||
|
||||
#ifdef WITH_EMBREE_GPU
|
||||
# define __KERNEL_GPU_RAYTRACING__
|
||||
#endif
|
||||
|
||||
#define CCL_NAMESPACE_BEGIN
|
||||
#define CCL_NAMESPACE_END
|
||||
|
@ -57,17 +62,19 @@
|
|||
#define ccl_gpu_kernel_threads(block_num_threads)
|
||||
|
||||
#ifndef WITH_ONEAPI_SYCL_HOST_TASK
|
||||
# define ccl_gpu_kernel_signature(name, ...) \
|
||||
# define __ccl_gpu_kernel_signature(name, ...) \
|
||||
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
|
||||
size_t kernel_global_size, \
|
||||
size_t kernel_local_size, \
|
||||
sycl::handler &cgh, \
|
||||
__VA_ARGS__) { \
|
||||
(kg); \
|
||||
cgh.parallel_for<class kernel_##name>( \
|
||||
cgh.parallel_for( \
|
||||
sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
|
||||
[=](sycl::nd_item<1> item) {
|
||||
|
||||
# define ccl_gpu_kernel_signature __ccl_gpu_kernel_signature
|
||||
|
||||
# define ccl_gpu_kernel_postfix \
|
||||
}); \
|
||||
}
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2023 Intel Corporation */
|
||||
|
||||
#if !defined(WITH_ONEAPI_SYCL_HOST_TASK) && defined(WITH_EMBREE_GPU)
|
||||
# undef ccl_gpu_kernel_signature
|
||||
# define ccl_gpu_kernel_signature(name, ...) \
|
||||
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
|
||||
size_t kernel_global_size, \
|
||||
size_t kernel_local_size, \
|
||||
sycl::handler &cgh, \
|
||||
__VA_ARGS__) \
|
||||
{ \
|
||||
(kg); \
|
||||
cgh.parallel_for( \
|
||||
sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
|
||||
[=](sycl::nd_item<1> item, sycl::kernel_handler oneapi_kernel_handler) { \
|
||||
((ONEAPIKernelContext*)kg)->kernel_handler = oneapi_kernel_handler;
|
||||
#endif
|
|
@ -0,0 +1,7 @@
|
|||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2023 Intel Corporation */
|
||||
|
||||
#if !defined(WITH_ONEAPI_SYCL_HOST_TASK) && defined(WITH_EMBREE_GPU)
|
||||
# undef ccl_gpu_kernel_signature
|
||||
# define ccl_gpu_kernel_signature __ccl_gpu_kernel_signature
|
||||
#endif
|
|
@ -31,6 +31,8 @@ typedef struct KernelGlobalsGPU {
|
|||
size_t nd_item_group_range_0;
|
||||
size_t nd_item_global_id_0;
|
||||
size_t nd_item_global_range_0;
|
||||
#else
|
||||
sycl::kernel_handler kernel_handler;
|
||||
#endif
|
||||
} KernelGlobalsGPU;
|
||||
|
||||
|
|
|
@ -16,9 +16,22 @@
|
|||
|
||||
# include "kernel/device/gpu/kernel.h"
|
||||
|
||||
# include "device/kernel.cpp"
|
||||
|
||||
static OneAPIErrorCallback s_error_cb = nullptr;
|
||||
static void *s_error_user_ptr = nullptr;
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_BASIC_FEATURES =
|
||||
(const RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE |
|
||||
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS |
|
||||
RTC_FEATURE_FLAG_POINT | RTC_FEATURE_FLAG_MOTION_BLUR);
|
||||
static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_ALL_FEATURES =
|
||||
(const RTCFeatureFlags)(CYCLES_ONEAPI_EMBREE_BASIC_FEATURES |
|
||||
RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE |
|
||||
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE);
|
||||
# endif
|
||||
|
||||
void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr)
|
||||
{
|
||||
s_error_cb = cb;
|
||||
|
@ -142,15 +155,96 @@ size_t oneapi_kernel_preferred_local_size(SyclQueue *queue,
|
|||
return std::min(limit_work_group_size, preferred_work_group_size);
|
||||
}
|
||||
|
||||
bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
|
||||
bool oneapi_kernel_is_required_for_features(const std::string &kernel_name,
|
||||
const uint kernel_features)
|
||||
{
|
||||
if ((kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0 &&
|
||||
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE)) !=
|
||||
std::string::npos)
|
||||
return false;
|
||||
if ((kernel_features & KERNEL_FEATURE_MNEE) == 0 &&
|
||||
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE)) !=
|
||||
std::string::npos)
|
||||
return false;
|
||||
if ((kernel_features & KERNEL_FEATURE_VOLUME) == 0 &&
|
||||
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK)) !=
|
||||
std::string::npos)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool oneapi_kernel_is_using_embree(const std::string &kernel_name)
|
||||
{
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
/* MNEE and Raytrace kernels aren't yet enabled to use Embree. */
|
||||
for (int i = 0; i < (int)DEVICE_KERNEL_NUM; i++) {
|
||||
DeviceKernel kernel = (DeviceKernel)i;
|
||||
if (device_kernel_has_intersection(kernel)) {
|
||||
if (kernel_name.find(device_kernel_as_string(kernel)) != std::string::npos) {
|
||||
return !(kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE);
|
||||
}
|
||||
}
|
||||
}
|
||||
# endif
|
||||
return false;
|
||||
}
|
||||
|
||||
bool oneapi_load_kernels(SyclQueue *queue_,
|
||||
const uint kernel_features,
|
||||
bool use_hardware_raytracing)
|
||||
{
|
||||
# ifdef SYCL_SKIP_KERNELS_PRELOAD
|
||||
(void)queue_;
|
||||
(void)requested_features;
|
||||
# else
|
||||
assert(queue_);
|
||||
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
/* For best performance, we always JIT compile the kernels that are using Embree. */
|
||||
if (use_hardware_raytracing) {
|
||||
try {
|
||||
sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
|
||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
|
||||
{queue->get_device()});
|
||||
|
||||
for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
|
||||
const std::string &kernel_name = kernel_id.get_name();
|
||||
|
||||
if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
|
||||
!oneapi_kernel_is_using_embree(kernel_name)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
|
||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
|
||||
|
||||
/* Hair requires embree curves support. */
|
||||
if (kernel_features & KERNEL_FEATURE_HAIR) {
|
||||
one_kernel_bundle_input
|
||||
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
|
||||
CYCLES_ONEAPI_EMBREE_ALL_FEATURES);
|
||||
sycl::build(one_kernel_bundle_input);
|
||||
}
|
||||
else {
|
||||
one_kernel_bundle_input
|
||||
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
|
||||
CYCLES_ONEAPI_EMBREE_BASIC_FEATURES);
|
||||
sycl::build(one_kernel_bundle_input);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (sycl::exception const &e) {
|
||||
if (s_error_cb) {
|
||||
s_error_cb(e.what(), s_error_user_ptr);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef WITH_CYCLES_ONEAPI_BINARIES
|
||||
(void)queue_;
|
||||
(void)kernel_features;
|
||||
# else
|
||||
try {
|
||||
sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
|
||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
|
||||
|
@ -159,27 +253,25 @@ bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
|
|||
for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
|
||||
const std::string &kernel_name = kernel_id.get_name();
|
||||
|
||||
/* NOTE(@nsirgien): Names in this conditions below should match names from
|
||||
* oneapi_call macro in oneapi_enqueue_kernel below */
|
||||
if (((requested_features & KERNEL_FEATURE_VOLUME) == 0) &&
|
||||
kernel_name.find("oneapi_kernel_integrator_shade_volume") != std::string::npos) {
|
||||
/* In case HWRT is on, compilation of kernels using Embree is already handled in previous
|
||||
* block. */
|
||||
if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
|
||||
(use_hardware_raytracing && oneapi_kernel_is_using_embree(kernel_name))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (((requested_features & KERNEL_FEATURE_MNEE) == 0) &&
|
||||
kernel_name.find("oneapi_kernel_integrator_shade_surface_mnee") != std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (((requested_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0) &&
|
||||
kernel_name.find("oneapi_kernel_integrator_shade_surface_raytrace") !=
|
||||
std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
|
||||
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle =
|
||||
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
|
||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
|
||||
sycl::build(one_kernel_bundle);
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
/* This is expected to be the default, we set it again to be sure. */
|
||||
if (one_kernel_bundle_input
|
||||
.has_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>()) {
|
||||
one_kernel_bundle_input
|
||||
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
|
||||
RTC_FEATURE_FLAG_NONE);
|
||||
}
|
||||
# endif
|
||||
sycl::build(one_kernel_bundle_input);
|
||||
}
|
||||
}
|
||||
catch (sycl::exception const &e) {
|
||||
|
@ -195,6 +287,8 @@ bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
|
|||
bool oneapi_enqueue_kernel(KernelContext *kernel_context,
|
||||
int kernel,
|
||||
size_t global_size,
|
||||
const uint kernel_features,
|
||||
bool use_hardware_raytracing,
|
||||
void **args)
|
||||
{
|
||||
bool success = true;
|
||||
|
@ -248,6 +342,21 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
|
|||
|
||||
try {
|
||||
queue->submit([&](sycl::handler &cgh) {
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
/* Spec says it has no effect if the called kernel doesn't support the below specialization
|
||||
* constant but it can still trigger a recompilation, so we set it only if needed. */
|
||||
if (device_kernel_has_intersection(device_kernel)) {
|
||||
const RTCFeatureFlags used_embree_features = !use_hardware_raytracing ?
|
||||
RTC_FEATURE_FLAG_NONE :
|
||||
!(kernel_features & KERNEL_FEATURE_HAIR) ?
|
||||
CYCLES_ONEAPI_EMBREE_BASIC_FEATURES :
|
||||
CYCLES_ONEAPI_EMBREE_ALL_FEATURES;
|
||||
cgh.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
|
||||
used_embree_features);
|
||||
}
|
||||
# else
|
||||
(void)kernel_features;
|
||||
# endif
|
||||
switch (device_kernel) {
|
||||
case DEVICE_KERNEL_INTEGRATOR_RESET: {
|
||||
oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_reset);
|
||||
|
@ -549,4 +658,5 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
|
|||
# endif
|
||||
return success;
|
||||
}
|
||||
|
||||
#endif /* WITH_ONEAPI */
|
||||
|
|
|
@ -47,10 +47,14 @@ CYCLES_KERNEL_ONEAPI_EXPORT size_t oneapi_kernel_preferred_local_size(
|
|||
CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_enqueue_kernel(KernelContext *context,
|
||||
int kernel,
|
||||
size_t global_size,
|
||||
const unsigned int kernel_features,
|
||||
bool use_hardware_raytracing,
|
||||
void **args);
|
||||
CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_load_kernels(SyclQueue *queue,
|
||||
const unsigned int requested_features);
|
||||
const unsigned int kernel_features,
|
||||
bool use_hardware_raytracing);
|
||||
# ifdef __cplusplus
|
||||
}
|
||||
|
||||
# endif
|
||||
#endif /* WITH_ONEAPI */
|
||||
|
|
|
@ -3,8 +3,9 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#if !defined(__KERNEL_GPU__) && defined(WITH_EMBREE)
|
||||
# if EMBREE_MAJOR_VERSION >= 4
|
||||
#if (!defined(__KERNEL_GPU__) || (defined(__KERNEL_ONEAPI__) && defined(WITH_EMBREE_GPU))) && \
|
||||
defined(WITH_EMBREE)
|
||||
# if EMBREE_MAJOR_VERSION == 4
|
||||
# include <embree4/rtcore.h>
|
||||
# include <embree4/rtcore_scene.h>
|
||||
# else
|
||||
|
|
|
@ -194,8 +194,8 @@ void Geometry::compute_bvh(Device *device,
|
|||
|
||||
compute_bounds();
|
||||
|
||||
const BVHLayout bvh_layout = BVHParams::best_bvh_layout(params->bvh_layout,
|
||||
device->get_bvh_layout_mask());
|
||||
const BVHLayout bvh_layout = BVHParams::best_bvh_layout(
|
||||
params->bvh_layout, device->get_bvh_layout_mask(dscene->data.kernel_features));
|
||||
if (need_build_bvh(bvh_layout)) {
|
||||
string msg = "Updating Geometry BVH ";
|
||||
if (name.empty())
|
||||
|
@ -1235,8 +1235,8 @@ void GeometryManager::device_update_bvh(Device *device,
|
|||
|
||||
BVHParams bparams;
|
||||
bparams.top_level = true;
|
||||
bparams.bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout,
|
||||
device->get_bvh_layout_mask());
|
||||
bparams.bvh_layout = BVHParams::best_bvh_layout(
|
||||
scene->params.bvh_layout, device->get_bvh_layout_mask(dscene->data.kernel_features));
|
||||
bparams.use_spatial_split = scene->params.use_bvh_spatial_split;
|
||||
bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
|
||||
scene->params.use_bvh_unaligned_nodes;
|
||||
|
@ -1889,8 +1889,8 @@ void GeometryManager::device_update(Device *device,
|
|||
/* Device update. */
|
||||
device_free(device, dscene, false);
|
||||
|
||||
const BVHLayout bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout,
|
||||
device->get_bvh_layout_mask());
|
||||
const BVHLayout bvh_layout = BVHParams::best_bvh_layout(
|
||||
scene->params.bvh_layout, device->get_bvh_layout_mask(dscene->data.kernel_features));
|
||||
geom_calc_offset(scene, bvh_layout);
|
||||
if (true_displacement_used || curve_shadow_transparency_used) {
|
||||
scoped_callback_timer timer([scene](double time) {
|
||||
|
@ -2051,8 +2051,8 @@ void GeometryManager::device_update(Device *device,
|
|||
|
||||
/* Always set BVH layout again after displacement where it was set to none,
|
||||
* to avoid ray-tracing at that stage. */
|
||||
dscene->data.bvh.bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout,
|
||||
device->get_bvh_layout_mask());
|
||||
dscene->data.bvh.bvh_layout = BVHParams::best_bvh_layout(
|
||||
scene->params.bvh_layout, device->get_bvh_layout_mask(dscene->data.kernel_features));
|
||||
|
||||
{
|
||||
scoped_callback_timer timer([scene](double time) {
|
||||
|
|
|
@ -595,7 +595,7 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
|
|||
void ObjectManager::device_update_prim_offsets(Device *device, DeviceScene *dscene, Scene *scene)
|
||||
{
|
||||
if (!scene->integrator->get_use_light_tree()) {
|
||||
BVHLayoutMask layout_mask = device->get_bvh_layout_mask();
|
||||
BVHLayoutMask layout_mask = device->get_bvh_layout_mask(dscene->data.kernel_features);
|
||||
if (layout_mask != BVH_LAYOUT_METAL && layout_mask != BVH_LAYOUT_MULTI_METAL &&
|
||||
layout_mask != BVH_LAYOUT_MULTI_METAL_EMBREE) {
|
||||
return;
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
#ifndef __UTIL_VECTOR_H__
|
||||
#define __UTIL_VECTOR_H__
|
||||
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
|
|
|
@ -363,11 +363,18 @@ class DATA_PT_font(CurveButtonsPanelText, Panel):
|
|||
if mode == 'EDIT_TEXT':
|
||||
layout.separator()
|
||||
|
||||
row = layout.row(align=True)
|
||||
row.prop(char, "use_bold", toggle=True)
|
||||
row.prop(char, "use_italic", toggle=True)
|
||||
row.prop(char, "use_underline", toggle=True)
|
||||
row.prop(char, "use_small_caps", toggle=True)
|
||||
if not text.has_selection:
|
||||
row = layout.row(align=True)
|
||||
row.prop(char, "use_bold", toggle=True)
|
||||
row.prop(char, "use_italic", toggle=True)
|
||||
row.prop(char, "use_underline", toggle=True)
|
||||
row.prop(char, "use_small_caps", toggle=True)
|
||||
else:
|
||||
row = layout.row(align=True)
|
||||
row.operator("font.style_toggle", text="Bold", icon='BOLD' , depress = text.is_select_bold).style = 'BOLD'
|
||||
row.operator("font.style_toggle", text="Italic", icon='ITALIC' , depress = text.is_select_italic).style = 'ITALIC'
|
||||
row.operator("font.style_toggle", text="Underline", icon='UNDERLINE' , depress = text.is_select_underline).style = 'UNDERLINE'
|
||||
row.operator("font.style_toggle", text="Small Caps", icon='SMALL_CAPS' , depress = text.is_select_smallcaps).style = 'SMALL_CAPS'
|
||||
|
||||
|
||||
class DATA_PT_font_transform(CurveButtonsPanelText, Panel):
|
||||
|
|
|
@ -42,6 +42,10 @@ typedef struct EditFont {
|
|||
int len, pos;
|
||||
int selstart, selend;
|
||||
|
||||
/* Combined styles (#CharInfo.flag) for selected string. A flag will be
|
||||
* set only if ALL characters in the selected string have it. */
|
||||
int select_char_info_flag;
|
||||
|
||||
/**
|
||||
* ID data is older than edit-mode data.
|
||||
* Set #Main.is_memfile_undo_flush_needed when enabling.
|
||||
|
|
|
@ -1140,7 +1140,16 @@ static bool vfont_to_curve(Object *ob,
|
|||
}
|
||||
}
|
||||
|
||||
/* Line-data is now: width of line. */
|
||||
if (ef && ef->selboxes) {
|
||||
/* Set combined style flags for the selected string. Start with all styles then
|
||||
* remove one if ANY characters do not have it. Break out if we've removed them all. */
|
||||
ef->select_char_info_flag = CU_CHINFO_BOLD | CU_CHINFO_ITALIC | CU_CHINFO_UNDERLINE |
|
||||
CU_CHINFO_SMALLCAPS;
|
||||
for (int k = selstart; k <= selend && ef->select_char_info_flag; k++) {
|
||||
info = &custrinfo[k];
|
||||
ef->select_char_info_flag &= info->flag;
|
||||
}
|
||||
}
|
||||
|
||||
if (cu->spacemode != CU_ALIGN_X_LEFT) {
|
||||
ct = chartransdata;
|
||||
|
|
|
@ -93,6 +93,7 @@ bool BLI_file_external_operation_supported(const char *filepath, FileExternalOpe
|
|||
char *opstring = windows_operation_string(operation);
|
||||
return BLI_windows_external_operation_supported(filepath, opstring);
|
||||
#else
|
||||
UNUSED_VARS(filepath, operation);
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
@ -107,6 +108,7 @@ bool BLI_file_external_operation_execute(const char *filepath, FileExternalOpera
|
|||
}
|
||||
return false;
|
||||
#else
|
||||
UNUSED_VARS(filepath, operation);
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -214,7 +214,7 @@ bool BLI_windows_external_operation_supported(const char *filepath, const char *
|
|||
return BLI_windows_file_operation_is_registered(extension, operation);
|
||||
}
|
||||
|
||||
bool BLI_windows_external_operation_execute(const char *filepath, char *operation)
|
||||
bool BLI_windows_external_operation_execute(const char *filepath, const char *operation)
|
||||
{
|
||||
WCHAR wpath[FILE_MAX];
|
||||
if (conv_utf_8_to_16(filepath, wpath, ARRAY_SIZE(wpath)) != 0) {
|
||||
|
|
|
@ -854,16 +854,13 @@ static int toggle_style_exec(bContext *C, wmOperator *op)
|
|||
Curve *cu = obedit->data;
|
||||
int style, clear, selstart, selend;
|
||||
|
||||
if (!BKE_vfont_select_get(obedit, &selstart, &selend)) {
|
||||
return OPERATOR_CANCELLED;
|
||||
}
|
||||
|
||||
style = RNA_enum_get(op->ptr, "style");
|
||||
|
||||
cu->curinfo.flag ^= style;
|
||||
clear = (cu->curinfo.flag & style) == 0;
|
||||
|
||||
return set_style(C, style, clear);
|
||||
if (BKE_vfont_select_get(obedit, &selstart, &selend)) {
|
||||
clear = (cu->curinfo.flag & style) == 0;
|
||||
return set_style(C, style, clear);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void FONT_OT_style_toggle(wmOperatorType *ot)
|
||||
|
|
|
@ -1810,11 +1810,11 @@ static int file_external_operation_exec(bContext *C, wmOperator *op)
|
|||
char filepath[FILE_MAX];
|
||||
RNA_property_string_get(op->ptr, prop, filepath);
|
||||
|
||||
const FileExternalOperation operation = (FileExternalOperation)RNA_enum_get(op->ptr,
|
||||
"operation");
|
||||
WM_cursor_set(CTX_wm_window(C), WM_CURSOR_WAIT);
|
||||
|
||||
#ifdef WIN32
|
||||
const FileExternalOperation operation = (FileExternalOperation)RNA_enum_get(op->ptr,
|
||||
"operation");
|
||||
if (BLI_file_external_operation_execute(filepath, operation)) {
|
||||
WM_cursor_set(CTX_wm_window(C), WM_CURSOR_DEFAULT);
|
||||
return OPERATOR_FINISHED;
|
||||
|
|
|
@ -838,6 +838,15 @@ static bool rna_Curve_is_editmode_get(PointerRNA *ptr)
|
|||
}
|
||||
}
|
||||
|
||||
static bool rna_TextCurve_has_selection_get(PointerRNA *ptr)
|
||||
{
|
||||
Curve *cu = (Curve *)ptr->owner_id;
|
||||
if (cu->editfont != NULL)
|
||||
return (cu->editfont->selboxes != NULL);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static const float tilt_limit = DEG2RADF(21600.0f);
|
||||
|
@ -1272,6 +1281,31 @@ static void rna_def_font(BlenderRNA *UNUSED(brna), StructRNA *srna)
|
|||
RNA_def_property_boolean_sdna(prop, NULL, "flag", CU_FAST);
|
||||
RNA_def_property_ui_text(prop, "Fast Editing", "Don't fill polygons while editing");
|
||||
RNA_def_property_update(prop, 0, "rna_Curve_update_data");
|
||||
|
||||
prop = RNA_def_property(srna, "is_select_bold", PROP_BOOLEAN, PROP_NONE);
|
||||
RNA_def_property_boolean_sdna(prop, NULL, "editfont->select_char_info_flag", CU_CHINFO_BOLD);
|
||||
RNA_def_property_ui_text(prop, "Selected Bold", "Whether the selected text is bold");
|
||||
RNA_def_property_clear_flag(prop, PROP_EDITABLE);
|
||||
|
||||
prop = RNA_def_property(srna, "is_select_italic", PROP_BOOLEAN, PROP_NONE);
|
||||
RNA_def_property_boolean_sdna(prop, NULL, "editfont->select_char_info_flag", CU_CHINFO_ITALIC);
|
||||
RNA_def_property_ui_text(prop, "Selected Italic", "Whether the selected text is italics");
|
||||
RNA_def_property_clear_flag(prop, PROP_EDITABLE);
|
||||
|
||||
prop = RNA_def_property(srna, "is_select_underline", PROP_BOOLEAN, PROP_NONE);
|
||||
RNA_def_property_boolean_sdna(prop, NULL, "editfont->select_char_info_flag", CU_CHINFO_UNDERLINE);
|
||||
RNA_def_property_ui_text(prop, "Selected Underline", "Whether the selected text is underlined");
|
||||
RNA_def_property_clear_flag(prop, PROP_EDITABLE);
|
||||
|
||||
prop = RNA_def_property(srna, "is_select_smallcaps", PROP_BOOLEAN, PROP_NONE);
|
||||
RNA_def_property_boolean_sdna(prop, NULL, "editfont->select_char_info_flag", CU_CHINFO_SMALLCAPS);
|
||||
RNA_def_property_ui_text(prop, "Selected Smallcaps", "Whether the selected text is small caps");
|
||||
RNA_def_property_clear_flag(prop, PROP_EDITABLE);
|
||||
|
||||
prop = RNA_def_property(srna, "has_selection", PROP_BOOLEAN, PROP_NONE);
|
||||
RNA_def_property_boolean_funcs(prop, "rna_TextCurve_has_selection_get", NULL);
|
||||
RNA_def_property_ui_text(prop, "Text Selected", "Whether there is any text selected");
|
||||
RNA_def_property_clear_flag(prop, PROP_EDITABLE);
|
||||
}
|
||||
|
||||
static void rna_def_textbox(BlenderRNA *brna)
|
||||
|
|
Loading…
Reference in New Issue