Initial Grease Pencil 3.0 stage #106848

Merged
Falk David merged 224 commits from filedescriptor/blender:grease-pencil-v3 into main 2023-05-30 11:14:22 +02:00
552 changed files with 9522 additions and 4895 deletions
Showing only changes of commit 04bcf0648b - Show all commits

View File

@ -521,7 +521,8 @@ endif()
if(NOT APPLE)
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
# Radeon VII (gfx906) not currently working with HIP SDK, so left out of the list.
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
endif()
@ -1580,6 +1581,8 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_MISSING_NORETURN -Wno-missing-noreturn)
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_UNUSED_BUT_SET_VARIABLE -Wno-unused-but-set-variable)
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_DEPRECATED_DECLARATIONS -Wno-deprecated-declarations)
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_STRICT_PROTOTYPES -Wno-strict-prototypes)
add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_BITWISE_INSTEAD_OF_LOGICAL -Wno-bitwise-instead-of-logical)
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_UNUSED_PARAMETER -Wno-unused-parameter)
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_UNUSED_PRIVATE_FIELD -Wno-unused-private-field)
@ -1593,6 +1596,7 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_UNDEFINED_VAR_TEMPLATE -Wno-undefined-var-template)
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_INSTANTIATION_AFTER_SPECIALIZATION -Wno-instantiation-after-specialization)
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_MISLEADING_INDENTATION -Wno-misleading-indentation)
add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_BITWISE_INSTEAD_OF_LOGICAL -Wno-bitwise-instead-of-logical)
elseif(CMAKE_C_COMPILER_ID MATCHES "Intel")

View File

@ -58,9 +58,6 @@ Static Source Code Checking
* check_cppcheck: Run blender source through cppcheck (C & C++).
* check_clang_array: Run blender source through clang array checking script (C & C++).
* check_deprecated: Check if there is any deprecated code to remove.
* check_splint: Run blenders source through splint (C only).
* check_sparse: Run blenders source through sparse (C only).
* check_smatch: Run blenders source through smatch (C only).
* check_descriptions: Check for duplicate/invalid descriptions.
* check_licenses: Check license headers follow the SPDX license specification,
using one of the accepted licenses in 'doc/license/SPDX-license-identifiers.txt'
@ -474,21 +471,6 @@ check_clang_array: .FORCE
@cd "$(BUILD_DIR)" ; \
$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_clang_array.py"
check_splint: .FORCE
@$(CMAKE_CONFIG)
@cd "$(BUILD_DIR)" ; \
$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_splint.py"
check_sparse: .FORCE
@$(CMAKE_CONFIG)
@cd "$(BUILD_DIR)" ; \
$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_sparse.py"
check_smatch: .FORCE
@$(CMAKE_CONFIG)
@cd "$(BUILD_DIR)" ; \
$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_smatch.py"
check_mypy: .FORCE
@$(PYTHON) "$(BLENDER_DIR)/tools/check_source/check_mypy.py"

View File

@ -90,28 +90,26 @@ include(cmake/haru.cmake)
# Boost needs to be included after `python.cmake` due to the PYTHON_BINARY variable being needed.
include(cmake/boost.cmake)
include(cmake/pugixml.cmake)
include(cmake/ispc.cmake)
include(cmake/openimagedenoise.cmake)
include(cmake/embree.cmake)
include(cmake/openpgl.cmake)
include(cmake/fmt.cmake)
include(cmake/robinmap.cmake)
include(cmake/xml2.cmake)
include(cmake/fribidi.cmake)
include(cmake/harfbuzz.cmake)
if(NOT APPLE)
include(cmake/xr_openxr.cmake)
if(NOT WIN32 OR BUILD_MODE STREQUAL Release)
include(cmake/dpcpp.cmake)
include(cmake/dpcpp_deps.cmake)
endif()
include(cmake/dpcpp.cmake)
include(cmake/dpcpp_deps.cmake)
if(NOT WIN32)
include(cmake/igc.cmake)
include(cmake/gmmlib.cmake)
include(cmake/ocloc.cmake)
endif()
endif()
include(cmake/ispc.cmake)
include(cmake/openimagedenoise.cmake)
# Embree needs to be included after dpcpp as it uses it for compiling with GPU support
include(cmake/embree.cmake)
include(cmake/openpgl.cmake)
include(cmake/fmt.cmake)
include(cmake/robinmap.cmake)
include(cmake/xml2.cmake)
# OpenColorIO and dependencies.
include(cmake/expat.cmake)

View File

@ -156,6 +156,7 @@ download_source(OPENCLHEADERS)
download_source(ICDLOADER)
download_source(MP11)
download_source(SPIRV_HEADERS)
download_source(UNIFIED_RUNTIME)
download_source(IGC)
download_source(IGC_LLVM)
download_source(IGC_OPENCL_CLANG)

View File

@ -5,6 +5,9 @@
# for now.
string(REPLACE "-DCMAKE_CXX_STANDARD=17" " " DPCPP_CMAKE_FLAGS "${DEFAULT_CMAKE_FLAGS}")
# DPCPP already generates debug libs, there isn't much point in compiling it in debug mode itself.
string(REPLACE "-DCMAKE_BUILD_TYPE=Debug" "-DCMAKE_BUILD_TYPE=Release" DPCPP_CMAKE_FLAGS "${DPCPP_CMAKE_FLAGS}")
if(WIN32)
set(LLVM_GENERATOR "Ninja")
else()
@ -38,17 +41,18 @@ set(DPCPP_EXTRA_ARGS
-DLEVEL_ZERO_LIBRARY=${LIBDIR}/level-zero/lib/${LIBPREFIX}ze_loader${SHAREDLIBEXT}
-DLEVEL_ZERO_INCLUDE_DIR=${LIBDIR}/level-zero/include
-DLLVM_EXTERNAL_SPIRV_HEADERS_SOURCE_DIR=${BUILD_DIR}/spirvheaders/src/external_spirvheaders/
-DUNIFIED_RUNTIME_SOURCE_DIR=${BUILD_DIR}/unifiedruntime/src/external_unifiedruntime/
# Below here is copied from an invocation of buildbot/config.py
-DLLVM_ENABLE_ASSERTIONS=ON
-DLLVM_TARGETS_TO_BUILD=X86
-DLLVM_EXTERNAL_PROJECTS=sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
-DLLVM_EXTERNAL_PROJECTS=sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw^^lld
-DLLVM_EXTERNAL_SYCL_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/sycl
-DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/llvm-spirv
-DLLVM_EXTERNAL_XPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
-DXPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
-DLLVM_EXTERNAL_XPTIFW_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xptifw
-DLLVM_EXTERNAL_LIBDEVICE_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/libdevice
-DLLVM_ENABLE_PROJECTS=clang^^sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
-DLLVM_ENABLE_PROJECTS=clang^^sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw^^lld
-DLIBCLC_TARGETS_TO_BUILD=
-DLIBCLC_GENERATE_REMANGLED_VARIANTS=OFF
-DSYCL_BUILD_PI_HIP_PLATFORM=AMD
@ -104,13 +108,19 @@ add_dependencies(
external_mp11
external_level-zero
external_spirvheaders
external_unifiedruntime
)
if(BUILD_MODE STREQUAL Release AND WIN32)
ExternalProject_Add_Step(external_dpcpp after_install
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cl.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cpp.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang.exe
COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/dpcpp ${HARVEST_TARGET}/dpcpp
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/clang-cl.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/clang-cpp.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/clang.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/ld.lld.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/ld64.lld.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/lld.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/lld-link.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/wasm-ld.exe
)
endif()

View File

@ -59,3 +59,13 @@ ExternalProject_Add(external_spirvheaders
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
)
ExternalProject_Add(external_unifiedruntime
URL file://${PACKAGE_DIR}/${UNIFIED_RUNTIME_FILE}
URL_HASH ${UNIFIED_RUNTIME_HASH_TYPE}=${UNIFIED_RUNTIME_HASH}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
PREFIX ${BUILD_DIR}/unifiedruntime
CONFIGURE_COMMAND echo .
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
)

View File

@ -3,6 +3,8 @@
# Note the utility apps may use png/tiff/gif system libraries, but the
# library itself does not depend on them, so should give no problems.
set(EMBREE_CMAKE_FLAGS ${DEFAULT_CMAKE_FLAGS})
set(EMBREE_EXTRA_ARGS
-DEMBREE_ISPC_SUPPORT=OFF
-DEMBREE_TUTORIALS=OFF
@ -31,6 +33,43 @@ if(NOT BLENDER_PLATFORM_ARM)
)
endif()
if(NOT APPLE)
if(WIN32)
# Levels below -O2 don't work well for Embree+SYCL.
string(REGEX REPLACE "-O[A-Za-z0-9]" "" EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG ${BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG})
string(APPEND EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG " -O2")
string(REGEX REPLACE "-O[A-Za-z0-9]" "" EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG ${BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG})
string(APPEND EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG " -O2")
set(EMBREE_CMAKE_FLAGS
-DCMAKE_BUILD_TYPE=${BUILD_MODE}
-DCMAKE_CXX_FLAGS_RELEASE=${BLENDER_CLANG_CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_MINSIZEREL=${BLENDER_CLANG_CMAKE_CXX_FLAGS_MINSIZEREL}
-DCMAKE_CXX_FLAGS_RELWITHDEBINFO=${BLENDER_CLANG_CMAKE_CXX_FLAGS_RELWITHDEBINFO}
-DCMAKE_CXX_FLAGS_DEBUG=${EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${BLENDER_CLANG_CMAKE_C_FLAGS_RELEASE}
-DCMAKE_C_FLAGS_MINSIZEREL=${BLENDER_CLANG_CMAKE_C_FLAGS_MINSIZEREL}
-DCMAKE_C_FLAGS_RELWITHDEBINFO=${BLENDER_CLANG_CMAKE_C_FLAGS_RELWITHDEBINFO}
-DCMAKE_C_FLAGS_DEBUG=${EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG}
-DCMAKE_CXX_STANDARD=17
)
set(EMBREE_EXTRA_ARGS
-DCMAKE_CXX_COMPILER=${LIBDIR}/dpcpp/bin/clang++.exe
-DCMAKE_C_COMPILER=${LIBDIR}/dpcpp/bin/clang.exe
-DCMAKE_SHARED_LINKER_FLAGS=-L"${LIBDIR}/dpcpp/lib"
-DEMBREE_SYCL_SUPPORT=ON
${EMBREE_EXTRA_ARGS}
)
else()
set(EMBREE_EXTRA_ARGS
-DCMAKE_CXX_COMPILER=${LIBDIR}/dpcpp/bin/clang++
-DCMAKE_C_COMPILER=${LIBDIR}/dpcpp/bin/clang
-DCMAKE_SHARED_LINKER_FLAGS=-L"${LIBDIR}/dpcpp/lib"
-DEMBREE_SYCL_SUPPORT=ON
${EMBREE_EXTRA_ARGS}
)
endif()
endif()
if(TBB_STATIC_LIBRARY)
set(EMBREE_EXTRA_ARGS
${EMBREE_EXTRA_ARGS}
@ -42,16 +81,25 @@ ExternalProject_Add(external_embree
URL file://${PACKAGE_DIR}/${EMBREE_FILE}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
URL_HASH ${EMBREE_HASH_TYPE}=${EMBREE_HASH}
CMAKE_GENERATOR ${PLATFORM_ALT_GENERATOR}
PREFIX ${BUILD_DIR}/embree
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/embree/src/external_embree < ${PATCH_DIR}/embree.diff
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/embree ${DEFAULT_CMAKE_FLAGS} ${EMBREE_EXTRA_ARGS}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/embree ${EMBREE_CMAKE_FLAGS} ${EMBREE_EXTRA_ARGS}
INSTALL_DIR ${LIBDIR}/embree
)
add_dependencies(
external_embree
external_tbb
)
if(NOT APPLE)
add_dependencies(
external_embree
external_tbb
external_dpcpp
)
else()
add_dependencies(
external_embree
external_tbb
)
endif()
if(WIN32)
if(BUILD_MODE STREQUAL Release)
@ -66,6 +114,7 @@ if(WIN32)
ExternalProject_Add_Step(external_embree after_install
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/bin/embree4_d.dll ${HARVEST_TARGET}/embree/bin/embree4_d.dll
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree4_d.lib ${HARVEST_TARGET}/embree/lib/embree4_d.lib
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree4_sycl_d.lib ${HARVEST_TARGET}/embree/lib/embree4_sycl_d.lib
DEPENDEES install
)
endif()

View File

@ -74,6 +74,27 @@ if(WIN32)
set(BLENDER_CMAKE_CXX_FLAGS_RELEASE "/MD ${COMMON_MSVC_FLAGS} /D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS /O2 /Ob2 /D NDEBUG /D PLATFORM_WINDOWS /DPSAPI_VERSION=2 /DTINYFORMAT_ALLOW_WCHAR_STRINGS")
set(BLENDER_CMAKE_CXX_FLAGS_RELWITHDEBINFO "/MD ${COMMON_MSVC_FLAGS} /D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS /Zi /O2 /Ob1 /D NDEBUG /D PLATFORM_WINDOWS /DPSAPI_VERSION=2 /DTINYFORMAT_ALLOW_WCHAR_STRINGS")
# Set similar flags for CLANG compilation.
set(COMMON_CLANG_FLAGS "-D_DLL -D_MT") # Equivalent to MSVC /MD
if(WITH_OPTIMIZED_DEBUG)
set(BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -O2 -D_DEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
else()
set(BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -g -D_DEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
endif()
set(BLENDER_CLANG_CMAKE_C_FLAGS_MINSIZEREL "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -Os -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
set(BLENDER_CLANG_CMAKE_C_FLAGS_RELEASE "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -O2 -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
set(BLENDER_CLANG_CMAKE_C_FLAGS_RELWITHDEBINFO "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -g -O2 -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
if(WITH_OPTIMIZED_DEBUG)
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -D_DEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS -DBOOST_DEBUG_PYTHON -DBOOST_ALL_NO_LIB")
else()
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_DEBUG "${COMMON_CLANG_FLAG} -Xclang --dependent-lib=msvcrtd -D_DEBUG -DPLATFORM_WINDOWS -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -g -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS -DBOOST_DEBUG_PYTHON -DBOOST_ALL_NO_LIB")
endif()
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_MINSIZEREL "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_RELEASE "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
set(BLENDER_CLANG_CMAKE_CXX_FLAGS_RELWITHDEBINFO "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -g -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
set(PLATFORM_FLAGS)
set(PLATFORM_CXX_FLAGS)
set(PLATFORM_CMAKE_FLAGS)

View File

@ -599,15 +599,15 @@ set(OPENPGL_HASH db63f5dac5cfa8c110ede241f0c413f00db0c4748697381c4fa23e0f9e82a75
set(OPENPGL_HASH_TYPE SHA256)
set(OPENPGL_FILE openpgl-${OPENPGL_VERSION}.tar.gz)
set(LEVEL_ZERO_VERSION v1.8.5)
set(LEVEL_ZERO_VERSION v1.8.8)
set(LEVEL_ZERO_URI https://github.com/oneapi-src/level-zero/archive/refs/tags/${LEVEL_ZERO_VERSION}.tar.gz)
set(LEVEL_ZERO_HASH b6e9663bbcc53c148d32376998298bec6f7c434ef2218c61fa708963e3a09394)
set(LEVEL_ZERO_HASH 3553ae8fa0d2d69c4210a8f3428bd6612bd8bb8a627faf52c3658a01851e66d2)
set(LEVEL_ZERO_HASH_TYPE SHA256)
set(LEVEL_ZERO_FILE level-zero-${LEVEL_ZERO_VERSION}.tar.gz)
set(DPCPP_VERSION 20221019)
set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/sycl-nightly/${DPCPP_VERSION}.tar.gz)
set(DPCPP_HASH 2f533946e91ce3829431758ea17b0b834b960c1a796e9e4563c86e03eb9603a2)
set(DPCPP_VERSION 2022-12)
set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/${DPCPP_VERSION}.tar.gz)
set(DPCPP_HASH 13151d5ae79f7c9c4a9b072a0c486ae7b3c4993e301bb1268c92214451025790)
set(DPCPP_HASH_TYPE SHA256)
set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)
@ -620,9 +620,9 @@ set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)
# will take care of building them, unpack is being done in dpcpp_deps.cmake
# Source llvm/lib/SYCLLowerIR/CMakeLists.txt
set(VCINTRINSICS_VERSION abce9184b7a3a7fe1b02289b9285610d9dc45465)
set(VCINTRINSICS_VERSION 782fbf7301dc73acaa049a4324c976ad94f587f7)
set(VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/${VCINTRINSICS_VERSION}.tar.gz)
set(VCINTRINSICS_HASH 3e9fd471246b87633b26f7e15e17ab7733d357458c53d5c5881c03929d6c551f)
set(VCINTRINSICS_HASH f4c0ccad8c1f77760364c551c65e8e1cf194d058889fa46d3b1b2d19ec4dc33f)
set(VCINTRINSICS_HASH_TYPE SHA256)
set(VCINTRINSICS_FILE vc-intrinsics-${VCINTRINSICS_VERSION}.tar.gz)
@ -657,6 +657,13 @@ set(SPIRV_HEADERS_HASH ec8ecb471a62672697846c436501638ab25447ae9d4a6761e0bfe8a9a
set(SPIRV_HEADERS_HASH_TYPE SHA256)
set(SPIRV_HEADERS_FILE SPIR-V-Headers-${SPIRV_HEADERS_VERSION}.tar.gz)
# Source llvm/sycl/plugins/unified_runtime/CMakeLists.txt
set(UNIFIED_RUNTIME_VERSION fd711c920acc4434cb52ff18b078c082d9d7f44d)
set(UNIFIED_RUNTIME_URI https://github.com/oneapi-src/unified-runtime/archive/${UNIFIED_RUNTIME_VERSION}.tar.gz)
set(UNIFIED_RUNTIME_HASH 535ca2ee78f68c5e7e62b10f1bbabd909179488885566e6d9b1fc50e8a1be65f)
set(UNIFIED_RUNTIME_HASH_TYPE SHA256)
set(UNIFIED_RUNTIME_FILE unified-runtime-${UNIFIED_RUNTIME_VERSION}.tar.gz)
######################
### DPCPP DEPS END ###
######################
@ -730,9 +737,9 @@ set(GMMLIB_HASH c1f33e1519edfc527127baeb0436b783430dfd256c643130169a3a71dc86aff9
set(GMMLIB_HASH_TYPE SHA256)
set(GMMLIB_FILE ${GMMLIB_VERSION}.tar.gz)
set(OCLOC_VERSION 22.49.25018.21)
set(OCLOC_VERSION 23.05.25593.18)
set(OCLOC_URI https://github.com/intel/compute-runtime/archive/refs/tags/${OCLOC_VERSION}.tar.gz)
set(OCLOC_HASH 92362dae08b503a34e5d3820ed284198c452bcd5e7504d90eb69887b20492c06)
set(OCLOC_HASH 122415028e631922ae999c996954dfd98ce9a32decd564d5484c31476ec9306e)
set(OCLOC_HASH_TYPE SHA256)
set(OCLOC_FILE ocloc-${OCLOC_VERSION}.tar.gz)

View File

@ -14,6 +14,7 @@ graph[autosize = false, size = "25.7,8.3!", resolution = 300];
external_dpcpp -- external_mp11;
external_dpcpp -- external_level_zero;
external_dpcpp -- external_spirvheaders;
external_dpcpp -- external_unifiedruntime;
external_embree -- external_tbb;
external_ffmpeg -- external_zlib;
external_ffmpeg -- external_openjpeg;

View File

@ -34,3 +34,156 @@ diff -Naur llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice
libsycldevice-obj
libsycldevice-spv)
diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp
index 17eeaafae194..09e6d2217aaa 100644
--- a/sycl/source/detail/program_manager/program_manager.cpp
+++ b/sycl/source/detail/program_manager/program_manager.cpp
@@ -1647,46 +1647,120 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState(
}
assert(BinImages.size() > 0 && "Expected to find at least one device image");
+ // Ignore images with incompatible state. Image is considered compatible
+ // with a target state if an image is already in the target state or can
+ // be brought to target state by compiling/linking/building.
+ //
+ // Example: an image in "executable" state is not compatible with
+ // "input" target state - there is no operation to convert the image it
+ // to "input" state. An image in "input" state is compatible with
+ // "executable" target state because it can be built to get into
+ // "executable" state.
+ for (auto It = BinImages.begin(); It != BinImages.end();) {
+ if (getBinImageState(*It) > TargetState)
+ It = BinImages.erase(It);
+ else
+ ++It;
+ }
+
std::vector<device_image_plain> SYCLDeviceImages;
- for (RTDeviceBinaryImage *BinImage : BinImages) {
- const bundle_state ImgState = getBinImageState(BinImage);
-
- // Ignore images with incompatible state. Image is considered compatible
- // with a target state if an image is already in the target state or can
- // be brought to target state by compiling/linking/building.
- //
- // Example: an image in "executable" state is not compatible with
- // "input" target state - there is no operation to convert the image it
- // to "input" state. An image in "input" state is compatible with
- // "executable" target state because it can be built to get into
- // "executable" state.
- if (ImgState > TargetState)
- continue;
- for (const sycl::device &Dev : Devs) {
+ // If a non-input state is requested, we can filter out some compatible
+ // images and return only those with the highest compatible state for each
+ // device-kernel pair. This map tracks how many kernel-device pairs need each
+ // image, so that any unneeded ones are skipped.
+ // TODO this has no effect if the requested state is input, consider having
+ // a separate branch for that case to avoid unnecessary tracking work.
+ struct DeviceBinaryImageInfo {
+ std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
+ bundle_state State = bundle_state::input;
+ int RequirementCounter = 0;
+ };
+ std::unordered_map<RTDeviceBinaryImage *, DeviceBinaryImageInfo> ImageInfoMap;
+
+ for (const sycl::device &Dev : Devs) {
+ // Track the highest image state for each requested kernel.
+ using StateImagesPairT =
+ std::pair<bundle_state, std::vector<RTDeviceBinaryImage *>>;
+ using KernelImageMapT =
+ std::map<kernel_id, StateImagesPairT, LessByNameComp>;
+ KernelImageMapT KernelImageMap;
+ if (!KernelIDs.empty())
+ for (const kernel_id &KernelID : KernelIDs)
+ KernelImageMap.insert({KernelID, {}});
+
+ for (RTDeviceBinaryImage *BinImage : BinImages) {
if (!compatibleWithDevice(BinImage, Dev) ||
!doesDevSupportImgAspects(Dev, *BinImage))
continue;
- std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
- // Collect kernel names for the image
- {
- std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
- KernelIDs = m_BinImg2KernelIDs[BinImage];
- // If the image does not contain any non-service kernels we can skip it.
- if (!KernelIDs || KernelIDs->empty())
- continue;
+ auto InsertRes = ImageInfoMap.insert({BinImage, {}});
+ DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second;
+ if (InsertRes.second) {
+ ImgInfo.State = getBinImageState(BinImage);
+ // Collect kernel names for the image
+ {
+ std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
+ ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
+ }
}
+ const bundle_state ImgState = ImgInfo.State;
+ const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
+ ImgInfo.KernelIDs;
+ int &ImgRequirementCounter = ImgInfo.RequirementCounter;
- DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
- BinImage, Ctx, Devs, ImgState, KernelIDs, /*PIProgram=*/nullptr);
+ // If the image does not contain any non-service kernels we can skip it.
+ if (!ImageKernelIDs || ImageKernelIDs->empty())
+ continue;
- SYCLDeviceImages.push_back(
- createSyclObjFromImpl<device_image_plain>(Impl));
- break;
+ // Update tracked information.
+ for (kernel_id &KernelID : *ImageKernelIDs) {
+ StateImagesPairT *StateImagesPair;
+ // If only specific kernels are requested, ignore the rest.
+ if (!KernelIDs.empty()) {
+ auto It = KernelImageMap.find(KernelID);
+ if (It == KernelImageMap.end())
+ continue;
+ StateImagesPair = &It->second;
+ } else
+ StateImagesPair = &KernelImageMap[KernelID];
+
+ auto &[KernelImagesState, KernelImages] = *StateImagesPair;
+
+ if (KernelImages.empty()) {
+ KernelImagesState = ImgState;
+ KernelImages.push_back(BinImage);
+ ++ImgRequirementCounter;
+ } else if (KernelImagesState < ImgState) {
+ for (RTDeviceBinaryImage *Img : KernelImages) {
+ auto It = ImageInfoMap.find(Img);
+ assert(It != ImageInfoMap.end());
+ assert(It->second.RequirementCounter > 0);
+ --(It->second.RequirementCounter);
+ }
+ KernelImages.clear();
+ KernelImages.push_back(BinImage);
+ KernelImagesState = ImgState;
+ ++ImgRequirementCounter;
+ } else if (KernelImagesState == ImgState) {
+ KernelImages.push_back(BinImage);
+ ++ImgRequirementCounter;
+ }
+ }
}
}
+ for (const auto &ImgInfoPair : ImageInfoMap) {
+ if (ImgInfoPair.second.RequirementCounter == 0)
+ continue;
+
+ DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
+ ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State,
+ ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr);
+
+ SYCLDeviceImages.push_back(createSyclObjFromImpl<device_image_plain>(Impl));
+ }
+
return SYCLDeviceImages;
}

View File

@ -149,3 +149,19 @@ index 074f910a2..30f490818 100644
return is_hit_first | is_hit_second;
}
};
diff -ruN a/kernels/sycl/rthwif_embree_builder.cpp b/kernels/sycl/rthwif_embree_builder.cpp
--- a/kernels/sycl/rthwif_embree_builder.cpp 2023-03-28 17:23:06.429190200 +0200
+++ b/kernels/sycl/rthwif_embree_builder.cpp 2023-03-28 17:35:01.291938600 +0200
@@ -540,7 +540,12 @@
assert(offset <= geomDescrData.size());
}
+ /* Force running BVH building sequentially from the calling thread if using TBB < 2021, as it otherwise leads to runtime issues. */
+#if TBB_VERSION_MAJOR<2021
+ RTHWIF_PARALLEL_OPERATION parallelOperation = nullptr;
+#else
RTHWIF_PARALLEL_OPERATION parallelOperation = rthwifNewParallelOperation();
+#endif
/* estimate static accel size */
BBox1f time_range(0,1);

View File

@ -37,18 +37,24 @@ elseif(HIP_HIPCC_EXECUTABLE)
set(HIP_VERSION_MINOR 0)
set(HIP_VERSION_PATCH 0)
if(WIN32)
set(_hipcc_executable ${HIP_HIPCC_EXECUTABLE}.bat)
else()
set(_hipcc_executable ${HIP_HIPCC_EXECUTABLE})
endif()
# Get version from the output.
execute_process(COMMAND ${HIP_HIPCC_EXECUTABLE} --version
OUTPUT_VARIABLE HIP_VERSION_RAW
execute_process(COMMAND ${_hipcc_executable} --version
OUTPUT_VARIABLE _hip_version_raw
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE)
# Parse parts.
if(HIP_VERSION_RAW MATCHES "HIP version: .*")
if(_hip_version_raw MATCHES "HIP version: .*")
# Strip the HIP prefix and get list of individual version components.
string(REGEX REPLACE
".*HIP version: ([.0-9]+).*" "\\1"
HIP_SEMANTIC_VERSION "${HIP_VERSION_RAW}")
HIP_SEMANTIC_VERSION "${_hip_version_raw}")
string(REPLACE "." ";" HIP_VERSION_PARTS "${HIP_SEMANTIC_VERSION}")
list(LENGTH HIP_VERSION_PARTS NUM_HIP_VERSION_PARTS)
@ -71,7 +77,13 @@ elseif(HIP_HIPCC_EXECUTABLE)
# Construct full semantic version.
set(HIP_VERSION "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_PATCH}")
unset(HIP_VERSION_RAW)
unset(_hip_version_raw)
unset(_hipcc_executable)
else()
set(HIP_FOUND FALSE)
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(HIP
REQUIRED_VARS HIP_HIPCC_EXECUTABLE
VERSION_VAR HIP_VERSION)

View File

@ -1,58 +0,0 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-or-later
CHECKER_IGNORE_PREFIX = [
"extern",
"intern/moto",
]
CHECKER_BIN = "smatch"
CHECKER_ARGS = [
"--full-path",
"--two-passes",
]
import project_source_info
import subprocess
import sys
import os
USE_QUIET = (os.environ.get("QUIET", None) is not None)
def main():
source_info = project_source_info.build_info(use_cxx=False, ignore_prefix_list=CHECKER_IGNORE_PREFIX)
source_defines = project_source_info.build_defines_as_args()
check_commands = []
for c, inc_dirs, defs in source_info:
cmd = ([CHECKER_BIN] +
CHECKER_ARGS +
[c] +
[("-I%s" % i) for i in inc_dirs] +
[("-D%s" % d) for d in defs] +
source_defines
)
check_commands.append((c, cmd))
def my_process(i, c, cmd):
if not USE_QUIET:
percent = 100.0 * (i / len(check_commands))
percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"
sys.stdout.flush()
sys.stdout.write("%s %s\n" % (percent_str, c))
return subprocess.Popen(cmd)
process_functions = []
for i, (c, cmd) in enumerate(check_commands):
process_functions.append((my_process, (i, c, cmd)))
project_source_info.queue_processes(process_functions)
if __name__ == "__main__":
main()

View File

@ -1,56 +0,0 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-or-later
CHECKER_IGNORE_PREFIX = [
"extern",
"intern/moto",
]
CHECKER_BIN = "sparse"
CHECKER_ARGS = [
]
import project_source_info
import subprocess
import sys
import os
USE_QUIET = (os.environ.get("QUIET", None) is not None)
def main():
source_info = project_source_info.build_info(use_cxx=False, ignore_prefix_list=CHECKER_IGNORE_PREFIX)
source_defines = project_source_info.build_defines_as_args()
check_commands = []
for c, inc_dirs, defs in source_info:
cmd = ([CHECKER_BIN] +
CHECKER_ARGS +
[c] +
[("-I%s" % i) for i in inc_dirs] +
[("-D%s" % d) for d in defs] +
source_defines
)
check_commands.append((c, cmd))
def my_process(i, c, cmd):
if not USE_QUIET:
percent = 100.0 * (i / len(check_commands))
percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"
sys.stdout.flush()
sys.stdout.write("%s %s\n" % (percent_str, c))
return subprocess.Popen(cmd)
process_functions = []
for i, (c, cmd) in enumerate(check_commands):
process_functions.append((my_process, (i, c, cmd)))
project_source_info.queue_processes(process_functions)
if __name__ == "__main__":
main()

View File

@ -1,86 +0,0 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-or-later
CHECKER_IGNORE_PREFIX = [
"extern",
"intern/moto",
]
CHECKER_BIN = "splint"
CHECKER_ARGS = [
"-weak",
"-posix-lib",
"-linelen", "10000",
"+ignorequals",
"+relaxtypes",
"-retvalother",
"+matchanyintegral",
"+longintegral",
"+ignoresigns",
"-nestcomment",
"-predboolothers",
"-ifempty",
"-unrecogcomments",
# we may want to remove these later
"-type",
"-fixedformalarray",
"-fullinitblock",
"-fcnuse",
"-initallelements",
"-castfcnptr",
# -forcehints,
"-bufferoverflowhigh", # warns a lot about sprintf()
# re-definitions, rna causes most of these
"-redef",
"-syntax",
# dummy, witjout this splint complains with:
# /usr/include/bits/confname.h:31:27: *** Internal Bug at cscannerHelp.c:2428: Unexpanded macro not function or constant: int _PC_MAX_CANON
"-D_PC_MAX_CANON=0",
]
import project_source_info
import subprocess
import sys
import os
USE_QUIET = (os.environ.get("QUIET", None) is not None)
def main():
source_info = project_source_info.build_info(use_cxx=False, ignore_prefix_list=CHECKER_IGNORE_PREFIX)
check_commands = []
for c, inc_dirs, defs in source_info:
cmd = ([CHECKER_BIN] +
CHECKER_ARGS +
[c] +
[("-I%s" % i) for i in inc_dirs] +
[("-D%s" % d) for d in defs]
)
check_commands.append((c, cmd))
def my_process(i, c, cmd):
if not USE_QUIET:
percent = 100.0 * (i / len(check_commands))
percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"
sys.stdout.write("%s %s\n" % (percent_str, c))
sys.stdout.flush()
return subprocess.Popen(cmd)
process_functions = []
for i, (c, cmd) in enumerate(check_commands):
process_functions.append((my_process, (i, c, cmd)))
project_source_info.queue_processes(process_functions)
if __name__ == "__main__":
main()

View File

@ -82,7 +82,7 @@ if(NOT APPLE)
set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_DEVICE_ONEAPI ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_ONEAPI_BINARIES ON CACHE BOOL "" FORCE)
endif()

View File

@ -1384,4 +1384,3 @@ macro(windows_process_platform_bundled_libraries library_deps)
endforeach()
endif()
endmacro()

View File

@ -174,7 +174,7 @@ if(SYSTEMSTUBS_LIBRARY)
list(APPEND PLATFORM_LINKLIBS SystemStubs)
endif()
string(APPEND PLATFORM_CFLAGS " -pipe -funsigned-char -fno-strict-aliasing")
string(APPEND PLATFORM_CFLAGS " -pipe -funsigned-char -fno-strict-aliasing -ffp-contract=off")
set(PLATFORM_LINKFLAGS
"-fexceptions -framework CoreServices -framework Foundation -framework IOKit -framework AppKit -framework Cocoa -framework Carbon -framework AudioUnit -framework AudioToolbox -framework CoreAudio -framework Metal -framework QuartzCore"
)

View File

@ -335,18 +335,6 @@ if(WITH_CYCLES AND (WITH_CYCLES_DEVICE_ONEAPI OR (WITH_CYCLES_EMBREE AND EMBREE_
unset(_sycl_runtime_libraries)
endif()
if(WITH_CYCLES AND WITH_CYCLES_DEVICE_ONEAPI)
if(WITH_CYCLES_ONEAPI_BINARIES)
set(cycles_kernel_oneapi_lib_suffix "_aot")
else()
set(cycles_kernel_oneapi_lib_suffix "_jit")
endif()
list(APPEND PLATFORM_BUNDLED_LIBRARIES
${CMAKE_CURRENT_BINARY_DIR}/intern/cycles/kernel/libcycles_kernel_oneapi${cycles_kernel_oneapi_lib_suffix}.so
)
unset(cycles_kernel_oneapi_lib_suffix)
endif()
if(WITH_OPENVDB)
find_package(OpenVDB)
set_and_warn_library_found("OpenVDB" OPENVDB_FOUND WITH_OPENVDB)
@ -815,8 +803,7 @@ if(CMAKE_COMPILER_IS_GNUCC)
# Automatically turned on when building with "-march=native". This is
# explicitly turned off here as it will make floating point math give a bit
# different results. This will lead to automated test failures. So disable
# this until we support it. Seems to default to off in clang and the intel
# compiler.
# this until we support it.
set(PLATFORM_CFLAGS "-pipe -fPIC -funsigned-char -fno-strict-aliasing -ffp-contract=off")
# `maybe-uninitialized` is unreliable in release builds, but fine in debug builds.
@ -904,7 +891,7 @@ if(CMAKE_COMPILER_IS_GNUCC)
# CLang is the same as GCC for now.
elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
set(PLATFORM_CFLAGS "-pipe -fPIC -funsigned-char -fno-strict-aliasing")
set(PLATFORM_CFLAGS "-pipe -fPIC -funsigned-char -fno-strict-aliasing -ffp-contract=off")
if(WITH_LINKER_MOLD AND _IS_LINKER_DEFAULT)
find_program(MOLD_BIN "mold")

View File

@ -1105,18 +1105,6 @@ if(WITH_CYCLES AND (WITH_CYCLES_DEVICE_ONEAPI OR (WITH_CYCLES_EMBREE AND EMBREE_
set(SYCL_LIBRARIES optimized ${SYCL_LIBRARY} debug ${SYCL_LIBRARY_DEBUG})
endif()
if(WITH_CYCLES AND WITH_CYCLES_DEVICE_ONEAPI)
if(WITH_CYCLES_ONEAPI_BINARIES)
set(cycles_kernel_oneapi_lib_suffix "_aot")
else()
set(cycles_kernel_oneapi_lib_suffix "_jit")
endif()
list(APPEND PLATFORM_BUNDLED_LIBRARIES
${CMAKE_CURRENT_BINARY_DIR}/intern/cycles/kernel/cycles_kernel_oneapi${cycles_kernel_oneapi_lib_suffix}.dll
)
unset(cycles_kernel_oneapi_lib_suffix)
endif()
# Environment variables to run precompiled executables that needed libraries.
list(JOIN PLATFORM_BUNDLED_LIBRARY_DIRS ";" _library_paths)

View File

@ -9,7 +9,7 @@ buildbot:
cuda11:
version: '11.4.1'
hip:
version: '5.3.22480'
version: '5.5.30571'
optix:
version: '7.3.0'
ocloc:

View File

@ -865,29 +865,40 @@ Unfortunate Corner Cases
Besides all expected cases listed above, there are a few others that should not be
an issue but, due to internal implementation details, currently are:
- ``Object.hide_viewport``, ``Object.hide_select`` and ``Object.hide_render``:
Setting any of those Booleans will trigger a rebuild of Collection caches,
thus breaking any current iteration over ``Collection.all_objects``.
Collection Objects
^^^^^^^^^^^^^^^^^^
Changing: ``Object.hide_viewport``, ``Object.hide_select`` or ``Object.hide_render``
will trigger a rebuild of Collection caches, thus breaking any current iteration over ``Collection.all_objects``.
.. rubric:: Do not:
.. code-block:: python
# `all_objects` is an iterator. Using it directly while performing operations on its members that will update
# the memory accessed by the `all_objects` iterator will lead to invalid memory accesses and crashes.
for object in bpy.data.collections["Collection"].all_objects:
object.hide_viewport = True
.. rubric:: Do not:
.. rubric:: Do:
.. code-block:: python
.. code-block:: python
# `all_objects` is an iterator. Using it directly while performing operations on its members that will update
# the memory accessed by the `all_objects` iterator will lead to invalid memory accesses and crashes.
for object in bpy.data.collections["Collection"].all_objects:
object.hide_viewport = True
# `all_objects[:]` is an independent list generated from the iterator. As long as no objects are deleted,
# its content will remain valid even if the data accessed by the `all_objects` iterator is modified.
for object in bpy.data.collections["Collection"].all_objects[:]:
object.hide_viewport = True
.. rubric:: Do:
Data-Blocks Renaming During Iteration
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. code-block:: python
# `all_objects[:]` is an independent list generated from the iterator. As long as no objects are deleted,
# its content will remain valid even if the data accessed by the `all_objects` iterator is modified.
for object in bpy.data.collections["Collection"].all_objects[:]:
object.hide_viewport = True
Data-blocks accessed from ``bpy.data`` are sorted when their name is set.
Any loop that iterates of a data such as ``bpy.data.objects`` for example,
and sets the objects ``name`` must get all items from the iterator first (typically by converting to a list or tuple)
to avoid missing some objects and iterating over others multiple times.
sys.exit

View File

@ -572,7 +572,7 @@ template<class T> inline bool cmpMinMax(T &minv, T &maxv, const T &val)
}
template<> inline bool cmpMinMax<Vec3>(Vec3 &minv, Vec3 &maxv, const Vec3 &val)
{
return (cmpMinMax(minv.x, maxv.x, val.x) | cmpMinMax(minv.y, maxv.y, val.y) |
return (cmpMinMax(minv.x, maxv.x, val.x) || cmpMinMax(minv.y, maxv.y, val.y) ||
cmpMinMax(minv.z, maxv.z, val.z));
}

View File

@ -281,6 +281,9 @@ endif()
if(WITH_CYCLES_EMBREE)
add_definitions(-DWITH_EMBREE)
if(WITH_CYCLES_DEVICE_ONEAPI AND EMBREE_SYCL_SUPPORT)
add_definitions(-DWITH_EMBREE_GPU)
endif()
add_definitions(-DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION})
include_directories(
SYSTEM

View File

@ -1544,6 +1544,13 @@ class CyclesPreferences(bpy.types.AddonPreferences):
default=False,
)
use_oneapirt: BoolProperty(
name="Embree on GPU (Experimental)",
description="Embree GPU execution will allow to use hardware ray tracing on Intel GPUs, which will provide better performance. "
"However this support is experimental and some scenes may render incorrectly",
default=False,
)
kernel_optimization_level: EnumProperty(
name="Kernel Optimization",
description="Kernels can be optimized based on scene content. Optimized kernels are requested at the start of a render. "
@ -1676,16 +1683,16 @@ class CyclesPreferences(bpy.types.AddonPreferences):
col.label(text=iface_("and NVIDIA driver version %s or newer") % driver_version,
icon='BLANK1', translate=False)
elif device_type == 'HIP':
if True:
col.label(text="HIP temporarily disabled due to compiler bugs", icon='BLANK1')
else:
import sys
if sys.platform[:3] == "win":
driver_version = "21.Q4"
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text=iface_("and AMD Radeon Pro %s driver or newer") % driver_version,
icon='BLANK1', translate=False)
elif sys.platform.startswith("linux"):
import sys
if sys.platform[:3] == "win":
driver_version = "21.Q4"
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text=iface_("and AMD Radeon Pro %s driver or newer") % driver_version,
icon='BLANK1', translate=False)
elif sys.platform.startswith("linux"):
if True:
col.label(text="HIP temporarily disabled due to compiler bugs", icon='BLANK1')
else:
driver_version = "22.10"
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text=iface_("and AMD driver version %s or newer") % driver_version, icon='BLANK1',
@ -1763,6 +1770,11 @@ class CyclesPreferences(bpy.types.AddonPreferences):
col.prop(self, "kernel_optimization_level")
col.prop(self, "use_metalrt")
if compute_device_type == 'ONEAPI' and _cycles.with_embree_gpu:
row = layout.row()
row.use_property_split = True
row.prop(self, "use_oneapirt")
def draw(self, context):
self.draw_impl(self.layout, context)

View File

@ -112,9 +112,26 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences,
device.has_peer_memory = false;
}
if (get_boolean(cpreferences, "use_metalrt")) {
device.use_metalrt = true;
bool accumulated_use_hardware_raytracing = false;
foreach (
DeviceInfo &info,
(device.multi_devices.size() != 0 ? device.multi_devices : vector<DeviceInfo>({device}))) {
if (info.type == DEVICE_METAL && !get_boolean(cpreferences, "use_metalrt")) {
info.use_hardware_raytracing = false;
}
if (info.type == DEVICE_ONEAPI && !get_boolean(cpreferences, "use_oneapirt")) {
info.use_hardware_raytracing = false;
}
/* There is an accumulative logic here, because Multi-devices are support only for
* the same backend + CPU in Blender right now, and both oneAPI and Metal have a
* global boolean backend setting (see above) for enabling/disabling HW RT,
* so all sub-devices in the multi-device should enable (or disable) HW RT
* simultaneously (and CPU device are expected to ignore `use_hardware_raytracing` setting). */
accumulated_use_hardware_raytracing |= info.use_hardware_raytracing;
}
device.use_hardware_raytracing = accumulated_use_hardware_raytracing;
if (preview) {
/* Disable specialization for preview renders. */

View File

@ -1034,6 +1034,14 @@ void *CCL_python_module_init()
Py_INCREF(Py_False);
#endif /* WITH_EMBREE */
#ifdef WITH_EMBREE_GPU
PyModule_AddObject(mod, "with_embree_gpu", Py_True);
Py_INCREF(Py_True);
#else /* WITH_EMBREE_GPU */
PyModule_AddObject(mod, "with_embree_gpu", Py_False);
Py_INCREF(Py_False);
#endif /* WITH_EMBREE_GPU */
if (ccl::openimagedenoise_supported()) {
PyModule_AddObject(mod, "with_openimagedenoise", Py_True);
Py_INCREF(Py_True);

View File

@ -1061,7 +1061,7 @@ void BlenderSession::ensure_display_driver_if_needed()
unique_ptr<BlenderDisplayDriver> display_driver = make_unique<BlenderDisplayDriver>(
b_engine, b_scene, background);
display_driver_ = display_driver.get();
session->set_display_driver(move(display_driver));
session->set_display_driver(std::move(display_driver));
}
CCL_NAMESPACE_END

View File

@ -606,7 +606,7 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
int4 *bvh_nodes = &bvh->pack.nodes[0];
size_t bvh_nodes_size = bvh->pack.nodes.size();
for (size_t i = 0, j = 0; i < bvh_nodes_size; j++) {
for (size_t i = 0; i < bvh_nodes_size;) {
size_t nsize, nsize_bbox;
if (bvh_nodes[i].x & PATH_RAY_NODE_UNALIGNED) {
nsize = BVH_UNALIGNED_NODE_SIZE;

View File

@ -111,9 +111,13 @@ BVHEmbree::~BVHEmbree()
}
}
void BVHEmbree::build(Progress &progress, Stats *stats, RTCDevice rtc_device_)
void BVHEmbree::build(Progress &progress,
Stats *stats,
RTCDevice rtc_device_,
const bool rtc_device_is_sycl_)
{
rtc_device = rtc_device_;
rtc_device_is_sycl = rtc_device_is_sycl_;
assert(rtc_device);
rtcSetDeviceErrorFunction(rtc_device, rtc_error_func, NULL);
@ -266,15 +270,29 @@ void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i)
rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
const int *triangles = mesh->get_triangles().data();
rtcSetSharedGeometryBuffer(geom_id,
RTC_BUFFER_TYPE_INDEX,
0,
RTC_FORMAT_UINT3,
triangles,
0,
sizeof(int) * 3,
num_triangles);
if (!rtc_device_is_sycl) {
rtcSetSharedGeometryBuffer(geom_id,
RTC_BUFFER_TYPE_INDEX,
0,
RTC_FORMAT_UINT3,
triangles,
0,
sizeof(int) * 3,
num_triangles);
}
else {
/* NOTE(sirgienko): If the Embree device is a SYCL device, then Embree execution will
* happen on GPU, and we cannot use standard host pointers at this point. So instead
* of making a shared geometry buffer - a new Embree buffer will be created and data
* will be copied. */
int *triangles_buffer = (int *)rtcSetNewGeometryBuffer(
geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, sizeof(int) * 3, num_triangles);
assert(triangles_buffer);
if (triangles_buffer) {
static_assert(sizeof(int) == sizeof(uint));
std::memcpy(triangles_buffer, triangles, sizeof(int) * 3 * (num_triangles));
}
}
set_tri_vertex_buffer(geom_id, mesh, false);
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
@ -323,14 +341,38 @@ void BVHEmbree::set_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh, con
rtcUpdateGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t);
}
else {
rtcSetSharedGeometryBuffer(geom_id,
RTC_BUFFER_TYPE_VERTEX,
t,
RTC_FORMAT_FLOAT3,
verts,
0,
sizeof(float3),
num_verts + 1);
if (!rtc_device_is_sycl) {
rtcSetSharedGeometryBuffer(geom_id,
RTC_BUFFER_TYPE_VERTEX,
t,
RTC_FORMAT_FLOAT3,
verts,
0,
sizeof(float3),
num_verts + 1);
}
else {
/* NOTE(sirgienko): If the Embree device is a SYCL device, then Embree execution will
* happen on GPU, and we cannot use standard host pointers at this point. So instead
* of making a shared geometry buffer - a new Embree buffer will be created and data
* will be copied. */
/* As float3 is packed on GPU side, we map it to packed_float3. */
packed_float3 *verts_buffer = (packed_float3 *)rtcSetNewGeometryBuffer(
geom_id,
RTC_BUFFER_TYPE_VERTEX,
t,
RTC_FORMAT_FLOAT3,
sizeof(packed_float3),
num_verts + 1);
assert(verts_buffer);
if (verts_buffer) {
for (size_t i = (size_t)0; i < num_verts + 1; ++i) {
verts_buffer[i].x = verts[i].x;
verts_buffer[i].y = verts[i].y;
verts_buffer[i].z = verts[i].z;
}
}
}
}
}
}

View File

@ -29,7 +29,10 @@ class PointCloud;
class BVHEmbree : public BVH {
public:
void build(Progress &progress, Stats *stats, RTCDevice rtc_device);
void build(Progress &progress,
Stats *stats,
RTCDevice rtc_device,
const bool isSyclEmbreeDevice = false);
void refit(Progress &progress);
RTCScene scene;
@ -55,6 +58,7 @@ class BVHEmbree : public BVH {
const bool update);
RTCDevice rtc_device;
bool rtc_device_is_sycl;
enum RTCBuildQuality build_quality;
};

View File

@ -42,15 +42,19 @@ endif()
###########################################################################
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
set(WITH_CYCLES_HIP_BINARIES OFF)
message(STATUS "HIP temporarily disabled due to compiler bugs")
if(UNIX)
# Disabled until there is a HIP 5.5 release for Linux.
set(WITH_CYCLES_HIP_BINARIES OFF)
message(STATUS "HIP temporarily disabled due to compiler bugs")
else()
# Need at least HIP 5.5 to solve compiler bug affecting the kernel.
find_package(HIP 5.5.0)
set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
# find_package(HIP)
# set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
# if(HIP_FOUND)
# message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
# endif()
if(HIP_FOUND)
message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
endif()
endif()
endif()
if(NOT WITH_HIP_DYNLOAD)

View File

@ -84,7 +84,7 @@ CPUDevice::~CPUDevice()
texture_info.free();
}
BVHLayoutMask CPUDevice::get_bvh_layout_mask() const
BVHLayoutMask CPUDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
{
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
#ifdef WITH_EMBREE

View File

@ -56,7 +56,7 @@ class CPUDevice : public Device {
CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_);
~CPUDevice();
virtual BVHLayoutMask get_bvh_layout_mask() const override;
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
/* Returns true if the texture info was copied to the device (meaning, some more
* re-initialization might be needed). */

View File

@ -35,7 +35,7 @@ bool CUDADevice::have_precompiled_kernels()
return path_exists(cubins_path);
}
BVHLayoutMask CUDADevice::get_bvh_layout_mask() const
BVHLayoutMask CUDADevice::get_bvh_layout_mask(uint /*kernel_features*/) const
{
return BVH_LAYOUT_BVH2;
}

View File

@ -38,7 +38,7 @@ class CUDADevice : public GPUDevice {
static bool have_precompiled_kernels();
virtual BVHLayoutMask get_bvh_layout_mask() const override;
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
void set_error(const string &error) override;

View File

@ -354,7 +354,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.has_guiding = true;
info.has_profiling = true;
info.has_peer_memory = false;
info.use_metalrt = false;
info.use_hardware_raytracing = false;
info.denoisers = DENOISER_ALL;
foreach (const DeviceInfo &device, subdevices) {
@ -403,7 +403,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.has_guiding &= device.has_guiding;
info.has_profiling &= device.has_profiling;
info.has_peer_memory |= device.has_peer_memory;
info.use_metalrt |= device.use_metalrt;
info.use_hardware_raytracing |= device.use_hardware_raytracing;
info.denoisers &= device.denoisers;
}

View File

@ -71,15 +71,16 @@ class DeviceInfo {
string description;
string id; /* used for user preferences, should stay fixed with changing hardware config */
int num;
bool display_device; /* GPU is used as a display device. */
bool has_nanovdb; /* Support NanoVDB volumes. */
bool has_light_tree; /* Support light tree. */
bool has_osl; /* Support Open Shading Language. */
bool has_guiding; /* Support path guiding. */
bool has_profiling; /* Supports runtime collection of profiling info. */
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
bool has_gpu_queue; /* Device supports GPU queue. */
bool use_metalrt; /* Use MetalRT to accelerate ray queries (Metal only). */
bool display_device; /* GPU is used as a display device. */
bool has_nanovdb; /* Support NanoVDB volumes. */
bool has_light_tree; /* Support light tree. */
bool has_osl; /* Support Open Shading Language. */
bool has_guiding; /* Support path guiding. */
bool has_profiling; /* Supports runtime collection of profiling info. */
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
bool has_gpu_queue; /* Device supports GPU queue. */
bool use_hardware_raytracing; /* Use hardware ray tracing to accelerate ray queries in a backend.
*/
KernelOptimizationLevel kernel_optimization_level; /* Optimization level applied to path tracing
* kernels (Metal only). */
DenoiserTypeMask denoisers; /* Supported denoiser types. */
@ -101,7 +102,7 @@ class DeviceInfo {
has_profiling = false;
has_peer_memory = false;
has_gpu_queue = false;
use_metalrt = false;
use_hardware_raytracing = false;
denoisers = DENOISER_NONE;
}
@ -157,7 +158,7 @@ class Device {
fprintf(stderr, "%s\n", error.c_str());
fflush(stderr);
}
virtual BVHLayoutMask get_bvh_layout_mask() const = 0;
virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const = 0;
/* statistics */
Stats &stats;

View File

@ -20,7 +20,7 @@ class DummyDevice : public Device {
~DummyDevice() {}
virtual BVHLayoutMask get_bvh_layout_mask() const override
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override
{
return 0;
}

View File

@ -137,7 +137,7 @@ void device_hip_info(vector<DeviceInfo> &devices)
info.num = num;
info.has_nanovdb = true;
info.has_light_tree = false;
info.has_light_tree = true;
info.denoisers = 0;
info.has_gpu_queue = true;

View File

@ -35,7 +35,7 @@ bool HIPDevice::have_precompiled_kernels()
return path_exists(fatbins_path);
}
BVHLayoutMask HIPDevice::get_bvh_layout_mask() const
BVHLayoutMask HIPDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
{
return BVH_LAYOUT_BVH2;
}

View File

@ -35,7 +35,7 @@ class HIPDevice : public GPUDevice {
static bool have_precompiled_kernels();
virtual BVHLayoutMask get_bvh_layout_mask() const override;
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
void set_error(const string &error) override;

View File

@ -3,7 +3,9 @@
#include "device/kernel.h"
#include "util/log.h"
#ifndef __KERNEL_ONEAPI__
# include "util/log.h"
#endif
CCL_NAMESPACE_BEGIN
@ -153,10 +155,13 @@ const char *device_kernel_as_string(DeviceKernel kernel)
case DEVICE_KERNEL_NUM:
break;
};
#ifndef __KERNEL_ONEAPI__
LOG(FATAL) << "Unhandled kernel " << static_cast<int>(kernel) << ", should never happen.";
#endif
return "UNKNOWN";
}
#ifndef __KERNEL_ONEAPI__
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel)
{
os << device_kernel_as_string(kernel);
@ -178,5 +183,6 @@ string device_kernel_mask_as_string(DeviceKernelMask mask)
return str;
}
#endif
CCL_NAMESPACE_END

View File

@ -3,11 +3,13 @@
#pragma once
#include "kernel/types.h"
#ifndef __KERNEL_ONEAPI__
# include "kernel/types.h"
#include "util/string.h"
# include "util/string.h"
#include <ostream> // NOLINT
# include <ostream> // NOLINT
#endif
CCL_NAMESPACE_BEGIN
@ -15,9 +17,12 @@ bool device_kernel_has_shading(DeviceKernel kernel);
bool device_kernel_has_intersection(DeviceKernel kernel);
const char *device_kernel_as_string(DeviceKernel kernel);
#ifndef __KERNEL_ONEAPI__
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel);
typedef uint64_t DeviceKernelMask;
string device_kernel_mask_as_string(DeviceKernelMask mask);
#endif
CCL_NAMESPACE_END

View File

@ -100,7 +100,7 @@ class MetalDevice : public Device {
virtual void cancel() override;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
void set_error(const string &error) override;

View File

@ -39,7 +39,7 @@ bool MetalDevice::is_device_cancelled(int ID)
return get_device_by_ID(ID, lock) == nullptr;
}
BVHLayoutMask MetalDevice::get_bvh_layout_mask() const
BVHLayoutMask MetalDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
{
return use_metalrt ? BVH_LAYOUT_METAL : BVH_LAYOUT_BVH2;
}
@ -100,12 +100,12 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
}
case METAL_GPU_AMD: {
max_threads_per_threadgroup = 128;
use_metalrt = info.use_metalrt;
use_metalrt = info.use_hardware_raytracing;
break;
}
case METAL_GPU_APPLE: {
max_threads_per_threadgroup = 512;
use_metalrt = info.use_metalrt;
use_metalrt = info.use_hardware_raytracing;
break;
}
}

View File

@ -96,12 +96,13 @@ class MultiDevice : public Device {
return error_msg;
}
virtual BVHLayoutMask get_bvh_layout_mask() const override
virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const override
{
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
BVHLayoutMask bvh_layout_mask_all = BVH_LAYOUT_NONE;
foreach (const SubDevice &sub_device, devices) {
BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask();
BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask(
kernel_features);
bvh_layout_mask &= device_bvh_layout_mask;
bvh_layout_mask_all |= device_bvh_layout_mask;
}

View File

@ -40,12 +40,12 @@ bool device_oneapi_init()
if (getenv("SYCL_CACHE_TRESHOLD") == nullptr) {
_putenv_s("SYCL_CACHE_THRESHOLD", "0");
}
if (getenv("SYCL_DEVICE_FILTER") == nullptr) {
if (getenv("ONEAPI_DEVICE_SELECTOR") == nullptr) {
if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
_putenv_s("SYCL_DEVICE_FILTER", "level_zero");
_putenv_s("ONEAPI_DEVICE_SELECTOR", "level_zero:*");
}
else {
_putenv_s("SYCL_DEVICE_FILTER", "level_zero,cuda,hip");
_putenv_s("ONEAPI_DEVICE_SELECTOR", "!opencl:*");
}
}
if (getenv("SYCL_ENABLE_PCI") == nullptr) {
@ -58,10 +58,10 @@ bool device_oneapi_init()
setenv("SYCL_CACHE_PERSISTENT", "1", false);
setenv("SYCL_CACHE_THRESHOLD", "0", false);
if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
setenv("SYCL_DEVICE_FILTER", "level_zero", false);
setenv("ONEAPI_DEVICE_SELECTOR", "level_zero:*", false);
}
else {
setenv("SYCL_DEVICE_FILTER", "level_zero,cuda,hip", false);
setenv("ONEAPI_DEVICE_SELECTOR", "!opencl:*", false);
}
setenv("SYCL_ENABLE_PCI", "1", false);
setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false);
@ -87,7 +87,8 @@ Device *device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &pro
}
#ifdef WITH_ONEAPI
static void device_iterator_cb(const char *id, const char *name, int num, void *user_ptr)
static void device_iterator_cb(
const char *id, const char *name, int num, bool hwrt_support, void *user_ptr)
{
vector<DeviceInfo> *devices = (vector<DeviceInfo> *)user_ptr;
@ -112,6 +113,13 @@ static void device_iterator_cb(const char *id, const char *name, int num, void *
/* NOTE(@nsirgien): Seems not possible to know from SYCL/oneAPI or Level0. */
info.display_device = false;
# ifdef WITH_EMBREE_GPU
info.use_hardware_raytracing = hwrt_support;
# else
info.use_hardware_raytracing = false;
(void)hwrt_support;
# endif
devices->push_back(info);
VLOG_INFO << "Added device \"" << name << "\" with id \"" << info.id << "\".";
}

View File

@ -8,7 +8,19 @@
# include "util/debug.h"
# include "util/log.h"
# ifdef WITH_EMBREE_GPU
# include "bvh/embree.h"
# endif
# include "kernel/device/oneapi/globals.h"
# include "kernel/device/oneapi/kernel.h"
# if defined(WITH_EMBREE_GPU) && defined(EMBREE_SYCL_SUPPORT) && !defined(SYCL_LANGUAGE_VERSION)
/* These declarations are missing from embree headers when compiling from a compiler that doesn't
* support SYCL. */
extern "C" RTCDevice rtcNewSYCLDevice(sycl::context context, const char *config);
extern "C" bool rtcIsSYCLDeviceSupported(const sycl::device sycl_device);
# endif
CCL_NAMESPACE_BEGIN
@ -22,16 +34,29 @@ static void queue_error_cb(const char *message, void *user_ptr)
OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: Device(info, stats, profiler),
device_queue_(nullptr),
# ifdef WITH_EMBREE_GPU
embree_device(nullptr),
embree_scene(nullptr),
# endif
texture_info_(this, "texture_info", MEM_GLOBAL),
kg_memory_(nullptr),
kg_memory_device_(nullptr),
kg_memory_size_(0)
{
need_texture_info_ = false;
use_hardware_raytracing = info.use_hardware_raytracing;
oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);
bool is_finished_ok = create_queue(device_queue_, info.num);
bool is_finished_ok = create_queue(device_queue_,
info.num,
# ifdef WITH_EMBREE_GPU
use_hardware_raytracing ? &embree_device : nullptr
# else
nullptr
# endif
);
if (is_finished_ok == false) {
set_error("oneAPI queue initialization error: got runtime exception \"" +
oneapi_error_string_ + "\"");
@ -42,6 +67,16 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi
assert(device_queue_);
}
# ifdef WITH_EMBREE_GPU
use_hardware_raytracing = use_hardware_raytracing && (embree_device != nullptr);
# else
use_hardware_raytracing = false;
# endif
if (use_hardware_raytracing) {
VLOG_INFO << "oneAPI will use hardware ray tracing for intersection acceleration.";
}
size_t globals_segment_size;
is_finished_ok = kernel_globals_size(globals_segment_size);
if (is_finished_ok == false) {
@ -64,6 +99,11 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi
OneapiDevice::~OneapiDevice()
{
# ifdef WITH_EMBREE_GPU
if (embree_device)
rtcReleaseDevice(embree_device);
# endif
texture_info_.free();
usm_free(device_queue_, kg_memory_);
usm_free(device_queue_, kg_memory_device_);
@ -80,15 +120,47 @@ bool OneapiDevice::check_peer_access(Device * /*peer_device*/)
return false;
}
BVHLayoutMask OneapiDevice::get_bvh_layout_mask() const
bool OneapiDevice::can_use_hardware_raytracing_for_features(uint requested_features) const
{
return BVH_LAYOUT_BVH2;
/* MNEE and Ray-trace kernels currently don't work correctly with HWRT. */
return !(requested_features & (KERNEL_FEATURE_MNEE | KERNEL_FEATURE_NODE_RAYTRACE));
}
BVHLayoutMask OneapiDevice::get_bvh_layout_mask(uint requested_features) const
{
return (use_hardware_raytracing &&
can_use_hardware_raytracing_for_features(requested_features)) ?
BVH_LAYOUT_EMBREE :
BVH_LAYOUT_BVH2;
}
# ifdef WITH_EMBREE_GPU
void OneapiDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
{
if (embree_device && bvh->params.bvh_layout == BVH_LAYOUT_EMBREE) {
BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
if (refit) {
bvh_embree->refit(progress);
}
else {
bvh_embree->build(progress, &stats, embree_device, true);
}
if (bvh->params.top_level) {
embree_scene = bvh_embree->scene;
}
}
else {
Device::build_bvh(bvh, progress, refit);
}
}
# endif
bool OneapiDevice::load_kernels(const uint requested_features)
{
assert(device_queue_);
kernel_features = requested_features;
bool is_finished_ok = oneapi_run_test_kernel(device_queue_);
if (is_finished_ok == false) {
set_error("oneAPI test kernel execution: got a runtime exception \"" + oneapi_error_string_ +
@ -100,7 +172,14 @@ bool OneapiDevice::load_kernels(const uint requested_features)
assert(device_queue_);
}
is_finished_ok = oneapi_load_kernels(device_queue_, (const unsigned int)requested_features);
if (use_hardware_raytracing && !can_use_hardware_raytracing_for_features(requested_features)) {
VLOG_INFO
<< "Hardware ray tracing disabled, not supported yet by oneAPI for requested features.";
use_hardware_raytracing = false;
}
is_finished_ok = oneapi_load_kernels(
device_queue_, (const unsigned int)requested_features, use_hardware_raytracing);
if (is_finished_ok == false) {
set_error("oneAPI kernels loading: got a runtime exception \"" + oneapi_error_string_ + "\"");
}
@ -327,6 +406,16 @@ void OneapiDevice::const_copy_to(const char *name, void *host, size_t size)
<< string_human_readable_number(size) << " bytes. ("
<< string_human_readable_size(size) << ")";
# ifdef WITH_EMBREE_GPU
if (strcmp(name, "data") == 0) {
assert(size <= sizeof(KernelData));
/* Update scene handle(since it is different for each device on multi devices) */
KernelData *const data = (KernelData *)host;
data->device_bvh = embree_scene;
}
# endif
ConstMemMap::iterator i = const_mem_map_.find(name);
device_vector<uchar> *data;
@ -446,7 +535,9 @@ void OneapiDevice::check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_
# endif
}
bool OneapiDevice::create_queue(SyclQueue *&external_queue, int device_index)
bool OneapiDevice::create_queue(SyclQueue *&external_queue,
int device_index,
void *embree_device_pointer)
{
bool finished_correct = true;
try {
@ -457,6 +548,13 @@ bool OneapiDevice::create_queue(SyclQueue *&external_queue, int device_index)
sycl::queue *created_queue = new sycl::queue(devices[device_index],
sycl::property::queue::in_order());
external_queue = reinterpret_cast<SyclQueue *>(created_queue);
# ifdef WITH_EMBREE_GPU
if (embree_device_pointer) {
*((RTCDevice *)embree_device_pointer) = rtcNewSYCLDevice(created_queue->get_context(), "");
}
# else
(void)embree_device_pointer;
# endif
}
catch (sycl::exception const &e) {
finished_correct = false;
@ -625,7 +723,8 @@ bool OneapiDevice::enqueue_kernel(KernelContext *kernel_context,
size_t global_size,
void **args)
{
return oneapi_enqueue_kernel(kernel_context, kernel, global_size, args);
return oneapi_enqueue_kernel(
kernel_context, kernel, global_size, kernel_features, use_hardware_raytracing, args);
}
/* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows
@ -767,9 +866,9 @@ char *OneapiDevice::device_capabilities()
sycl::id<3> max_work_item_sizes =
device.get_info<sycl::info::device::max_work_item_sizes<3>>();
WRITE_ATTR("max_work_item_sizes_dim0", ((size_t)max_work_item_sizes.get(0)))
WRITE_ATTR("max_work_item_sizes_dim1", ((size_t)max_work_item_sizes.get(1)))
WRITE_ATTR("max_work_item_sizes_dim2", ((size_t)max_work_item_sizes.get(2)))
WRITE_ATTR(max_work_item_sizes_dim0, ((size_t)max_work_item_sizes.get(0)))
WRITE_ATTR(max_work_item_sizes_dim1, ((size_t)max_work_item_sizes.get(1)))
WRITE_ATTR(max_work_item_sizes_dim2, ((size_t)max_work_item_sizes.get(2)))
GET_NUM_ATTR(max_work_group_size)
GET_NUM_ATTR(max_num_sub_groups)
@ -792,7 +891,7 @@ char *OneapiDevice::device_capabilities()
GET_NUM_ATTR(native_vector_width_half)
size_t max_clock_frequency = device.get_info<sycl::info::device::max_clock_frequency>();
WRITE_ATTR("max_clock_frequency", max_clock_frequency)
WRITE_ATTR(max_clock_frequency, max_clock_frequency)
GET_NUM_ATTR(address_bits)
GET_NUM_ATTR(max_mem_alloc_size)
@ -801,7 +900,7 @@ char *OneapiDevice::device_capabilities()
* supported so we always return false, even if device supports HW texture usage acceleration.
*/
bool image_support = false;
WRITE_ATTR("image_support", (size_t)image_support)
WRITE_ATTR(image_support, (size_t)image_support)
GET_NUM_ATTR(max_parameter_size)
GET_NUM_ATTR(mem_base_addr_align)
@ -830,12 +929,17 @@ void OneapiDevice::iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_p
std::string name = device.get_info<sycl::info::device::name>();
# else
std::string name = "SYCL Host Task (Debug)";
# endif
# ifdef WITH_EMBREE_GPU
bool hwrt_support = rtcIsSYCLDeviceSupported(device);
# else
bool hwrt_support = false;
# endif
std::string id = "ONEAPI_" + platform_name + "_" + name;
if (device.has(sycl::aspect::ext_intel_pci_address)) {
id.append("_" + device.get_info<sycl::ext::intel::info::device::pci_address>());
}
(cb)(id.c_str(), name.c_str(), num, user_ptr);
(cb)(id.c_str(), name.c_str(), num, hwrt_support, user_ptr);
num++;
}
}

View File

@ -16,15 +16,16 @@ CCL_NAMESPACE_BEGIN
class DeviceQueue;
typedef void (*OneAPIDeviceIteratorCallback)(const char *id,
const char *name,
int num,
void *user_ptr);
typedef void (*OneAPIDeviceIteratorCallback)(
const char *id, const char *name, int num, bool hwrt_support, void *user_ptr);
class OneapiDevice : public Device {
private:
SyclQueue *device_queue_;
# ifdef WITH_EMBREE_GPU
RTCDevice embree_device;
RTCScene embree_scene;
# endif
using ConstMemMap = map<string, device_vector<uchar> *>;
ConstMemMap const_mem_map_;
device_vector<TextureInfo> texture_info_;
@ -34,17 +35,21 @@ class OneapiDevice : public Device {
size_t kg_memory_size_ = (size_t)0;
size_t max_memory_on_device_ = (size_t)0;
std::string oneapi_error_string_;
bool use_hardware_raytracing = false;
unsigned int kernel_features = 0;
public:
virtual BVHLayoutMask get_bvh_layout_mask() const override;
virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const override;
OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
virtual ~OneapiDevice();
# ifdef WITH_EMBREE_GPU
void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
# endif
bool check_peer_access(Device *peer_device) override;
bool load_kernels(const uint requested_features) override;
bool load_kernels(const uint kernel_features) override;
void load_texture_info();
@ -113,8 +118,9 @@ class OneapiDevice : public Device {
SyclQueue *sycl_queue();
protected:
bool can_use_hardware_raytracing_for_features(uint kernel_features) const;
void check_usm(SyclQueue *queue, const void *usm_ptr, bool allow_host);
bool create_queue(SyclQueue *&external_queue, int device_index);
bool create_queue(SyclQueue *&external_queue, int device_index, void *embree_device);
void free_queue(SyclQueue *queue);
void *usm_aligned_alloc_host(SyclQueue *queue, size_t memory_size, size_t alignment);
void *usm_alloc_device(SyclQueue *queue, size_t memory_size);

View File

@ -151,7 +151,7 @@ unique_ptr<DeviceQueue> OptiXDevice::gpu_queue_create()
return make_unique<OptiXDeviceQueue>(this);
}
BVHLayoutMask OptiXDevice::get_bvh_layout_mask() const
BVHLayoutMask OptiXDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
{
/* OptiX has its own internal acceleration structure format. */
return BVH_LAYOUT_OPTIX;

View File

@ -88,7 +88,7 @@ class OptiXDevice : public CUDADevice {
OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
~OptiXDevice();
BVHLayoutMask get_bvh_layout_mask() const override;
BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
string compile_kernel_get_common_cflags(const uint kernel_features);

View File

@ -574,7 +574,7 @@ void PathTrace::denoise(const RenderWork &render_work)
void PathTrace::set_output_driver(unique_ptr<OutputDriver> driver)
{
output_driver_ = move(driver);
output_driver_ = std::move(driver);
}
void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
@ -585,7 +585,7 @@ void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
destroy_gpu_resources();
if (driver) {
display_ = make_unique<PathTraceDisplay>(move(driver));
display_ = make_unique<PathTraceDisplay>(std::move(driver));
}
else {
display_ = nullptr;

View File

@ -9,7 +9,9 @@
CCL_NAMESPACE_BEGIN
PathTraceDisplay::PathTraceDisplay(unique_ptr<DisplayDriver> driver) : driver_(move(driver)) {}
PathTraceDisplay::PathTraceDisplay(unique_ptr<DisplayDriver> driver) : driver_(std::move(driver))
{
}
void PathTraceDisplay::reset(const BufferParams &buffer_params, const bool reset_rendering)
{

View File

@ -28,6 +28,7 @@ static size_t estimate_single_state_size(const uint kernel_features)
#define KERNEL_STRUCT_ARRAY_MEMBER(parent_struct, type, name, feature) \
state_size += (kernel_features & (feature)) ? sizeof(type) : 0;
#define KERNEL_STRUCT_END(name) \
(void)array_index; \
break; \
}
#define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
@ -139,6 +140,7 @@ void PathTraceWorkGPU::alloc_integrator_soa()
integrator_state_gpu_.parent_struct[array_index].name = (type *)array->device_pointer; \
}
#define KERNEL_STRUCT_END(name) \
(void)array_index; \
break; \
}
#define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
@ -299,8 +301,8 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
* become busy after adding new tiles). This is especially important for the shadow catcher which
* schedules work in halves of available number of paths. */
work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8);
work_tile_scheduler_.set_accelerated_rt((device_->get_bvh_layout_mask() & BVH_LAYOUT_OPTIX) !=
0);
work_tile_scheduler_.set_accelerated_rt(
(device_->get_bvh_layout_mask(device_scene_->data.kernel_features) & BVH_LAYOUT_OPTIX) != 0);
work_tile_scheduler_.reset(effective_buffer_params_,
start_sample,
samples_num,

View File

@ -96,10 +96,13 @@ set(SRC_KERNEL_DEVICE_ONEAPI_HEADERS
device/oneapi/compat.h
device/oneapi/context_begin.h
device/oneapi/context_end.h
device/oneapi/context_intersect_begin.h
device/oneapi/context_intersect_end.h
device/oneapi/globals.h
device/oneapi/image.h
device/oneapi/kernel.h
device/oneapi/kernel_templates.h
device/cpu/bvh.h
)
set(SRC_KERNEL_CLOSURE_HEADERS
@ -764,7 +767,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
# Set defaults for spir64 and spir64_gen options
if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64)
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-large-register-file -ze-opt-regular-grf-kernel integrator_intersect'")
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-regular-grf-kernel integrator_intersect -ze-opt-large-grf-kernel shade -ze-opt-no-local-to-generic'")
endif()
if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen)
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}" CACHE STRING "Extra build options for spir64_gen target")
@ -775,8 +778,6 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
# Host execution won't use GPU binaries, no need to compile them.
if(WITH_CYCLES_ONEAPI_BINARIES AND NOT WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
# AoT binaries aren't currently reused when calling sycl::build.
list(APPEND sycl_compiler_flags -DSYCL_SKIP_KERNELS_PRELOAD)
# Iterate over all targest and their options
list(JOIN CYCLES_ONEAPI_SYCL_TARGETS "," targets_string)
list(APPEND sycl_compiler_flags -fsycl-targets=${targets_string})
@ -798,6 +799,59 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
-I"${NANOVDB_INCLUDE_DIR}")
endif()
if(WITH_CYCLES_EMBREE AND EMBREE_SYCL_SUPPORT)
list(APPEND sycl_compiler_flags
-DWITH_EMBREE
-DWITH_EMBREE_GPU
-DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION}
-I"${EMBREE_INCLUDE_DIRS}")
if(WIN32)
list(APPEND sycl_compiler_flags
-ladvapi32.lib
)
endif()
set(next_library_mode "")
foreach(library ${EMBREE_LIBRARIES})
string(TOLOWER "${library}" library_lower)
if(("${library_lower}" STREQUAL "optimized") OR
("${library_lower}" STREQUAL "debug"))
set(next_library_mode "${library_lower}")
else()
if(next_library_mode STREQUAL "")
list(APPEND EMBREE_TBB_LIBRARIES_optimized ${library})
list(APPEND EMBREE_TBB_LIBRARIES_debug ${library})
else()
list(APPEND EMBREE_TBB_LIBRARIES_${next_library_mode} ${library})
endif()
set(next_library_mode "")
endif()
endforeach()
foreach(library ${TBB_LIBRARIES})
string(TOLOWER "${library}" library_lower)
if(("${library_lower}" STREQUAL "optimized") OR
("${library_lower}" STREQUAL "debug"))
set(next_library_mode "${library_lower}")
else()
if(next_library_mode STREQUAL "")
list(APPEND EMBREE_TBB_LIBRARIES_optimized ${library})
list(APPEND EMBREE_TBB_LIBRARIES_debug ${library})
else()
list(APPEND EMBREE_TBB_LIBRARIES_${next_library_mode} ${library})
endif()
set(next_library_mode "")
endif()
endforeach()
list(APPEND sycl_compiler_flags
"$<$<CONFIG:Release>:${EMBREE_TBB_LIBRARIES_optimized}>"
"$<$<CONFIG:RelWithDebInfo>:${EMBREE_TBB_LIBRARIES_optimized}>"
"$<$<CONFIG:MinSizeRel>:${EMBREE_TBB_LIBRARIES_optimized}>"
"$<$<CONFIG:Debug>:${EMBREE_TBB_LIBRARIES_debug}>"
)
endif()
if(WITH_CYCLES_DEBUG)
list(APPEND sycl_compiler_flags -DWITH_CYCLES_DEBUG)
endif()
@ -907,13 +961,22 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
DEPENDS ${cycles_oneapi_kernel_sources})
endif()
# For the Cycles standalone put libraries next to the Cycles application.
if(NOT WITH_BLENDER)
if(WIN32)
delayed_install("" "${cycles_kernel_oneapi_lib}" ${CYCLES_INSTALL_PATH})
else()
delayed_install("" "${cycles_kernel_oneapi_lib}" ${CYCLES_INSTALL_PATH}/lib)
endif()
# For the Cycles standalone put libraries next to the Cycles application.
set(cycles_oneapi_target_path ${CYCLES_INSTALL_PATH})
else()
# For Blender put the libraries next to the Blender executable.
#
# Note that the installation path in the delayed_install is relative to the versioned folder,
# which means we need to go one level up.
set(cycles_oneapi_target_path "../")
endif()
# install dynamic libraries required at runtime
if(WIN32)
delayed_install("" "${cycles_kernel_oneapi_lib}" ${cycles_oneapi_target_path})
elseif(UNIX AND NOT APPLE)
delayed_install("" "${cycles_kernel_oneapi_lib}" ${cycles_oneapi_target_path}/lib)
endif()
add_custom_target(cycles_kernel_oneapi ALL DEPENDS ${cycles_kernel_oneapi_lib})

View File

@ -21,6 +21,28 @@
# define __BVH2__
#endif
#if defined(__KERNEL_ONEAPI__) && defined(WITH_EMBREE_GPU)
/* bool is apparently not tested for specialization constants:
* https://github.com/intel/llvm/blob/39d1c65272a786b2b13a6f094facfddf9408406d/sycl/test/basic_tests/SYCL-2020-spec-constants.cpp#L25-L27
* Instead of adding one more bool specialization constant, we reuse existing embree_features one
* and use RTC_FEATURE_FLAG_NONE as value to test for avoiding to call Embree on GPU.
*/
/* We set it to RTC_FEATURE_FLAG_NONE by default so AoT binaries contain MNE and ray-trace kernels
* pre-compiled without Embree.
* Changing this default value would require updating the logic in oneapi_load_kernels(). */
static constexpr sycl::specialization_id<RTCFeatureFlags> oneapi_embree_features{
RTC_FEATURE_FLAG_NONE};
# define IF_USING_EMBREE \
if (kernel_handler.get_specialization_constant<oneapi_embree_features>() != \
RTC_FEATURE_FLAG_NONE)
# define IF_NOT_USING_EMBREE \
if (kernel_handler.get_specialization_constant<oneapi_embree_features>() == \
RTC_FEATURE_FLAG_NONE)
#else
# define IF_USING_EMBREE
# define IF_NOT_USING_EMBREE
#endif
CCL_NAMESPACE_BEGIN
#ifdef __BVH2__
@ -74,30 +96,39 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
}
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
return kernel_embree_intersect(kg, ray, visibility, isect);
IF_USING_EMBREE
{
if (kernel_data.device_bvh) {
return kernel_embree_intersect(kg, ray, visibility, isect);
}
}
# endif
IF_NOT_USING_EMBREE
{
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
if (kernel_data.bvh.have_motion) {
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_hair_motion(kg, ray, isect, visibility);
}
if (kernel_data.bvh.have_curves) {
return bvh_intersect_hair_motion(kg, ray, isect, visibility);
}
# endif /* __HAIR__ */
return bvh_intersect_motion(kg, ray, isect, visibility);
}
return bvh_intersect_motion(kg, ray, isect, visibility);
}
# endif /* __OBJECT_MOTION__ */
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_hair(kg, ray, isect, visibility);
}
if (kernel_data.bvh.have_curves) {
return bvh_intersect_hair(kg, ray, isect, visibility);
}
# endif /* __HAIR__ */
return bvh_intersect(kg, ray, isect, visibility);
return bvh_intersect(kg, ray, isect, visibility);
}
kernel_assert(false);
return false;
}
/* Single object BVH traversal, for SSS/AO/bevel. */
@ -129,17 +160,27 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
}
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
return kernel_embree_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
IF_USING_EMBREE
{
if (kernel_data.device_bvh) {
return kernel_embree_intersect_local(
kg, ray, local_isect, local_object, lcg_state, max_hits);
}
}
# endif
IF_NOT_USING_EMBREE
{
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
if (kernel_data.bvh.have_motion) {
return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
# endif /* __OBJECT_MOTION__ */
return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
kernel_assert(false);
return false;
}
# endif
@ -184,35 +225,44 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
}
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
return kernel_embree_intersect_shadow_all(
kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
IF_USING_EMBREE
{
if (kernel_data.device_bvh) {
return kernel_embree_intersect_shadow_all(
kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
}
}
# endif
IF_NOT_USING_EMBREE
{
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
if (kernel_data.bvh.have_motion) {
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair_motion(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair_motion(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
# endif /* __HAIR__ */
return bvh_intersect_shadow_all_motion(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
return bvh_intersect_shadow_all_motion(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
# endif /* __OBJECT_MOTION__ */
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
# endif /* __HAIR__ */
return bvh_intersect_shadow_all(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
return bvh_intersect_shadow_all(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
kernel_assert(false);
return false;
}
# endif /* __SHADOW_RECORD_ALL__ */
@ -239,13 +289,28 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
return false;
}
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
return bvh_intersect_volume_motion(kg, ray, isect, visibility);
# ifdef __EMBREE__
IF_USING_EMBREE
{
if (kernel_data.device_bvh) {
return kernel_embree_intersect_volume(kg, ray, isect, visibility);
}
}
# endif
IF_NOT_USING_EMBREE
{
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
return bvh_intersect_volume_motion(kg, ray, isect, visibility);
}
# endif /* __OBJECT_MOTION__ */
return bvh_intersect_volume(kg, ray, isect, visibility);
return bvh_intersect_volume(kg, ray, isect, visibility);
}
kernel_assert(false);
return false;
}
# endif /* defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__) */
@ -275,18 +340,27 @@ ccl_device_intersect uint scene_intersect_volume(KernelGlobals kg,
}
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
IF_USING_EMBREE
{
if (kernel_data.device_bvh) {
return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
}
}
# endif
IF_NOT_USING_EMBREE
{
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
}
if (kernel_data.bvh.have_motion) {
return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
}
# endif /* __OBJECT_MOTION__ */
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
}
kernel_assert(false);
return false;
}
# endif /* defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__) */

View File

@ -51,8 +51,6 @@ ccl_device_inline
int object = OBJECT_NONE;
float isect_t = ray->tmax;
int num_hits_in_instance = 0;
uint num_hits = 0;
isect_array->t = ray->tmax;
@ -152,7 +150,6 @@ ccl_device_inline
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
num_hits_in_instance++;
isect_array->t = isect_t;
if (num_hits == max_hits) {
return num_hits;
@ -193,7 +190,6 @@ ccl_device_inline
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
num_hits_in_instance++;
isect_array->t = isect_t;
if (num_hits == max_hits) {
return num_hits;
@ -219,7 +215,6 @@ ccl_device_inline
bvh_instance_push(kg, object, ray, &P, &dir, &idir);
#endif
num_hits_in_instance = 0;
isect_array->t = isect_t;
++stack_ptr;

View File

@ -13,8 +13,13 @@
# include <embree3/rtcore_scene.h>
#endif
#include "kernel/device/cpu/compat.h"
#include "kernel/device/cpu/globals.h"
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/compat.h"
# include "kernel/device/oneapi/globals.h"
#else
# include "kernel/device/cpu/compat.h"
# include "kernel/device/cpu/globals.h"
#endif
#include "kernel/bvh/types.h"
#include "kernel/bvh/util.h"
@ -33,11 +38,16 @@ using numhit_t = uint8_t;
using numhit_t = uint32_t;
#endif
#define CYCLES_EMBREE_USED_FEATURES \
(RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE | \
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT | \
RTC_FEATURE_FLAG_MOTION_BLUR | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE | \
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE)
#ifdef __KERNEL_ONEAPI__
# define CYCLES_EMBREE_USED_FEATURES \
(kernel_handler.get_specialization_constant<oneapi_embree_features>())
#else
# define CYCLES_EMBREE_USED_FEATURES \
(RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE | \
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT | \
RTC_FEATURE_FLAG_MOTION_BLUR | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE | \
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE)
#endif
#define EMBREE_IS_HAIR(x) (x & 1)
@ -99,7 +109,9 @@ struct CCLVolumeContext
#if EMBREE_MAJOR_VERSION >= 4
KernelGlobals kg;
const Ray *ray;
# ifdef __VOLUME_RECORD_ALL__
numhit_t max_hits;
# endif
numhit_t num_hits;
#endif
Intersection *vol_isect;
@ -252,7 +264,8 @@ ccl_device_inline void kernel_embree_convert_sss_hit(KernelGlobals kg,
* Things like recording subsurface or shadow hits for later evaluation
* as well as filtering for volume objects happen here.
* Cycles' own BVH does that directly inside the traversal calls. */
ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNArguments *args)
ccl_device_forceinline void kernel_embree_filter_intersection_func_impl(
const RTCFilterFunctionNArguments *args)
{
/* Current implementation in Cycles assumes only single-ray intersection queries. */
assert(args->N == 1);
@ -263,7 +276,11 @@ ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNA
#else
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
#endif
#ifdef __KERNEL_ONEAPI__
KernelGlobalsGPU *kg = nullptr;
#else
const KernelGlobalsCPU *kg = ctx->kg;
#endif
const Ray *cray = ctx->ray;
if (kernel_embree_is_self_intersection(
@ -277,7 +294,7 @@ ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNA
* as well as filtering for volume objects happen here.
* Cycles' own BVH does that directly inside the traversal calls.
*/
ccl_device void kernel_embree_filter_occluded_shadow_all_func(
ccl_device_forceinline void kernel_embree_filter_occluded_shadow_all_func_impl(
const RTCFilterFunctionNArguments *args)
{
/* Current implementation in Cycles assumes only single-ray intersection queries. */
@ -290,7 +307,11 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
#else
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
#endif
#ifdef __KERNEL_ONEAPI__
KernelGlobalsGPU *kg = nullptr;
#else
const KernelGlobalsCPU *kg = ctx->kg;
#endif
const Ray *cray = ctx->ray;
Intersection current_isect;
@ -326,7 +347,7 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
}
/* Test if we need to record this transparent intersection. */
const numhit_t max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
const numhit_t max_record_hits = min(ctx->max_hits, numhit_t(INTEGRATOR_SHADOW_ISECT_SIZE));
if (ctx->num_recorded_hits < max_record_hits) {
/* If maximum number of hits was reached, replace the intersection with the
* highest distance. We want to find the N closest intersections. */
@ -363,7 +384,7 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
*args->valid = 0;
}
ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
ccl_device_forceinline void kernel_embree_filter_occluded_local_func_impl(
const RTCFilterFunctionNArguments *args)
{
/* Current implementation in Cycles assumes only single-ray intersection queries. */
@ -376,7 +397,11 @@ ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
#else
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
#endif
#ifdef __KERNEL_ONEAPI__
KernelGlobalsGPU *kg = nullptr;
#else
const KernelGlobalsCPU *kg = ctx->kg;
#endif
const Ray *cray = ctx->ray;
/* Check if it's hitting the correct object. */
@ -462,7 +487,7 @@ ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
*args->valid = 0;
}
ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func_impl(
const RTCFilterFunctionNArguments *args)
{
/* Current implementation in Cycles assumes only single-ray intersection queries. */
@ -475,11 +500,17 @@ ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
#else
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
#endif
#ifdef __KERNEL_ONEAPI__
KernelGlobalsGPU *kg = nullptr;
#else
const KernelGlobalsCPU *kg = ctx->kg;
#endif
const Ray *cray = ctx->ray;
#ifdef __VOLUME_RECORD_ALL__
/* Append the intersection to the end of the array. */
if (ctx->num_hits < ctx->max_hits) {
#endif
Intersection current_isect;
kernel_embree_convert_hit(
kg, ray, hit, &current_isect, reinterpret_cast<intptr_t>(args->geometryUserPtr));
@ -496,10 +527,17 @@ ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
int object_flag = kernel_data_fetch(object_flag, tri_object);
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
--ctx->num_hits;
#ifndef __VOLUME_RECORD_ALL__
/* Without __VOLUME_RECORD_ALL__ we need only a first counted hit, so we will
* continue tracing only if a current hit is not counted. */
*args->valid = 0;
#endif
}
#ifdef __VOLUME_RECORD_ALL__
/* This tells Embree to continue tracing. */
*args->valid = 0;
}
#endif
}
#if EMBREE_MAJOR_VERSION < 4
@ -513,14 +551,14 @@ ccl_device_forceinline void kernel_embree_filter_occluded_func(
switch (ctx->type) {
case CCLIntersectContext::RAY_SHADOW_ALL:
kernel_embree_filter_occluded_shadow_all_func(args);
kernel_embree_filter_occluded_shadow_all_func_impl(args);
break;
case CCLIntersectContext::RAY_LOCAL:
case CCLIntersectContext::RAY_SSS:
kernel_embree_filter_occluded_local_func(args);
kernel_embree_filter_occluded_local_func_impl(args);
break;
case CCLIntersectContext::RAY_VOLUME_ALL:
kernel_embree_filter_occluded_volume_all_func(args);
kernel_embree_filter_occluded_volume_all_func_impl(args);
break;
case CCLIntersectContext::RAY_REGULAR:
@ -569,7 +607,63 @@ ccl_device void kernel_embree_filter_occluded_func_backface_cull(
kernel_embree_filter_occluded_func(args);
}
#endif
#ifdef __KERNEL_ONEAPI__
/* Static wrappers so we can call the callbacks from out side the ONEAPIKernelContext class */
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
kernel_embree_filter_intersection_func_static(const RTCFilterFunctionNArguments *args)
{
RTCHit *hit = (RTCHit *)args->hit;
CCLFirstHitContext *ctx = (CCLFirstHitContext *)(args->context);
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
context->kernel_embree_filter_intersection_func_impl(args);
}
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
kernel_embree_filter_occluded_shadow_all_func_static(const RTCFilterFunctionNArguments *args)
{
RTCHit *hit = (RTCHit *)args->hit;
CCLShadowContext *ctx = (CCLShadowContext *)(args->context);
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
context->kernel_embree_filter_occluded_shadow_all_func_impl(args);
}
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
kernel_embree_filter_occluded_local_func_static(const RTCFilterFunctionNArguments *args)
{
RTCHit *hit = (RTCHit *)args->hit;
CCLLocalContext *ctx = (CCLLocalContext *)(args->context);
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
context->kernel_embree_filter_occluded_local_func_impl(args);
}
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
kernel_embree_filter_occluded_volume_all_func_static(const RTCFilterFunctionNArguments *args)
{
RTCHit *hit = (RTCHit *)args->hit;
CCLVolumeContext *ctx = (CCLVolumeContext *)(args->context);
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
context->kernel_embree_filter_occluded_volume_all_func_impl(args);
}
# define kernel_embree_filter_intersection_func \
ONEAPIKernelContext::kernel_embree_filter_intersection_func_static
# define kernel_embree_filter_occluded_shadow_all_func \
ONEAPIKernelContext::kernel_embree_filter_occluded_shadow_all_func_static
# define kernel_embree_filter_occluded_local_func \
ONEAPIKernelContext::kernel_embree_filter_occluded_local_func_static
# define kernel_embree_filter_occluded_volume_all_func \
ONEAPIKernelContext::kernel_embree_filter_occluded_volume_all_func_static
#else
# define kernel_embree_filter_intersection_func kernel_embree_filter_intersection_func_impl
# if EMBREE_MAJOR_VERSION >= 4
# define kernel_embree_filter_occluded_shadow_all_func \
kernel_embree_filter_occluded_shadow_all_func_impl
# define kernel_embree_filter_occluded_local_func kernel_embree_filter_occluded_local_func_impl
# define kernel_embree_filter_occluded_volume_all_func \
kernel_embree_filter_occluded_volume_all_func_impl
# endif
#endif
/* Scene intersection. */
@ -583,7 +677,15 @@ ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
#if EMBREE_MAJOR_VERSION >= 4
CCLFirstHitContext ctx;
rtcInitRayQueryContext(&ctx);
# ifdef __KERNEL_ONEAPI__
/* NOTE(sirgienko): Cycles GPU back-ends passes NULL to KernelGlobals and
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
* as a class context (Metal, oneAPI). So we need to pass this context here
* in order to have an access to it later in Embree filter functions on GPU. */
ctx.kg = (KernelGlobals)this;
# else
ctx.kg = kg;
# endif
#else
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
rtcInitIntersectContext(&ctx);
@ -596,7 +698,7 @@ ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
#if EMBREE_MAJOR_VERSION >= 4
RTCIntersectArguments args;
rtcInitIntersectArguments(&args);
args.filter = (RTCFilterFunctionN)kernel_embree_filter_intersection_func;
args.filter = reinterpret_cast<RTCFilterFunctionN>(kernel_embree_filter_intersection_func);
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
rtcIntersect1(kernel_data.device_bvh, &ray_hit, &args);
@ -625,7 +727,15 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
# if EMBREE_MAJOR_VERSION >= 4
CCLLocalContext ctx;
rtcInitRayQueryContext(&ctx);
# ifdef __KERNEL_ONEAPI__
/* NOTE(sirgienko): Cycles GPU back-ends passes NULL to KernelGlobals and
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
* as a class context (Metal, oneAPI). So we need to pass this context here
* in order to have an access to it later in Embree filter functions on GPU. */
ctx.kg = (KernelGlobals)this;
# else
ctx.kg = kg;
# endif
# else
CCLIntersectContext ctx(kg,
has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL);
@ -646,7 +756,7 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
# if EMBREE_MAJOR_VERSION >= 4
RTCOccludedArguments args;
rtcInitOccludedArguments(&args);
args.filter = (RTCFilterFunctionN)(kernel_embree_filter_occluded_local_func);
args.filter = reinterpret_cast<RTCFilterFunctionN>(kernel_embree_filter_occluded_local_func);
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
# endif
@ -692,7 +802,7 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
#ifdef __SHADOW_RECORD_ALL__
ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
IntegratorShadowStateCPU *state,
IntegratorShadowState state,
ccl_private const Ray *ray,
uint visibility,
uint max_hits,
@ -702,7 +812,15 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
# if EMBREE_MAJOR_VERSION >= 4
CCLShadowContext ctx;
rtcInitRayQueryContext(&ctx);
# ifdef __KERNEL_ONEAPI__
/* NOTE(sirgienko): Cycles GPU back-ends passes NULL to KernelGlobals and
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
* as a class context (Metal, oneAPI). So we need to pass this context here
* in order to have an access to it later in Embree filter functions on GPU. */
ctx.kg = (KernelGlobals)this;
# else
ctx.kg = kg;
# endif
# else
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
rtcInitIntersectContext(&ctx);
@ -718,7 +836,8 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
# if EMBREE_MAJOR_VERSION >= 4
RTCOccludedArguments args;
rtcInitOccludedArguments(&args);
args.filter = (RTCFilterFunctionN)kernel_embree_filter_occluded_shadow_all_func;
args.filter = reinterpret_cast<RTCFilterFunctionN>(
kernel_embree_filter_occluded_shadow_all_func);
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
@ -736,19 +855,31 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
ccl_private const Ray *ray,
ccl_private Intersection *isect,
# ifdef __VOLUME_RECORD_ALL__
const uint max_hits,
# endif
const uint visibility)
{
# if EMBREE_MAJOR_VERSION >= 4
CCLVolumeContext ctx;
rtcInitRayQueryContext(&ctx);
# ifdef __KERNEL_ONEAPI__
/* NOTE(sirgienko) Cycles GPU back-ends passes NULL to KernelGlobals and
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
* as a class context (Metal, oneAPI). So we need to pass this context here
* in order to have an access to it later in Embree filter functions on GPU. */
ctx.kg = (KernelGlobals)this;
# else
ctx.kg = kg;
# endif
# else
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
rtcInitIntersectContext(&ctx);
# endif
ctx.vol_isect = isect;
# ifdef __VOLUME_RECORD_ALL__
ctx.max_hits = numhit_t(max_hits);
# endif
ctx.num_hits = numhit_t(0);
ctx.ray = ray;
RTCRay rtc_ray;
@ -756,7 +887,8 @@ ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
# if EMBREE_MAJOR_VERSION >= 4
RTCOccludedArguments args;
rtcInitOccludedArguments(&args);
args.filter = (RTCFilterFunctionN)kernel_embree_filter_occluded_volume_all_func;
args.filter = reinterpret_cast<RTCFilterFunctionN>(
kernel_embree_filter_occluded_volume_all_func);
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);

View File

@ -128,6 +128,12 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
ccl_gpu_kernel_postfix
/* Intersection kernels need access to the kernel handler for specialization constants to work
* properly. */
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_begin.h"
#endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_intersect_closest,
ccl_global const int *path_index_array,
@ -185,6 +191,10 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
ccl_gpu_kernel_postfix
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_end.h"
#endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_background,
ccl_global const int *path_index_array,
@ -249,6 +259,12 @@ ccl_gpu_kernel_postfix
constant int __dummy_constant [[function_constant(Kernel_DummyConstant)]];
#endif
/* Kernels using intersections need access to the kernel handler for specialization constants to
* work properly. */
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_begin.h"
#endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_surface_raytrace,
ccl_global const int *path_index_array,
@ -287,6 +303,9 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
}
ccl_gpu_kernel_postfix
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_end.h"
#endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_volume,

View File

@ -5,6 +5,11 @@
#define __KERNEL_GPU__
#define __KERNEL_ONEAPI__
#define __KERNEL_64_BIT__
#ifdef WITH_EMBREE_GPU
# define __KERNEL_GPU_RAYTRACING__
#endif
#define CCL_NAMESPACE_BEGIN
#define CCL_NAMESPACE_END
@ -57,17 +62,19 @@
#define ccl_gpu_kernel_threads(block_num_threads)
#ifndef WITH_ONEAPI_SYCL_HOST_TASK
# define ccl_gpu_kernel_signature(name, ...) \
# define __ccl_gpu_kernel_signature(name, ...) \
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
size_t kernel_global_size, \
size_t kernel_local_size, \
sycl::handler &cgh, \
__VA_ARGS__) { \
(kg); \
cgh.parallel_for<class kernel_##name>( \
cgh.parallel_for( \
sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
[=](sycl::nd_item<1> item) {
# define ccl_gpu_kernel_signature __ccl_gpu_kernel_signature
# define ccl_gpu_kernel_postfix \
}); \
}

View File

@ -0,0 +1,18 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2023 Intel Corporation */
#if !defined(WITH_ONEAPI_SYCL_HOST_TASK) && defined(WITH_EMBREE_GPU)
# undef ccl_gpu_kernel_signature
# define ccl_gpu_kernel_signature(name, ...) \
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
size_t kernel_global_size, \
size_t kernel_local_size, \
sycl::handler &cgh, \
__VA_ARGS__) \
{ \
(kg); \
cgh.parallel_for( \
sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
[=](sycl::nd_item<1> item, sycl::kernel_handler oneapi_kernel_handler) { \
((ONEAPIKernelContext*)kg)->kernel_handler = oneapi_kernel_handler;
#endif

View File

@ -0,0 +1,7 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2023 Intel Corporation */
#if !defined(WITH_ONEAPI_SYCL_HOST_TASK) && defined(WITH_EMBREE_GPU)
# undef ccl_gpu_kernel_signature
# define ccl_gpu_kernel_signature __ccl_gpu_kernel_signature
#endif

View File

@ -31,6 +31,8 @@ typedef struct KernelGlobalsGPU {
size_t nd_item_group_range_0;
size_t nd_item_global_id_0;
size_t nd_item_global_range_0;
#else
sycl::kernel_handler kernel_handler;
#endif
} KernelGlobalsGPU;

View File

@ -16,9 +16,22 @@
# include "kernel/device/gpu/kernel.h"
# include "device/kernel.cpp"
static OneAPIErrorCallback s_error_cb = nullptr;
static void *s_error_user_ptr = nullptr;
# ifdef WITH_EMBREE_GPU
static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_BASIC_FEATURES =
(const RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE |
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS |
RTC_FEATURE_FLAG_POINT | RTC_FEATURE_FLAG_MOTION_BLUR);
static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_ALL_FEATURES =
(const RTCFeatureFlags)(CYCLES_ONEAPI_EMBREE_BASIC_FEATURES |
RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE |
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE);
# endif
void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr)
{
s_error_cb = cb;
@ -142,15 +155,99 @@ size_t oneapi_kernel_preferred_local_size(SyclQueue *queue,
return std::min(limit_work_group_size, preferred_work_group_size);
}
bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
bool oneapi_kernel_is_required_for_features(const std::string &kernel_name,
const uint kernel_features)
{
if ((kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0 &&
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE)) !=
std::string::npos)
return false;
if ((kernel_features & KERNEL_FEATURE_MNEE) == 0 &&
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE)) !=
std::string::npos)
return false;
if ((kernel_features & KERNEL_FEATURE_VOLUME) == 0 &&
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK)) !=
std::string::npos)
return false;
return true;
}
bool oneapi_kernel_is_raytrace_or_mnee(const std::string &kernel_name)
{
return (kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE)) !=
std::string::npos) ||
(kernel_name.find(device_kernel_as_string(
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE)) != std::string::npos);
}
bool oneapi_kernel_is_using_embree(const std::string &kernel_name)
{
# ifdef WITH_EMBREE_GPU
/* MNEE and Ray-trace kernels aren't yet enabled to use Embree. */
for (int i = 0; i < (int)DEVICE_KERNEL_NUM; i++) {
DeviceKernel kernel = (DeviceKernel)i;
if (device_kernel_has_intersection(kernel)) {
if (kernel_name.find(device_kernel_as_string(kernel)) != std::string::npos) {
return !oneapi_kernel_is_raytrace_or_mnee(kernel_name);
}
}
}
# endif
return false;
}
bool oneapi_load_kernels(SyclQueue *queue_,
const uint kernel_features,
bool use_hardware_raytracing)
{
# ifdef SYCL_SKIP_KERNELS_PRELOAD
(void)queue_;
(void)requested_features;
# else
assert(queue_);
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
# ifdef WITH_EMBREE_GPU
/* For best performance, we always JIT compile the kernels that are using Embree. */
if (use_hardware_raytracing) {
try {
sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
{queue->get_device()});
for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
const std::string &kernel_name = kernel_id.get_name();
if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
!oneapi_kernel_is_using_embree(kernel_name)) {
continue;
}
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
/* Hair requires embree curves support. */
if (kernel_features & KERNEL_FEATURE_HAIR) {
one_kernel_bundle_input
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
CYCLES_ONEAPI_EMBREE_ALL_FEATURES);
sycl::build(one_kernel_bundle_input);
}
else {
one_kernel_bundle_input
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
CYCLES_ONEAPI_EMBREE_BASIC_FEATURES);
sycl::build(one_kernel_bundle_input);
}
}
}
catch (sycl::exception const &e) {
if (s_error_cb) {
s_error_cb(e.what(), s_error_user_ptr);
}
return false;
}
}
# endif
try {
sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
@ -159,27 +256,29 @@ bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
const std::string &kernel_name = kernel_id.get_name();
/* NOTE(@nsirgien): Names in this conditions below should match names from
* oneapi_call macro in oneapi_enqueue_kernel below */
if (((requested_features & KERNEL_FEATURE_VOLUME) == 0) &&
kernel_name.find("oneapi_kernel_integrator_shade_volume") != std::string::npos) {
/* In case HWRT is on, compilation of kernels using Embree is already handled in previous
* block. */
if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
(use_hardware_raytracing && oneapi_kernel_is_using_embree(kernel_name))) {
continue;
}
if (((requested_features & KERNEL_FEATURE_MNEE) == 0) &&
kernel_name.find("oneapi_kernel_integrator_shade_surface_mnee") != std::string::npos) {
# ifdef WITH_EMBREE_GPU
if (oneapi_kernel_is_using_embree(kernel_name) ||
oneapi_kernel_is_raytrace_or_mnee(kernel_name)) {
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
one_kernel_bundle_input
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
RTC_FEATURE_FLAG_NONE);
sycl::build(one_kernel_bundle_input);
continue;
}
if (((requested_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0) &&
kernel_name.find("oneapi_kernel_integrator_shade_surface_raytrace") !=
std::string::npos) {
continue;
}
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle =
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
sycl::build(one_kernel_bundle);
# endif
/* This call will ensure that AoT or cached JIT binaries are available
* for execution. It will trigger compilation if it is not already the case. */
(void)sycl::get_kernel_bundle<sycl::bundle_state::executable>(queue->get_context(),
{kernel_id});
}
}
catch (sycl::exception const &e) {
@ -188,13 +287,14 @@ bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
}
return false;
}
# endif
return true;
}
bool oneapi_enqueue_kernel(KernelContext *kernel_context,
int kernel,
size_t global_size,
const uint kernel_features,
bool use_hardware_raytracing,
void **args)
{
bool success = true;
@ -248,6 +348,21 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
try {
queue->submit([&](sycl::handler &cgh) {
# ifdef WITH_EMBREE_GPU
/* Spec says it has no effect if the called kernel doesn't support the below specialization
* constant but it can still trigger a recompilation, so we set it only if needed. */
if (device_kernel_has_intersection(device_kernel)) {
const RTCFeatureFlags used_embree_features = !use_hardware_raytracing ?
RTC_FEATURE_FLAG_NONE :
!(kernel_features & KERNEL_FEATURE_HAIR) ?
CYCLES_ONEAPI_EMBREE_BASIC_FEATURES :
CYCLES_ONEAPI_EMBREE_ALL_FEATURES;
cgh.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
used_embree_features);
}
# else
(void)kernel_features;
# endif
switch (device_kernel) {
case DEVICE_KERNEL_INTEGRATOR_RESET: {
oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_reset);
@ -549,4 +664,5 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
# endif
return success;
}
#endif /* WITH_ONEAPI */

View File

@ -47,10 +47,14 @@ CYCLES_KERNEL_ONEAPI_EXPORT size_t oneapi_kernel_preferred_local_size(
CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_enqueue_kernel(KernelContext *context,
int kernel,
size_t global_size,
const unsigned int kernel_features,
bool use_hardware_raytracing,
void **args);
CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_load_kernels(SyclQueue *queue,
const unsigned int requested_features);
const unsigned int kernel_features,
bool use_hardware_raytracing);
# ifdef __cplusplus
}
# endif
#endif /* WITH_ONEAPI */

View File

@ -3,8 +3,9 @@
#pragma once
#if !defined(__KERNEL_GPU__) && defined(WITH_EMBREE)
# if EMBREE_MAJOR_VERSION >= 4
#if (!defined(__KERNEL_GPU__) || (defined(__KERNEL_ONEAPI__) && defined(WITH_EMBREE_GPU))) && \
defined(WITH_EMBREE)
# if EMBREE_MAJOR_VERSION == 4
# include <embree4/rtcore.h>
# include <embree4/rtcore_scene.h>
# else
@ -78,9 +79,8 @@ CCL_NAMESPACE_BEGIN
#define __VISIBILITY_FLAG__
#define __VOLUME__
/* TODO: solve internal compiler errors and enable light tree on HIP. */
/* TODO: solve internal compiler perf issue and enable light tree on Metal/AMD. */
#if defined(__KERNEL_HIP__) || defined(__KERNEL_METAL_AMD__)
#if defined(__KERNEL_METAL_AMD__)
# undef __LIGHT_TREE__
#endif

View File

@ -15,8 +15,12 @@ set(SRC
camera.cpp
colorspace.cpp
constant_fold.cpp
devicescene.cpp
film.cpp
geometry.cpp
geometry_attributes.cpp
geometry_bvh.cpp
geometry_mesh.cpp
hair.cpp
image.cpp
image_oiio.cpp
@ -55,6 +59,7 @@ set(SRC_HEADERS
camera.h
colorspace.h
constant_fold.h
devicescene.h
film.h
geometry.h
hair.h

View File

@ -0,0 +1,64 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#include "scene/devicescene.h"
#include "device/device.h"
#include "device/memory.h"
CCL_NAMESPACE_BEGIN
DeviceScene::DeviceScene(Device *device)
: bvh_nodes(device, "bvh_nodes", MEM_GLOBAL),
bvh_leaf_nodes(device, "bvh_leaf_nodes", MEM_GLOBAL),
object_node(device, "object_node", MEM_GLOBAL),
prim_type(device, "prim_type", MEM_GLOBAL),
prim_visibility(device, "prim_visibility", MEM_GLOBAL),
prim_index(device, "prim_index", MEM_GLOBAL),
prim_object(device, "prim_object", MEM_GLOBAL),
prim_time(device, "prim_time", MEM_GLOBAL),
tri_verts(device, "tri_verts", MEM_GLOBAL),
tri_shader(device, "tri_shader", MEM_GLOBAL),
tri_vnormal(device, "tri_vnormal", MEM_GLOBAL),
tri_vindex(device, "tri_vindex", MEM_GLOBAL),
tri_patch(device, "tri_patch", MEM_GLOBAL),
tri_patch_uv(device, "tri_patch_uv", MEM_GLOBAL),
curves(device, "curves", MEM_GLOBAL),
curve_keys(device, "curve_keys", MEM_GLOBAL),
curve_segments(device, "curve_segments", MEM_GLOBAL),
patches(device, "patches", MEM_GLOBAL),
points(device, "points", MEM_GLOBAL),
points_shader(device, "points_shader", MEM_GLOBAL),
objects(device, "objects", MEM_GLOBAL),
object_motion_pass(device, "object_motion_pass", MEM_GLOBAL),
object_motion(device, "object_motion", MEM_GLOBAL),
object_flag(device, "object_flag", MEM_GLOBAL),
object_volume_step(device, "object_volume_step", MEM_GLOBAL),
object_prim_offset(device, "object_prim_offset", MEM_GLOBAL),
camera_motion(device, "camera_motion", MEM_GLOBAL),
attributes_map(device, "attributes_map", MEM_GLOBAL),
attributes_float(device, "attributes_float", MEM_GLOBAL),
attributes_float2(device, "attributes_float2", MEM_GLOBAL),
attributes_float3(device, "attributes_float3", MEM_GLOBAL),
attributes_float4(device, "attributes_float4", MEM_GLOBAL),
attributes_uchar4(device, "attributes_uchar4", MEM_GLOBAL),
light_distribution(device, "light_distribution", MEM_GLOBAL),
lights(device, "lights", MEM_GLOBAL),
light_background_marginal_cdf(device, "light_background_marginal_cdf", MEM_GLOBAL),
light_background_conditional_cdf(device, "light_background_conditional_cdf", MEM_GLOBAL),
light_tree_nodes(device, "light_tree_nodes", MEM_GLOBAL),
light_tree_emitters(device, "light_tree_emitters", MEM_GLOBAL),
light_to_tree(device, "light_to_tree", MEM_GLOBAL),
object_to_tree(device, "object_to_tree", MEM_GLOBAL),
object_lookup_offset(device, "object_lookup_offset", MEM_GLOBAL),
triangle_to_tree(device, "triangle_to_tree", MEM_GLOBAL),
particles(device, "particles", MEM_GLOBAL),
svm_nodes(device, "svm_nodes", MEM_GLOBAL),
shaders(device, "shaders", MEM_GLOBAL),
lookup_table(device, "lookup_table", MEM_GLOBAL),
sample_pattern_lut(device, "sample_pattern_lut", MEM_GLOBAL),
ies_lights(device, "ies", MEM_GLOBAL)
{
memset((void *)&data, 0, sizeof(data));
}
CCL_NAMESPACE_END

View File

@ -0,0 +1,101 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#ifndef __DEVICESCENE_H__
#define __DEVICESCENE_H__
#include "device/device.h"
#include "device/memory.h"
#include "util/types.h"
#include "util/vector.h"
CCL_NAMESPACE_BEGIN
class DeviceScene {
public:
/* BVH */
device_vector<int4> bvh_nodes;
device_vector<int4> bvh_leaf_nodes;
device_vector<int> object_node;
device_vector<int> prim_type;
device_vector<uint> prim_visibility;
device_vector<int> prim_index;
device_vector<int> prim_object;
device_vector<float2> prim_time;
/* mesh */
device_vector<packed_float3> tri_verts;
device_vector<uint> tri_shader;
device_vector<packed_float3> tri_vnormal;
device_vector<packed_uint3> tri_vindex;
device_vector<uint> tri_patch;
device_vector<float2> tri_patch_uv;
device_vector<KernelCurve> curves;
device_vector<float4> curve_keys;
device_vector<KernelCurveSegment> curve_segments;
device_vector<uint> patches;
/* point-cloud */
device_vector<float4> points;
device_vector<uint> points_shader;
/* objects */
device_vector<KernelObject> objects;
device_vector<Transform> object_motion_pass;
device_vector<DecomposedTransform> object_motion;
device_vector<uint> object_flag;
device_vector<float> object_volume_step;
device_vector<uint> object_prim_offset;
/* cameras */
device_vector<DecomposedTransform> camera_motion;
/* attributes */
device_vector<AttributeMap> attributes_map;
device_vector<float> attributes_float;
device_vector<float2> attributes_float2;
device_vector<packed_float3> attributes_float3;
device_vector<float4> attributes_float4;
device_vector<uchar4> attributes_uchar4;
/* lights */
device_vector<KernelLightDistribution> light_distribution;
device_vector<KernelLight> lights;
device_vector<float2> light_background_marginal_cdf;
device_vector<float2> light_background_conditional_cdf;
/* light tree */
device_vector<KernelLightTreeNode> light_tree_nodes;
device_vector<KernelLightTreeEmitter> light_tree_emitters;
device_vector<uint> light_to_tree;
device_vector<uint> object_to_tree;
device_vector<uint> object_lookup_offset;
device_vector<uint> triangle_to_tree;
/* particles */
device_vector<KernelParticle> particles;
/* shaders */
device_vector<int4> svm_nodes;
device_vector<KernelShader> shaders;
/* lookup tables */
device_vector<float> lookup_table;
/* integrator */
device_vector<float> sample_pattern_lut;
/* IES lights */
device_vector<float> ies_lights;
KernelData data;
DeviceScene(Device *device);
};
CCL_NAMESPACE_END
#endif /* __DEVICESCENE_H__ */

File diff suppressed because it is too large Load Diff

View File

@ -30,6 +30,38 @@ class Shader;
class Volume;
struct PackedBVH;
/* Set of flags used to help determining what data has been modified or needs reallocation, so we
* can decide which device data to free or update. */
enum {
DEVICE_CURVE_DATA_MODIFIED = (1 << 0),
DEVICE_MESH_DATA_MODIFIED = (1 << 1),
DEVICE_POINT_DATA_MODIFIED = (1 << 2),
ATTR_FLOAT_MODIFIED = (1 << 3),
ATTR_FLOAT2_MODIFIED = (1 << 4),
ATTR_FLOAT3_MODIFIED = (1 << 5),
ATTR_FLOAT4_MODIFIED = (1 << 6),
ATTR_UCHAR4_MODIFIED = (1 << 7),
CURVE_DATA_NEED_REALLOC = (1 << 8),
MESH_DATA_NEED_REALLOC = (1 << 9),
POINT_DATA_NEED_REALLOC = (1 << 10),
ATTR_FLOAT_NEEDS_REALLOC = (1 << 11),
ATTR_FLOAT2_NEEDS_REALLOC = (1 << 12),
ATTR_FLOAT3_NEEDS_REALLOC = (1 << 13),
ATTR_FLOAT4_NEEDS_REALLOC = (1 << 14),
ATTR_UCHAR4_NEEDS_REALLOC = (1 << 15),
ATTRS_NEED_REALLOC = (ATTR_FLOAT_NEEDS_REALLOC | ATTR_FLOAT2_NEEDS_REALLOC |
ATTR_FLOAT3_NEEDS_REALLOC | ATTR_FLOAT4_NEEDS_REALLOC |
ATTR_UCHAR4_NEEDS_REALLOC),
DEVICE_MESH_DATA_NEEDS_REALLOC = (MESH_DATA_NEED_REALLOC | ATTRS_NEED_REALLOC),
DEVICE_POINT_DATA_NEEDS_REALLOC = (POINT_DATA_NEED_REALLOC | ATTRS_NEED_REALLOC),
DEVICE_CURVE_DATA_NEEDS_REALLOC = (CURVE_DATA_NEED_REALLOC | ATTRS_NEED_REALLOC),
};
/* Geometry
*
* Base class for geometric types like Mesh and Hair. */

View File

@ -0,0 +1,722 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#include "bvh/bvh.h"
#include "bvh/bvh2.h"
#include "device/device.h"
#include "scene/attribute.h"
#include "scene/camera.h"
#include "scene/geometry.h"
#include "scene/hair.h"
#include "scene/light.h"
#include "scene/mesh.h"
#include "scene/object.h"
#include "scene/pointcloud.h"
#include "scene/scene.h"
#include "scene/shader.h"
#include "scene/shader_nodes.h"
#include "scene/stats.h"
#include "scene/volume.h"
#include "subd/patch_table.h"
#include "subd/split.h"
#include "kernel/osl/globals.h"
#include "util/foreach.h"
#include "util/log.h"
#include "util/progress.h"
#include "util/task.h"
CCL_NAMESPACE_BEGIN
bool Geometry::need_attribute(Scene *scene, AttributeStandard std)
{
if (std == ATTR_STD_NONE)
return false;
if (scene->need_global_attribute(std))
return true;
foreach (Node *node, used_shaders) {
Shader *shader = static_cast<Shader *>(node);
if (shader->attributes.find(std))
return true;
}
return false;
}
bool Geometry::need_attribute(Scene * /*scene*/, ustring name)
{
if (name == ustring())
return false;
foreach (Node *node, used_shaders) {
Shader *shader = static_cast<Shader *>(node);
if (shader->attributes.find(name))
return true;
}
return false;
}
AttributeRequestSet Geometry::needed_attributes()
{
AttributeRequestSet result;
foreach (Node *node, used_shaders) {
Shader *shader = static_cast<Shader *>(node);
result.add(shader->attributes);
}
return result;
}
bool Geometry::has_voxel_attributes() const
{
foreach (const Attribute &attr, attributes.attributes) {
if (attr.element == ATTR_ELEMENT_VOXEL) {
return true;
}
}
return false;
}
/* Generate a normal attribute map entry from an attribute descriptor. */
static void emit_attribute_map_entry(AttributeMap *attr_map,
size_t index,
uint64_t id,
TypeDesc type,
const AttributeDescriptor &desc)
{
attr_map[index].id = id;
attr_map[index].element = desc.element;
attr_map[index].offset = as_uint(desc.offset);
if (type == TypeDesc::TypeFloat)
attr_map[index].type = NODE_ATTR_FLOAT;
else if (type == TypeDesc::TypeMatrix)
attr_map[index].type = NODE_ATTR_MATRIX;
else if (type == TypeFloat2)
attr_map[index].type = NODE_ATTR_FLOAT2;
else if (type == TypeFloat4)
attr_map[index].type = NODE_ATTR_FLOAT4;
else if (type == TypeRGBA)
attr_map[index].type = NODE_ATTR_RGBA;
else
attr_map[index].type = NODE_ATTR_FLOAT3;
attr_map[index].flags = desc.flags;
}
/* Generate an attribute map end marker, optionally including a link to another map.
* Links are used to connect object attribute maps to mesh attribute maps. */
static void emit_attribute_map_terminator(AttributeMap *attr_map,
size_t index,
bool chain,
uint chain_link)
{
for (int j = 0; j < ATTR_PRIM_TYPES; j++) {
attr_map[index + j].id = ATTR_STD_NONE;
attr_map[index + j].element = chain; /* link is valid flag */
attr_map[index + j].offset = chain ? chain_link + j : 0; /* link to the correct sub-entry */
attr_map[index + j].type = 0;
attr_map[index + j].flags = 0;
}
}
/* Generate all necessary attribute map entries from the attribute request. */
static void emit_attribute_mapping(
AttributeMap *attr_map, size_t index, uint64_t id, AttributeRequest &req, Geometry *geom)
{
emit_attribute_map_entry(attr_map, index, id, req.type, req.desc);
if (geom->is_mesh()) {
Mesh *mesh = static_cast<Mesh *>(geom);
if (mesh->get_num_subd_faces()) {
emit_attribute_map_entry(attr_map, index + 1, id, req.subd_type, req.subd_desc);
}
}
}
void GeometryManager::update_svm_attributes(Device *,
DeviceScene *dscene,
Scene *scene,
vector<AttributeRequestSet> &geom_attributes,
vector<AttributeRequestSet> &object_attributes)
{
/* for SVM, the attributes_map table is used to lookup the offset of an
* attribute, based on a unique shader attribute id. */
/* compute array stride */
size_t attr_map_size = 0;
for (size_t i = 0; i < scene->geometry.size(); i++) {
Geometry *geom = scene->geometry[i];
geom->attr_map_offset = attr_map_size;
#ifdef WITH_OSL
size_t attr_count = 0;
foreach (AttributeRequest &req, geom_attributes[i].requests) {
if (req.std != ATTR_STD_NONE &&
scene->shader_manager->get_attribute_id(req.std) != (uint64_t)req.std)
attr_count += 2;
else
attr_count += 1;
}
#else
const size_t attr_count = geom_attributes[i].size();
#endif
attr_map_size += (attr_count + 1) * ATTR_PRIM_TYPES;
}
for (size_t i = 0; i < scene->objects.size(); i++) {
Object *object = scene->objects[i];
/* only allocate a table for the object if it actually has attributes */
if (object_attributes[i].size() == 0) {
object->attr_map_offset = 0;
}
else {
object->attr_map_offset = attr_map_size;
attr_map_size += (object_attributes[i].size() + 1) * ATTR_PRIM_TYPES;
}
}
if (attr_map_size == 0)
return;
if (!dscene->attributes_map.need_realloc()) {
return;
}
/* create attribute map */
AttributeMap *attr_map = dscene->attributes_map.alloc(attr_map_size);
memset(attr_map, 0, dscene->attributes_map.size() * sizeof(*attr_map));
for (size_t i = 0; i < scene->geometry.size(); i++) {
Geometry *geom = scene->geometry[i];
AttributeRequestSet &attributes = geom_attributes[i];
/* set geometry attributes */
size_t index = geom->attr_map_offset;
foreach (AttributeRequest &req, attributes.requests) {
uint64_t id;
if (req.std == ATTR_STD_NONE)
id = scene->shader_manager->get_attribute_id(req.name);
else
id = scene->shader_manager->get_attribute_id(req.std);
emit_attribute_mapping(attr_map, index, id, req, geom);
index += ATTR_PRIM_TYPES;
#ifdef WITH_OSL
/* Some standard attributes are explicitly referenced via their standard ID, so add those
* again in case they were added under a different attribute ID. */
if (req.std != ATTR_STD_NONE && id != (uint64_t)req.std) {
emit_attribute_mapping(attr_map, index, (uint64_t)req.std, req, geom);
index += ATTR_PRIM_TYPES;
}
#endif
}
emit_attribute_map_terminator(attr_map, index, false, 0);
}
for (size_t i = 0; i < scene->objects.size(); i++) {
Object *object = scene->objects[i];
AttributeRequestSet &attributes = object_attributes[i];
/* set object attributes */
if (attributes.size() > 0) {
size_t index = object->attr_map_offset;
foreach (AttributeRequest &req, attributes.requests) {
uint64_t id;
if (req.std == ATTR_STD_NONE)
id = scene->shader_manager->get_attribute_id(req.name);
else
id = scene->shader_manager->get_attribute_id(req.std);
emit_attribute_mapping(attr_map, index, id, req, object->geometry);
index += ATTR_PRIM_TYPES;
}
emit_attribute_map_terminator(attr_map, index, true, object->geometry->attr_map_offset);
}
}
/* copy to device */
dscene->attributes_map.copy_to_device();
}
void GeometryManager::update_attribute_element_offset(Geometry *geom,
device_vector<float> &attr_float,
size_t &attr_float_offset,
device_vector<float2> &attr_float2,
size_t &attr_float2_offset,
device_vector<packed_float3> &attr_float3,
size_t &attr_float3_offset,
device_vector<float4> &attr_float4,
size_t &attr_float4_offset,
device_vector<uchar4> &attr_uchar4,
size_t &attr_uchar4_offset,
Attribute *mattr,
AttributePrimitive prim,
TypeDesc &type,
AttributeDescriptor &desc)
{
if (mattr) {
/* store element and type */
desc.element = mattr->element;
desc.flags = mattr->flags;
type = mattr->type;
/* store attribute data in arrays */
size_t size = mattr->element_size(geom, prim);
AttributeElement &element = desc.element;
int &offset = desc.offset;
if (mattr->element == ATTR_ELEMENT_VOXEL) {
/* store slot in offset value */
ImageHandle &handle = mattr->data_voxel();
offset = handle.svm_slot();
}
else if (mattr->element == ATTR_ELEMENT_CORNER_BYTE) {
uchar4 *data = mattr->data_uchar4();
offset = attr_uchar4_offset;
assert(attr_uchar4.size() >= offset + size);
if (mattr->modified) {
for (size_t k = 0; k < size; k++) {
attr_uchar4[offset + k] = data[k];
}
attr_uchar4.tag_modified();
}
attr_uchar4_offset += size;
}
else if (mattr->type == TypeDesc::TypeFloat) {
float *data = mattr->data_float();
offset = attr_float_offset;
assert(attr_float.size() >= offset + size);
if (mattr->modified) {
for (size_t k = 0; k < size; k++) {
attr_float[offset + k] = data[k];
}
attr_float.tag_modified();
}
attr_float_offset += size;
}
else if (mattr->type == TypeFloat2) {
float2 *data = mattr->data_float2();
offset = attr_float2_offset;
assert(attr_float2.size() >= offset + size);
if (mattr->modified) {
for (size_t k = 0; k < size; k++) {
attr_float2[offset + k] = data[k];
}
attr_float2.tag_modified();
}
attr_float2_offset += size;
}
else if (mattr->type == TypeDesc::TypeMatrix) {
Transform *tfm = mattr->data_transform();
offset = attr_float4_offset;
assert(attr_float4.size() >= offset + size * 3);
if (mattr->modified) {
for (size_t k = 0; k < size * 3; k++) {
attr_float4[offset + k] = (&tfm->x)[k];
}
attr_float4.tag_modified();
}
attr_float4_offset += size * 3;
}
else if (mattr->type == TypeFloat4 || mattr->type == TypeRGBA) {
float4 *data = mattr->data_float4();
offset = attr_float4_offset;
assert(attr_float4.size() >= offset + size);
if (mattr->modified) {
for (size_t k = 0; k < size; k++) {
attr_float4[offset + k] = data[k];
}
attr_float4.tag_modified();
}
attr_float4_offset += size;
}
else {
float3 *data = mattr->data_float3();
offset = attr_float3_offset;
assert(attr_float3.size() >= offset + size);
if (mattr->modified) {
for (size_t k = 0; k < size; k++) {
attr_float3[offset + k] = data[k];
}
attr_float3.tag_modified();
}
attr_float3_offset += size;
}
/* mesh vertex/curve index is global, not per object, so we sneak
* a correction for that in here */
if (geom->is_mesh()) {
Mesh *mesh = static_cast<Mesh *>(geom);
if (mesh->subdivision_type == Mesh::SUBDIVISION_CATMULL_CLARK &&
desc.flags & ATTR_SUBDIVIDED) {
/* Indices for subdivided attributes are retrieved
* from patch table so no need for correction here. */
}
else if (element == ATTR_ELEMENT_VERTEX)
offset -= mesh->vert_offset;
else if (element == ATTR_ELEMENT_VERTEX_MOTION)
offset -= mesh->vert_offset;
else if (element == ATTR_ELEMENT_FACE) {
if (prim == ATTR_PRIM_GEOMETRY)
offset -= mesh->prim_offset;
else
offset -= mesh->face_offset;
}
else if (element == ATTR_ELEMENT_CORNER || element == ATTR_ELEMENT_CORNER_BYTE) {
if (prim == ATTR_PRIM_GEOMETRY)
offset -= 3 * mesh->prim_offset;
else
offset -= mesh->corner_offset;
}
}
else if (geom->is_hair()) {
Hair *hair = static_cast<Hair *>(geom);
if (element == ATTR_ELEMENT_CURVE)
offset -= hair->prim_offset;
else if (element == ATTR_ELEMENT_CURVE_KEY)
offset -= hair->curve_key_offset;
else if (element == ATTR_ELEMENT_CURVE_KEY_MOTION)
offset -= hair->curve_key_offset;
}
else if (geom->is_pointcloud()) {
if (element == ATTR_ELEMENT_VERTEX)
offset -= geom->prim_offset;
else if (element == ATTR_ELEMENT_VERTEX_MOTION)
offset -= geom->prim_offset;
}
}
else {
/* attribute not found */
desc.element = ATTR_ELEMENT_NONE;
desc.offset = 0;
}
}
static void update_attribute_element_size(Geometry *geom,
Attribute *mattr,
AttributePrimitive prim,
size_t *attr_float_size,
size_t *attr_float2_size,
size_t *attr_float3_size,
size_t *attr_float4_size,
size_t *attr_uchar4_size)
{
if (mattr) {
size_t size = mattr->element_size(geom, prim);
if (mattr->element == ATTR_ELEMENT_VOXEL) {
/* pass */
}
else if (mattr->element == ATTR_ELEMENT_CORNER_BYTE) {
*attr_uchar4_size += size;
}
else if (mattr->type == TypeDesc::TypeFloat) {
*attr_float_size += size;
}
else if (mattr->type == TypeFloat2) {
*attr_float2_size += size;
}
else if (mattr->type == TypeDesc::TypeMatrix) {
*attr_float4_size += size * 4;
}
else if (mattr->type == TypeFloat4 || mattr->type == TypeRGBA) {
*attr_float4_size += size;
}
else {
*attr_float3_size += size;
}
}
}
void GeometryManager::device_update_attributes(Device *device,
DeviceScene *dscene,
Scene *scene,
Progress &progress)
{
progress.set_status("Updating Mesh", "Computing attributes");
/* gather per mesh requested attributes. as meshes may have multiple
* shaders assigned, this merges the requested attributes that have
* been set per shader by the shader manager */
vector<AttributeRequestSet> geom_attributes(scene->geometry.size());
for (size_t i = 0; i < scene->geometry.size(); i++) {
Geometry *geom = scene->geometry[i];
geom->index = i;
scene->need_global_attributes(geom_attributes[i]);
foreach (Node *node, geom->get_used_shaders()) {
Shader *shader = static_cast<Shader *>(node);
geom_attributes[i].add(shader->attributes);
}
if (geom->is_hair() && static_cast<Hair *>(geom)->need_shadow_transparency()) {
geom_attributes[i].add(ATTR_STD_SHADOW_TRANSPARENCY);
}
}
/* convert object attributes to use the same data structures as geometry ones */
vector<AttributeRequestSet> object_attributes(scene->objects.size());
vector<AttributeSet> object_attribute_values;
object_attribute_values.reserve(scene->objects.size());
for (size_t i = 0; i < scene->objects.size(); i++) {
Object *object = scene->objects[i];
Geometry *geom = object->geometry;
size_t geom_idx = geom->index;
assert(geom_idx < scene->geometry.size() && scene->geometry[geom_idx] == geom);
object_attribute_values.push_back(AttributeSet(geom, ATTR_PRIM_GEOMETRY));
AttributeRequestSet &geom_requests = geom_attributes[geom_idx];
AttributeRequestSet &attributes = object_attributes[i];
AttributeSet &values = object_attribute_values[i];
for (size_t j = 0; j < object->attributes.size(); j++) {
ParamValue &param = object->attributes[j];
/* add attributes that are requested and not already handled by the mesh */
if (geom_requests.find(param.name()) && !geom->attributes.find(param.name())) {
attributes.add(param.name());
Attribute *attr = values.add(param.name(), param.type(), ATTR_ELEMENT_OBJECT);
assert(param.datasize() == attr->buffer.size());
memcpy(attr->buffer.data(), param.data(), param.datasize());
}
}
}
/* mesh attribute are stored in a single array per data type. here we fill
* those arrays, and set the offset and element type to create attribute
* maps next */
/* Pre-allocate attributes to avoid arrays re-allocation which would
* take 2x of overall attribute memory usage.
*/
size_t attr_float_size = 0;
size_t attr_float2_size = 0;
size_t attr_float3_size = 0;
size_t attr_float4_size = 0;
size_t attr_uchar4_size = 0;
for (size_t i = 0; i < scene->geometry.size(); i++) {
Geometry *geom = scene->geometry[i];
AttributeRequestSet &attributes = geom_attributes[i];
foreach (AttributeRequest &req, attributes.requests) {
Attribute *attr = geom->attributes.find(req);
update_attribute_element_size(geom,
attr,
ATTR_PRIM_GEOMETRY,
&attr_float_size,
&attr_float2_size,
&attr_float3_size,
&attr_float4_size,
&attr_uchar4_size);
if (geom->is_mesh()) {
Mesh *mesh = static_cast<Mesh *>(geom);
Attribute *subd_attr = mesh->subd_attributes.find(req);
update_attribute_element_size(mesh,
subd_attr,
ATTR_PRIM_SUBD,
&attr_float_size,
&attr_float2_size,
&attr_float3_size,
&attr_float4_size,
&attr_uchar4_size);
}
}
}
for (size_t i = 0; i < scene->objects.size(); i++) {
Object *object = scene->objects[i];
foreach (Attribute &attr, object_attribute_values[i].attributes) {
update_attribute_element_size(object->geometry,
&attr,
ATTR_PRIM_GEOMETRY,
&attr_float_size,
&attr_float2_size,
&attr_float3_size,
&attr_float4_size,
&attr_uchar4_size);
}
}
dscene->attributes_float.alloc(attr_float_size);
dscene->attributes_float2.alloc(attr_float2_size);
dscene->attributes_float3.alloc(attr_float3_size);
dscene->attributes_float4.alloc(attr_float4_size);
dscene->attributes_uchar4.alloc(attr_uchar4_size);
/* The order of those flags needs to match that of AttrKernelDataType. */
const bool attributes_need_realloc[AttrKernelDataType::NUM] = {
dscene->attributes_float.need_realloc(),
dscene->attributes_float2.need_realloc(),
dscene->attributes_float3.need_realloc(),
dscene->attributes_float4.need_realloc(),
dscene->attributes_uchar4.need_realloc(),
};
size_t attr_float_offset = 0;
size_t attr_float2_offset = 0;
size_t attr_float3_offset = 0;
size_t attr_float4_offset = 0;
size_t attr_uchar4_offset = 0;
/* Fill in attributes. */
for (size_t i = 0; i < scene->geometry.size(); i++) {
Geometry *geom = scene->geometry[i];
AttributeRequestSet &attributes = geom_attributes[i];
/* todo: we now store std and name attributes from requests even if
* they actually refer to the same mesh attributes, optimize */
foreach (AttributeRequest &req, attributes.requests) {
Attribute *attr = geom->attributes.find(req);
if (attr) {
/* force a copy if we need to reallocate all the data */
attr->modified |= attributes_need_realloc[Attribute::kernel_type(*attr)];
}
update_attribute_element_offset(geom,
dscene->attributes_float,
attr_float_offset,
dscene->attributes_float2,
attr_float2_offset,
dscene->attributes_float3,
attr_float3_offset,
dscene->attributes_float4,
attr_float4_offset,
dscene->attributes_uchar4,
attr_uchar4_offset,
attr,
ATTR_PRIM_GEOMETRY,
req.type,
req.desc);
if (geom->is_mesh()) {
Mesh *mesh = static_cast<Mesh *>(geom);
Attribute *subd_attr = mesh->subd_attributes.find(req);
if (subd_attr) {
/* force a copy if we need to reallocate all the data */
subd_attr->modified |= attributes_need_realloc[Attribute::kernel_type(*subd_attr)];
}
update_attribute_element_offset(mesh,
dscene->attributes_float,
attr_float_offset,
dscene->attributes_float2,
attr_float2_offset,
dscene->attributes_float3,
attr_float3_offset,
dscene->attributes_float4,
attr_float4_offset,
dscene->attributes_uchar4,
attr_uchar4_offset,
subd_attr,
ATTR_PRIM_SUBD,
req.subd_type,
req.subd_desc);
}
if (progress.get_cancel())
return;
}
}
for (size_t i = 0; i < scene->objects.size(); i++) {
Object *object = scene->objects[i];
AttributeRequestSet &attributes = object_attributes[i];
AttributeSet &values = object_attribute_values[i];
foreach (AttributeRequest &req, attributes.requests) {
Attribute *attr = values.find(req);
if (attr) {
attr->modified |= attributes_need_realloc[Attribute::kernel_type(*attr)];
}
update_attribute_element_offset(object->geometry,
dscene->attributes_float,
attr_float_offset,
dscene->attributes_float2,
attr_float2_offset,
dscene->attributes_float3,
attr_float3_offset,
dscene->attributes_float4,
attr_float4_offset,
dscene->attributes_uchar4,
attr_uchar4_offset,
attr,
ATTR_PRIM_GEOMETRY,
req.type,
req.desc);
/* object attributes don't care about subdivision */
req.subd_type = req.type;
req.subd_desc = req.desc;
if (progress.get_cancel())
return;
}
}
/* create attribute lookup maps */
if (scene->shader_manager->use_osl())
update_osl_globals(device, scene);
update_svm_attributes(device, dscene, scene, geom_attributes, object_attributes);
if (progress.get_cancel())
return;
/* copy to device */
progress.set_status("Updating Mesh", "Copying Attributes to device");
dscene->attributes_float.copy_to_device_if_modified();
dscene->attributes_float2.copy_to_device_if_modified();
dscene->attributes_float3.copy_to_device_if_modified();
dscene->attributes_float4.copy_to_device_if_modified();
dscene->attributes_uchar4.copy_to_device_if_modified();
if (progress.get_cancel())
return;
/* After mesh attributes and patch tables have been copied to device memory,
* we need to update offsets in the objects. */
scene->object_manager->device_update_geom_offsets(device, dscene, scene);
}
CCL_NAMESPACE_END

View File

@ -0,0 +1,196 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#include "bvh/bvh.h"
#include "bvh/bvh2.h"
#include "device/device.h"
#include "scene/attribute.h"
#include "scene/camera.h"
#include "scene/geometry.h"
#include "scene/hair.h"
#include "scene/light.h"
#include "scene/mesh.h"
#include "scene/object.h"
#include "scene/pointcloud.h"
#include "scene/scene.h"
#include "scene/shader.h"
#include "scene/shader_nodes.h"
#include "scene/stats.h"
#include "scene/volume.h"
#include "subd/patch_table.h"
#include "subd/split.h"
#include "kernel/osl/globals.h"
#include "util/foreach.h"
#include "util/log.h"
#include "util/progress.h"
#include "util/task.h"
CCL_NAMESPACE_BEGIN
void Geometry::compute_bvh(Device *device,
DeviceScene *dscene,
SceneParams *params,
Progress *progress,
size_t n,
size_t total)
{
if (progress->get_cancel())
return;
compute_bounds();
const BVHLayout bvh_layout = BVHParams::best_bvh_layout(
params->bvh_layout, device->get_bvh_layout_mask(dscene->data.kernel_features));
if (need_build_bvh(bvh_layout)) {
string msg = "Updating Geometry BVH ";
if (name.empty())
msg += string_printf("%u/%u", (uint)(n + 1), (uint)total);
else
msg += string_printf("%s %u/%u", name.c_str(), (uint)(n + 1), (uint)total);
Object object;
/* Ensure all visibility bits are set at the geometry level BVH. In
* the object level BVH is where actual visibility is tested. */
object.set_is_shadow_catcher(true);
object.set_visibility(~0);
object.set_geometry(this);
vector<Geometry *> geometry;
geometry.push_back(this);
vector<Object *> objects;
objects.push_back(&object);
if (bvh && !need_update_rebuild) {
progress->set_status(msg, "Refitting BVH");
bvh->replace_geometry(geometry, objects);
device->build_bvh(bvh, *progress, true);
}
else {
progress->set_status(msg, "Building BVH");
BVHParams bparams;
bparams.use_spatial_split = params->use_bvh_spatial_split;
bparams.use_compact_structure = params->use_bvh_compact_structure;
bparams.bvh_layout = bvh_layout;
bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
params->use_bvh_unaligned_nodes;
bparams.num_motion_triangle_steps = params->num_bvh_time_steps;
bparams.num_motion_curve_steps = params->num_bvh_time_steps;
bparams.num_motion_point_steps = params->num_bvh_time_steps;
bparams.bvh_type = params->bvh_type;
bparams.curve_subdivisions = params->curve_subdivisions();
delete bvh;
bvh = BVH::create(bparams, geometry, objects, device);
MEM_GUARDED_CALL(progress, device->build_bvh, bvh, *progress, false);
}
}
need_update_rebuild = false;
need_update_bvh_for_offset = false;
}
void GeometryManager::device_update_bvh(Device *device,
DeviceScene *dscene,
Scene *scene,
Progress &progress)
{
/* bvh build */
progress.set_status("Updating Scene BVH", "Building");
BVHParams bparams;
bparams.top_level = true;
bparams.bvh_layout = BVHParams::best_bvh_layout(
scene->params.bvh_layout, device->get_bvh_layout_mask(dscene->data.kernel_features));
bparams.use_spatial_split = scene->params.use_bvh_spatial_split;
bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
scene->params.use_bvh_unaligned_nodes;
bparams.num_motion_triangle_steps = scene->params.num_bvh_time_steps;
bparams.num_motion_curve_steps = scene->params.num_bvh_time_steps;
bparams.num_motion_point_steps = scene->params.num_bvh_time_steps;
bparams.bvh_type = scene->params.bvh_type;
bparams.curve_subdivisions = scene->params.curve_subdivisions();
VLOG_INFO << "Using " << bvh_layout_name(bparams.bvh_layout) << " layout.";
const bool can_refit = scene->bvh != nullptr &&
(bparams.bvh_layout == BVHLayout::BVH_LAYOUT_OPTIX ||
bparams.bvh_layout == BVHLayout::BVH_LAYOUT_METAL);
BVH *bvh = scene->bvh;
if (!scene->bvh) {
bvh = scene->bvh = BVH::create(bparams, scene->geometry, scene->objects, device);
}
device->build_bvh(bvh, progress, can_refit);
if (progress.get_cancel()) {
return;
}
const bool has_bvh2_layout = (bparams.bvh_layout == BVH_LAYOUT_BVH2);
PackedBVH pack;
if (has_bvh2_layout) {
pack = std::move(static_cast<BVH2 *>(bvh)->pack);
}
else {
pack.root_index = -1;
}
/* copy to device */
progress.set_status("Updating Scene BVH", "Copying BVH to device");
/* When using BVH2, we always have to copy/update the data as its layout is dependent on the
* BVH's leaf nodes which may be different when the objects or vertices move. */
if (pack.nodes.size()) {
dscene->bvh_nodes.steal_data(pack.nodes);
dscene->bvh_nodes.copy_to_device();
}
if (pack.leaf_nodes.size()) {
dscene->bvh_leaf_nodes.steal_data(pack.leaf_nodes);
dscene->bvh_leaf_nodes.copy_to_device();
}
if (pack.object_node.size()) {
dscene->object_node.steal_data(pack.object_node);
dscene->object_node.copy_to_device();
}
if (pack.prim_type.size()) {
dscene->prim_type.steal_data(pack.prim_type);
dscene->prim_type.copy_to_device();
}
if (pack.prim_visibility.size()) {
dscene->prim_visibility.steal_data(pack.prim_visibility);
dscene->prim_visibility.copy_to_device();
}
if (pack.prim_index.size()) {
dscene->prim_index.steal_data(pack.prim_index);
dscene->prim_index.copy_to_device();
}
if (pack.prim_object.size()) {
dscene->prim_object.steal_data(pack.prim_object);
dscene->prim_object.copy_to_device();
}
if (pack.prim_time.size()) {
dscene->prim_time.steal_data(pack.prim_time);
dscene->prim_time.copy_to_device();
}
dscene->data.bvh.root = pack.root_index;
dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0);
dscene->data.bvh.curve_subdivisions = scene->params.curve_subdivisions();
/* The scene handle is set in 'CPUDevice::const_copy_to' and 'OptiXDevice::const_copy_to' */
dscene->data.device_bvh = 0;
}
CCL_NAMESPACE_END

View File

@ -0,0 +1,223 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#include "bvh/bvh.h"
#include "bvh/bvh2.h"
#include "device/device.h"
#include "scene/attribute.h"
#include "scene/camera.h"
#include "scene/geometry.h"
#include "scene/hair.h"
#include "scene/light.h"
#include "scene/mesh.h"
#include "scene/object.h"
#include "scene/osl.h"
#include "scene/pointcloud.h"
#include "scene/scene.h"
#include "scene/shader.h"
#include "scene/shader_nodes.h"
#include "scene/stats.h"
#include "scene/volume.h"
#include "subd/patch_table.h"
#include "subd/split.h"
#ifdef WITH_OSL
# include "kernel/osl/globals.h"
#endif
#include "util/foreach.h"
#include "util/log.h"
#include "util/progress.h"
#include "util/task.h"
CCL_NAMESPACE_BEGIN
void GeometryManager::device_update_mesh(Device *,
DeviceScene *dscene,
Scene *scene,
Progress &progress)
{
/* Count. */
size_t vert_size = 0;
size_t tri_size = 0;
size_t curve_key_size = 0;
size_t curve_size = 0;
size_t curve_segment_size = 0;
size_t point_size = 0;
size_t patch_size = 0;
foreach (Geometry *geom, scene->geometry) {
if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) {
Mesh *mesh = static_cast<Mesh *>(geom);
vert_size += mesh->verts.size();
tri_size += mesh->num_triangles();
if (mesh->get_num_subd_faces()) {
Mesh::SubdFace last = mesh->get_subd_face(mesh->get_num_subd_faces() - 1);
patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8;
/* patch tables are stored in same array so include them in patch_size */
if (mesh->patch_table) {
mesh->patch_table_offset = patch_size;
patch_size += mesh->patch_table->total_size();
}
}
}
else if (geom->is_hair()) {
Hair *hair = static_cast<Hair *>(geom);
curve_key_size += hair->get_curve_keys().size();
curve_size += hair->num_curves();
curve_segment_size += hair->num_segments();
}
else if (geom->is_pointcloud()) {
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
point_size += pointcloud->num_points();
}
}
/* Fill in all the arrays. */
if (tri_size != 0) {
/* normals */
progress.set_status("Updating Mesh", "Computing normals");
packed_float3 *tri_verts = dscene->tri_verts.alloc(vert_size);
uint *tri_shader = dscene->tri_shader.alloc(tri_size);
packed_float3 *vnormal = dscene->tri_vnormal.alloc(vert_size);
packed_uint3 *tri_vindex = dscene->tri_vindex.alloc(tri_size);
uint *tri_patch = dscene->tri_patch.alloc(tri_size);
float2 *tri_patch_uv = dscene->tri_patch_uv.alloc(vert_size);
const bool copy_all_data = dscene->tri_shader.need_realloc() ||
dscene->tri_vindex.need_realloc() ||
dscene->tri_vnormal.need_realloc() ||
dscene->tri_patch.need_realloc() ||
dscene->tri_patch_uv.need_realloc();
foreach (Geometry *geom, scene->geometry) {
if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) {
Mesh *mesh = static_cast<Mesh *>(geom);
if (mesh->shader_is_modified() || mesh->smooth_is_modified() ||
mesh->triangles_is_modified() || copy_all_data) {
mesh->pack_shaders(scene, &tri_shader[mesh->prim_offset]);
}
if (mesh->verts_is_modified() || copy_all_data) {
mesh->pack_normals(&vnormal[mesh->vert_offset]);
}
if (mesh->verts_is_modified() || mesh->triangles_is_modified() ||
mesh->vert_patch_uv_is_modified() || copy_all_data) {
mesh->pack_verts(&tri_verts[mesh->vert_offset],
&tri_vindex[mesh->prim_offset],
&tri_patch[mesh->prim_offset],
&tri_patch_uv[mesh->vert_offset]);
}
if (progress.get_cancel())
return;
}
}
/* vertex coordinates */
progress.set_status("Updating Mesh", "Copying Mesh to device");
dscene->tri_verts.copy_to_device_if_modified();
dscene->tri_shader.copy_to_device_if_modified();
dscene->tri_vnormal.copy_to_device_if_modified();
dscene->tri_vindex.copy_to_device_if_modified();
dscene->tri_patch.copy_to_device_if_modified();
dscene->tri_patch_uv.copy_to_device_if_modified();
}
if (curve_segment_size != 0) {
progress.set_status("Updating Mesh", "Copying Curves to device");
float4 *curve_keys = dscene->curve_keys.alloc(curve_key_size);
KernelCurve *curves = dscene->curves.alloc(curve_size);
KernelCurveSegment *curve_segments = dscene->curve_segments.alloc(curve_segment_size);
const bool copy_all_data = dscene->curve_keys.need_realloc() ||
dscene->curves.need_realloc() ||
dscene->curve_segments.need_realloc();
foreach (Geometry *geom, scene->geometry) {
if (geom->is_hair()) {
Hair *hair = static_cast<Hair *>(geom);
bool curve_keys_co_modified = hair->curve_radius_is_modified() ||
hair->curve_keys_is_modified();
bool curve_data_modified = hair->curve_shader_is_modified() ||
hair->curve_first_key_is_modified();
if (!curve_keys_co_modified && !curve_data_modified && !copy_all_data) {
continue;
}
hair->pack_curves(scene,
&curve_keys[hair->curve_key_offset],
&curves[hair->prim_offset],
&curve_segments[hair->curve_segment_offset]);
if (progress.get_cancel())
return;
}
}
dscene->curve_keys.copy_to_device_if_modified();
dscene->curves.copy_to_device_if_modified();
dscene->curve_segments.copy_to_device_if_modified();
}
if (point_size != 0) {
progress.set_status("Updating Mesh", "Copying Point clouds to device");
float4 *points = dscene->points.alloc(point_size);
uint *points_shader = dscene->points_shader.alloc(point_size);
foreach (Geometry *geom, scene->geometry) {
if (geom->is_pointcloud()) {
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
pointcloud->pack(
scene, &points[pointcloud->prim_offset], &points_shader[pointcloud->prim_offset]);
if (progress.get_cancel())
return;
}
}
dscene->points.copy_to_device();
dscene->points_shader.copy_to_device();
}
if (patch_size != 0 && dscene->patches.need_realloc()) {
progress.set_status("Updating Mesh", "Copying Patches to device");
uint *patch_data = dscene->patches.alloc(patch_size);
foreach (Geometry *geom, scene->geometry) {
if (geom->is_mesh()) {
Mesh *mesh = static_cast<Mesh *>(geom);
mesh->pack_patches(&patch_data[mesh->patch_offset]);
if (mesh->patch_table) {
mesh->patch_table->copy_adjusting_offsets(&patch_data[mesh->patch_table_offset],
mesh->patch_table_offset);
}
if (progress.get_cancel())
return;
}
}
dscene->patches.copy_to_device();
}
}
CCL_NAMESPACE_END

View File

@ -595,7 +595,7 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
void ObjectManager::device_update_prim_offsets(Device *device, DeviceScene *dscene, Scene *scene)
{
if (!scene->integrator->get_use_light_tree()) {
BVHLayoutMask layout_mask = device->get_bvh_layout_mask();
BVHLayoutMask layout_mask = device->get_bvh_layout_mask(dscene->data.kernel_features);
if (layout_mask != BVH_LAYOUT_METAL && layout_mask != BVH_LAYOUT_MULTI_METAL &&
layout_mask != BVH_LAYOUT_MULTI_METAL_EMBREE) {
return;

View File

@ -24,6 +24,7 @@
#include "scene/svm.h"
#include "scene/tables.h"
#include "scene/volume.h"
#include "scene/devicescene.h"
#include "session/session.h"
#include "util/foreach.h"
@ -33,59 +34,7 @@
CCL_NAMESPACE_BEGIN
DeviceScene::DeviceScene(Device *device)
: bvh_nodes(device, "bvh_nodes", MEM_GLOBAL),
bvh_leaf_nodes(device, "bvh_leaf_nodes", MEM_GLOBAL),
object_node(device, "object_node", MEM_GLOBAL),
prim_type(device, "prim_type", MEM_GLOBAL),
prim_visibility(device, "prim_visibility", MEM_GLOBAL),
prim_index(device, "prim_index", MEM_GLOBAL),
prim_object(device, "prim_object", MEM_GLOBAL),
prim_time(device, "prim_time", MEM_GLOBAL),
tri_verts(device, "tri_verts", MEM_GLOBAL),
tri_shader(device, "tri_shader", MEM_GLOBAL),
tri_vnormal(device, "tri_vnormal", MEM_GLOBAL),
tri_vindex(device, "tri_vindex", MEM_GLOBAL),
tri_patch(device, "tri_patch", MEM_GLOBAL),
tri_patch_uv(device, "tri_patch_uv", MEM_GLOBAL),
curves(device, "curves", MEM_GLOBAL),
curve_keys(device, "curve_keys", MEM_GLOBAL),
curve_segments(device, "curve_segments", MEM_GLOBAL),
patches(device, "patches", MEM_GLOBAL),
points(device, "points", MEM_GLOBAL),
points_shader(device, "points_shader", MEM_GLOBAL),
objects(device, "objects", MEM_GLOBAL),
object_motion_pass(device, "object_motion_pass", MEM_GLOBAL),
object_motion(device, "object_motion", MEM_GLOBAL),
object_flag(device, "object_flag", MEM_GLOBAL),
object_volume_step(device, "object_volume_step", MEM_GLOBAL),
object_prim_offset(device, "object_prim_offset", MEM_GLOBAL),
camera_motion(device, "camera_motion", MEM_GLOBAL),
attributes_map(device, "attributes_map", MEM_GLOBAL),
attributes_float(device, "attributes_float", MEM_GLOBAL),
attributes_float2(device, "attributes_float2", MEM_GLOBAL),
attributes_float3(device, "attributes_float3", MEM_GLOBAL),
attributes_float4(device, "attributes_float4", MEM_GLOBAL),
attributes_uchar4(device, "attributes_uchar4", MEM_GLOBAL),
light_distribution(device, "light_distribution", MEM_GLOBAL),
lights(device, "lights", MEM_GLOBAL),
light_background_marginal_cdf(device, "light_background_marginal_cdf", MEM_GLOBAL),
light_background_conditional_cdf(device, "light_background_conditional_cdf", MEM_GLOBAL),
light_tree_nodes(device, "light_tree_nodes", MEM_GLOBAL),
light_tree_emitters(device, "light_tree_emitters", MEM_GLOBAL),
light_to_tree(device, "light_to_tree", MEM_GLOBAL),
object_to_tree(device, "object_to_tree", MEM_GLOBAL),
object_lookup_offset(device, "object_lookup_offset", MEM_GLOBAL),
triangle_to_tree(device, "triangle_to_tree", MEM_GLOBAL),
particles(device, "particles", MEM_GLOBAL),
svm_nodes(device, "svm_nodes", MEM_GLOBAL),
shaders(device, "shaders", MEM_GLOBAL),
lookup_table(device, "lookup_table", MEM_GLOBAL),
sample_pattern_lut(device, "sample_pattern_lut", MEM_GLOBAL),
ies_lights(device, "ies", MEM_GLOBAL)
{
memset((void *)&data, 0, sizeof(data));
}
Scene::Scene(const SceneParams &params_, Device *device)
: name("Scene"),
@ -601,7 +550,7 @@ static void log_kernel_features(const uint features)
<< "\n";
VLOG_INFO << "Use Shader Raytrace " << string_from_bool(features & KERNEL_FEATURE_NODE_RAYTRACE)
<< "\n";
VLOG_INFO << "Use MNEE" << string_from_bool(features & KERNEL_FEATURE_MNEE) << "\n";
VLOG_INFO << "Use MNEE " << string_from_bool(features & KERNEL_FEATURE_MNEE) << "\n";
VLOG_INFO << "Use Transparent " << string_from_bool(features & KERNEL_FEATURE_TRANSPARENT)
<< "\n";
VLOG_INFO << "Use Denoising " << string_from_bool(features & KERNEL_FEATURE_DENOISING) << "\n";

View File

@ -6,20 +6,16 @@
#include "bvh/params.h"
#include "scene/devicescene.h"
#include "scene/film.h"
#include "scene/image.h"
#include "scene/shader.h"
#include "device/device.h"
#include "device/memory.h"
#include "util/param.h"
#include "util/string.h"
#include "util/system.h"
#include "util/texture.h"
#include "util/thread.h"
#include "util/types.h"
#include "util/vector.h"
CCL_NAMESPACE_BEGIN
@ -54,92 +50,6 @@ class RenderStats;
class SceneUpdateStats;
class Volume;
/* Scene Device Data */
class DeviceScene {
public:
/* BVH */
device_vector<int4> bvh_nodes;
device_vector<int4> bvh_leaf_nodes;
device_vector<int> object_node;
device_vector<int> prim_type;
device_vector<uint> prim_visibility;
device_vector<int> prim_index;
device_vector<int> prim_object;
device_vector<float2> prim_time;
/* mesh */
device_vector<packed_float3> tri_verts;
device_vector<uint> tri_shader;
device_vector<packed_float3> tri_vnormal;
device_vector<packed_uint3> tri_vindex;
device_vector<uint> tri_patch;
device_vector<float2> tri_patch_uv;
device_vector<KernelCurve> curves;
device_vector<float4> curve_keys;
device_vector<KernelCurveSegment> curve_segments;
device_vector<uint> patches;
/* point-cloud */
device_vector<float4> points;
device_vector<uint> points_shader;
/* objects */
device_vector<KernelObject> objects;
device_vector<Transform> object_motion_pass;
device_vector<DecomposedTransform> object_motion;
device_vector<uint> object_flag;
device_vector<float> object_volume_step;
device_vector<uint> object_prim_offset;
/* cameras */
device_vector<DecomposedTransform> camera_motion;
/* attributes */
device_vector<AttributeMap> attributes_map;
device_vector<float> attributes_float;
device_vector<float2> attributes_float2;
device_vector<packed_float3> attributes_float3;
device_vector<float4> attributes_float4;
device_vector<uchar4> attributes_uchar4;
/* lights */
device_vector<KernelLightDistribution> light_distribution;
device_vector<KernelLight> lights;
device_vector<float2> light_background_marginal_cdf;
device_vector<float2> light_background_conditional_cdf;
/* light tree */
device_vector<KernelLightTreeNode> light_tree_nodes;
device_vector<KernelLightTreeEmitter> light_tree_emitters;
device_vector<uint> light_to_tree;
device_vector<uint> object_to_tree;
device_vector<uint> object_lookup_offset;
device_vector<uint> triangle_to_tree;
/* particles */
device_vector<KernelParticle> particles;
/* shaders */
device_vector<int4> svm_nodes;
device_vector<KernelShader> shaders;
/* lookup tables */
device_vector<float> lookup_table;
/* integrator */
device_vector<float> sample_pattern_lut;
/* IES lights */
device_vector<float> ies_lights;
KernelData data;
DeviceScene(Device *device);
};
/* Scene Parameters */
class SceneParams {

View File

@ -621,12 +621,12 @@ void Session::set_pause(bool pause)
void Session::set_output_driver(unique_ptr<OutputDriver> driver)
{
path_trace_->set_output_driver(move(driver));
path_trace_->set_output_driver(std::move(driver));
}
void Session::set_display_driver(unique_ptr<DisplayDriver> driver)
{
path_trace_->set_display_driver(move(driver));
path_trace_->set_display_driver(std::move(driver));
}
double Session::get_estimated_remaining_time() const

View File

@ -285,7 +285,7 @@ static bool configure_image_spec_from_buffer(ImageSpec *image_spec,
*image_spec = ImageSpec(
buffer_params.width, buffer_params.height, num_channels, TypeDesc::FLOAT);
image_spec->channelnames = move(channel_names);
image_spec->channelnames = std::move(channel_names);
if (!buffer_params_to_image_spec_atttributes(image_spec, buffer_params)) {
return false;

View File

@ -809,7 +809,7 @@ static string path_source_replace_includes_recursive(const string &_source,
const size_t source_length = source.length();
size_t index = 0;
/* Information about where we are in the source. */
size_t line_number = 0, column_number = 1;
size_t column_number = 1;
/* Currently gathered non-preprocessor token.
* Store as start/length rather than token itself to avoid overhead of
* memory re-allocations on each character concatenation.
@ -842,7 +842,6 @@ static string path_source_replace_includes_recursive(const string &_source,
preprocessor_line = "";
}
column_number = 0;
++line_number;
}
else if (ch == '#' && column_number == 1 && !inside_preprocessor) {
/* Append all possible non-preprocessor token to the result. */

View File

@ -4,7 +4,6 @@
#ifndef __UTIL_VECTOR_H__
#define __UTIL_VECTOR_H__
#include <cassert>
#include <cstring>
#include <vector>

View File

@ -897,12 +897,10 @@ void Octree::printPath(PathElement *path)
void Octree::printPaths(PathList *path)
{
PathList *iter = path;
int i = 0;
while (iter != NULL) {
dc_printf("Path %d:\n", i);
printPath(iter);
iter = iter->next;
i++;
}
}
@ -1256,7 +1254,6 @@ Node *Octree::connectFace(
updateParent(&newnode->internal, len, st);
int flag = 0;
// Add the cells to the rings and fill in the patch
PathElement *newEleN;
if (curEleN->pos[0] != stN[0] || curEleN->pos[1] != stN[1] || curEleN->pos[2] != stN[2]) {
@ -1286,7 +1283,6 @@ Node *Octree::connectFace(
alpha);
curEleN = newEleN;
flag++;
}
PathElement *newEleP;
@ -1316,7 +1312,6 @@ Node *Octree::connectFace(
alpha);
curEleP = newEleP;
flag++;
}
/*
@ -1543,6 +1538,8 @@ void Octree::getFacePoint(PathElement *leaf, int dir, int &x, int &y, float &p,
float off[3];
int num = 0, num2 = 0;
(void)num2; // Unused in release builds.
LeafNode *leafnode = locateLeaf(leaf->pos);
for (int i = 0; i < 4; i++) {
int edgeind = faceMap[dir * 2][i];

View File

@ -72,7 +72,7 @@ class GHOST_ISystemPaths {
/**
* Add the file to the operating system most recently used files
*/
virtual void addToSystemRecentFiles(const char *filename) const = 0;
virtual void addToSystemRecentFiles(const char *filepath) const = 0;
private:
/** The one and only system paths. */

View File

@ -61,7 +61,7 @@ extern const char *GHOST_getBinaryDir(void);
/**
* Add the file to the operating system most recently used files
*/
extern void GHOST_addToSystemRecentFiles(const char *filename);
extern void GHOST_addToSystemRecentFiles(const char *filepath);
#ifdef __cplusplus
}

View File

@ -10,6 +10,9 @@
#include "GHOST_Context.hh"
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
# include <epoxy/wgl.h>
# include <tchar.h>
#
@ -18,6 +21,8 @@
# endif
#endif
#include <epoxy/gl.h>
#include <cstdio>
#include <cstring>

View File

@ -11,8 +11,6 @@
#include "GHOST_IContext.hh"
#include "GHOST_Types.h"
#include <epoxy/gl.h>
#include <cstdlib> // for NULL
class GHOST_Context : public GHOST_IContext {

View File

@ -19,6 +19,8 @@
#include <Metal/Metal.h>
#include <QuartzCore/QuartzCore.h>
#include <epoxy/gl.h>
#include <cassert>
#include <vector>

View File

@ -903,13 +903,14 @@ GHOST_TSuccess GHOST_ContextVK::initializeDrawingContext()
auto extensions_available = getExtensionsAvailable();
vector<const char *> layers_enabled;
if (m_debug) {
enableLayer(layers_available, layers_enabled, VkLayer::KHRONOS_validation, m_debug);
}
vector<const char *> extensions_device;
vector<const char *> extensions_enabled;
if (m_debug) {
enableLayer(layers_available, layers_enabled, VkLayer::KHRONOS_validation, m_debug);
requireExtension(extensions_available, extensions_enabled, VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
if (use_window_surface) {
const char *native_surface_extension_name = getPlatformSpecificSurfaceExtension();

View File

@ -57,4 +57,4 @@ class GHOST_IXrGraphicsBinding {
};
std::unique_ptr<GHOST_IXrGraphicsBinding> GHOST_XrGraphicsBindingCreateFromType(
GHOST_TXrGraphicsBinding type, GHOST_Context &ghost_ctx);
GHOST_TXrGraphicsBinding type, GHOST_Context &context);

View File

@ -47,5 +47,5 @@ class GHOST_SystemPaths : public GHOST_ISystemPaths {
/**
* Add the file to the operating system most recently used files
*/
virtual void addToSystemRecentFiles(const char *filename) const = 0;
virtual void addToSystemRecentFiles(const char *filepath) const = 0;
};

View File

@ -54,5 +54,5 @@ class GHOST_SystemPathsCocoa : public GHOST_SystemPaths {
/**
* Add the file to the operating system most recently used files
*/
void addToSystemRecentFiles(const char *filename) const;
void addToSystemRecentFiles(const char *filepath) const;
};

View File

@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2010 Blender Foundation */
#import <AppKit/NSDocumentController.h>
#import <Foundation/Foundation.h>
#include "GHOST_Debug.hh"
@ -112,7 +113,10 @@ const char *GHOST_SystemPathsCocoa::getBinaryDir() const
return tempPath;
}
void GHOST_SystemPathsCocoa::addToSystemRecentFiles(const char *filename) const
void GHOST_SystemPathsCocoa::addToSystemRecentFiles(const char *filepath) const
{
/* TODO: implement for macOS */
@autoreleasepool {
NSURL *const file_url = [NSURL fileURLWithPath:[NSString stringWithUTF8String:filepath]];
[[NSDocumentController sharedDocumentController] noteNewRecentDocumentURL:file_url];
}
}

View File

@ -52,5 +52,5 @@ class GHOST_SystemPathsUnix : public GHOST_SystemPaths {
/**
* Add the file to the operating system most recently used files
*/
void addToSystemRecentFiles(const char *filename) const;
void addToSystemRecentFiles(const char *filepath) const;
};

View File

@ -117,10 +117,10 @@ const char *GHOST_SystemPathsWin32::getBinaryDir() const
return NULL;
}
void GHOST_SystemPathsWin32::addToSystemRecentFiles(const char *filename) const
void GHOST_SystemPathsWin32::addToSystemRecentFiles(const char *filepath) const
{
/* SHARD_PATH resolves to SHARD_PATHA for non-UNICODE build */
UTF16_ENCODE(filename);
SHAddToRecentDocs(SHARD_PATHW, filename_16);
UTF16_UN_ENCODE(filename);
UTF16_ENCODE(filepath);
SHAddToRecentDocs(SHARD_PATHW, filepath_16);
UTF16_UN_ENCODE(filepath);
}

View File

@ -61,5 +61,5 @@ class GHOST_SystemPathsWin32 : public GHOST_SystemPaths {
/**
* Add the file to the operating system most recently used files
*/
void addToSystemRecentFiles(const char *filename) const;
void addToSystemRecentFiles(const char *filepath) const;
};

View File

@ -193,6 +193,8 @@ static bool use_gnome_confine_hack = false;
# define USE_GNOME_NEEDS_LIBDECOR_HACK
#endif
/** \} */
/* -------------------------------------------------------------------- */
/** \name Local Defines
*
@ -5584,8 +5586,6 @@ GHOST_SystemWayland::GHOST_SystemWayland(bool background)
# endif
display_destroy_and_free_all();
throw std::runtime_error("Wayland: unable to find libdecor!");
use_libdecor = true;
}
}
else {
@ -5608,7 +5608,7 @@ GHOST_SystemWayland::GHOST_SystemWayland(bool background)
(void)background;
#endif
{
GWL_XDG_Decor_System &decor = *display_->xdg_decor;
const GWL_XDG_Decor_System &decor = *display_->xdg_decor;
if (!decor.shell) {
display_destroy_and_free_all();
throw std::runtime_error("Wayland: unable to access xdg_shell!");
@ -6069,10 +6069,8 @@ static GHOST_TSuccess getCursorPositionClientRelative_impl(
/* As the cursor is restored at the warped location,
* apply warping when requesting the cursor location. */
GHOST_Rect wrap_bounds{};
if (win->getCursorGrabModeIsWarp()) {
if (win->getCursorGrabBounds(wrap_bounds) == GHOST_kFailure) {
win->getClientBounds(wrap_bounds);
}
if (win->getCursorGrabBounds(wrap_bounds) == GHOST_kFailure) {
win->getClientBounds(wrap_bounds);
}
int xy_wrap[2] = {
seat_state_pointer->xy[0],
@ -6307,6 +6305,7 @@ GHOST_IContext *GHOST_SystemWayland::createOffscreenContext(GHOST_GLSettings glS
delete context;
return nullptr;
}
context->setUserData(wl_surface);
return context;
}
#else
@ -6345,7 +6344,9 @@ GHOST_TSuccess GHOST_SystemWayland::disposeContext(GHOST_IContext *context)
delete context;
wl_egl_window *egl_window = (wl_egl_window *)wl_surface_get_user_data(wl_surface);
wl_egl_window_destroy(egl_window);
if (egl_window != nullptr) {
wl_egl_window_destroy(egl_window);
}
wl_surface_destroy(wl_surface);
return GHOST_kSuccess;
@ -6675,10 +6676,9 @@ GHOST_TSuccess GHOST_SystemWayland::cursor_shape_custom_set(uint8_t *bitmap,
static constexpr uint32_t transparent = 0x00000000;
uint8_t datab = 0, maskb = 0;
uint32_t *pixel;
for (int y = 0; y < sizey; ++y) {
pixel = &static_cast<uint32_t *>(cursor->custom_data)[y * sizex];
uint32_t *pixel = &static_cast<uint32_t *>(cursor->custom_data)[y * sizex];
for (int x = 0; x < sizex; ++x) {
if ((x % 8) == 0) {
datab = *bitmap++;

View File

@ -37,15 +37,15 @@ bool ghost_wl_output_own(const struct wl_output *wl_output);
void ghost_wl_output_tag(struct wl_output *wl_output);
struct GWL_Output *ghost_wl_output_user_data(struct wl_output *wl_output);
bool ghost_wl_surface_own(const struct wl_surface *surface);
void ghost_wl_surface_tag(struct wl_surface *surface);
GHOST_WindowWayland *ghost_wl_surface_user_data(struct wl_surface *surface);
bool ghost_wl_surface_own(const struct wl_surface *wl_surface);
void ghost_wl_surface_tag(struct wl_surface *wl_surface);
GHOST_WindowWayland *ghost_wl_surface_user_data(struct wl_surface *wl_surface);
bool ghost_wl_surface_own_cursor_pointer(const struct wl_surface *surface);
void ghost_wl_surface_tag_cursor_pointer(struct wl_surface *surface);
bool ghost_wl_surface_own_cursor_pointer(const struct wl_surface *wl_surface);
void ghost_wl_surface_tag_cursor_pointer(struct wl_surface *wl_surface);
bool ghost_wl_surface_own_cursor_tablet(const struct wl_surface *surface);
void ghost_wl_surface_tag_cursor_tablet(struct wl_surface *surface);
bool ghost_wl_surface_own_cursor_tablet(const struct wl_surface *wl_surface);
void ghost_wl_surface_tag_cursor_tablet(struct wl_surface *wl_surface);
/* Scaling to: translates from WAYLAND into GHOST (viewport local) coordinates.
* Scaling from: performs the reverse translation.

View File

@ -85,7 +85,7 @@ static uchar bit_is_on(const uchar *ptr, int bit)
static GHOST_TKey ghost_key_from_keysym(const KeySym key);
static GHOST_TKey ghost_key_from_keycode(const XkbDescPtr xkb_descr, const KeyCode keycode);
static GHOST_TKey ghost_key_from_keysym_or_keycode(const KeySym key,
static GHOST_TKey ghost_key_from_keysym_or_keycode(const KeySym key_sym,
const XkbDescPtr xkb_descr,
const KeyCode keycode);
@ -99,7 +99,12 @@ static bool use_xwayland_hack = false;
using namespace std;
GHOST_SystemX11::GHOST_SystemX11() : GHOST_System(), m_xkb_descr(nullptr), m_start_time(0)
GHOST_SystemX11::GHOST_SystemX11()
: GHOST_System(),
m_xkb_descr(nullptr),
m_start_time(0),
m_keyboard_vector{0},
m_keycode_last_repeat_key(uint(-1))
{
XInitThreads();
m_display = XOpenDisplay(nullptr);
@ -897,7 +902,7 @@ void GHOST_SystemX11::processEvent(XEvent *xe)
#endif /* WITH_X11_XINPUT */
switch (xe->type) {
case Expose: {
XExposeEvent &xee = xe->xexpose;
const XExposeEvent &xee = xe->xexpose;
if (xee.count == 0) {
/* Only generate a single expose event
@ -909,7 +914,7 @@ void GHOST_SystemX11::processEvent(XEvent *xe)
}
case MotionNotify: {
XMotionEvent &xme = xe->xmotion;
const XMotionEvent &xme = xe->xmotion;
bool is_tablet = window->GetTabletData().Active != GHOST_kTabletModeNone;
@ -1235,7 +1240,7 @@ void GHOST_SystemX11::processEvent(XEvent *xe)
case ButtonPress:
case ButtonRelease: {
XButtonEvent &xbe = xe->xbutton;
const XButtonEvent &xbe = xe->xbutton;
GHOST_TButton gbmask = GHOST_kButtonMaskLeft;
GHOST_TEventType type = (xbe.type == ButtonPress) ? GHOST_kEventButtonDown :
GHOST_kEventButtonUp;
@ -1290,14 +1295,14 @@ void GHOST_SystemX11::processEvent(XEvent *xe)
/* change of size, border, layer etc. */
case ConfigureNotify: {
// XConfigureEvent & xce = xe->xconfigure;
// const XConfigureEvent & xce = xe->xconfigure;
g_event = new GHOST_Event(getMilliSeconds(), GHOST_kEventWindowSize, window);
break;
}
case FocusIn:
case FocusOut: {
XFocusChangeEvent &xfe = xe->xfocus;
const XFocusChangeEvent &xfe = xe->xfocus;
/* TODO: make sure this is the correct place for activate/deactivate */
// printf("X: focus %s for window %d\n",
@ -1385,7 +1390,7 @@ void GHOST_SystemX11::processEvent(XEvent *xe)
* (really crossing between windows) since some window-managers
* also send grab/un-grab crossings for mouse-wheel events.
*/
XCrossingEvent &xce = xe->xcrossing;
const XCrossingEvent &xce = xe->xcrossing;
if (xce.mode == NotifyNormal) {
g_event = new GHOST_EventCursor(getMilliSeconds(),
GHOST_kEventCursorMove,
@ -1776,11 +1781,11 @@ bool GHOST_SystemX11::generateWindowExposeEvents()
return anyProcessed;
}
static GHOST_TKey ghost_key_from_keysym_or_keycode(const KeySym keysym,
static GHOST_TKey ghost_key_from_keysym_or_keycode(const KeySym key_sym,
XkbDescPtr xkb_descr,
const KeyCode keycode)
{
GHOST_TKey type = ghost_key_from_keysym(keysym);
GHOST_TKey type = ghost_key_from_keysym(key_sym);
if (type == GHOST_kKeyUnknown) {
if (xkb_descr) {
type = ghost_key_from_keycode(xkb_descr, keycode);
@ -2376,7 +2381,7 @@ class DialogData {
}
/* Is the mouse inside the given button */
bool isInsideButton(XEvent &e, uint button_num)
bool isInsideButton(const XEvent &e, uint button_num)
{
return (
(e.xmotion.y > int(height - padding_y - button_height)) &&

Some files were not shown because too many files have changed in this diff Show More