Compare commits
3 Commits
file-brows
...
soc-2022-t
Author | SHA1 | Date | |
---|---|---|---|
422c330390 | |||
8cd76bff47 | |||
9d1cfd9dde |
@@ -265,7 +265,6 @@ ForEachMacros:
|
||||
- SET_SLOT_PROBING_BEGIN
|
||||
- MAP_SLOT_PROBING_BEGIN
|
||||
- VECTOR_SET_SLOT_PROBING_BEGIN
|
||||
- WL_ARRAY_FOR_EACH
|
||||
|
||||
StatementMacros:
|
||||
- PyObject_HEAD
|
||||
|
@@ -222,17 +222,6 @@ if(UNIX AND NOT (APPLE OR HAIKU))
|
||||
|
||||
option(WITH_GHOST_WAYLAND "Enable building Blender against Wayland for windowing (under development)" OFF)
|
||||
mark_as_advanced(WITH_GHOST_WAYLAND)
|
||||
|
||||
if (WITH_GHOST_WAYLAND)
|
||||
option(WITH_GHOST_WAYLAND_LIBDECOR "Optionally build with LibDecor window decorations" OFF)
|
||||
mark_as_advanced(WITH_GHOST_WAYLAND_LIBDECOR)
|
||||
|
||||
option(WITH_GHOST_WAYLAND_DBUS "Optionally build with DBUS support (used for Cursor themes). May hang on startup systems where DBUS is not used." OFF)
|
||||
mark_as_advanced(WITH_GHOST_WAYLAND_DBUS)
|
||||
|
||||
option(WITH_GHOST_WAYLAND_DYNLOAD "Enable runtime dynamic WAYLAND libraries loading" OFF)
|
||||
mark_as_advanced(WITH_GHOST_WAYLAND_DYNLOAD)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_GHOST_X11)
|
||||
@@ -266,11 +255,19 @@ if(WITH_GHOST_X11)
|
||||
endif()
|
||||
|
||||
if(UNIX AND NOT APPLE)
|
||||
option(WITH_SYSTEM_GLEW "Use GLEW OpenGL wrapper library provided by the operating system" OFF)
|
||||
option(WITH_SYSTEM_GLES "Use OpenGL ES library provided by the operating system" ON)
|
||||
option(WITH_SYSTEM_FREETYPE "Use the freetype library provided by the operating system" OFF)
|
||||
option(WITH_SYSTEM_EIGEN3 "Use the systems Eigen3 library" OFF)
|
||||
else()
|
||||
# not an option for other OS's
|
||||
set(WITH_SYSTEM_GLEW OFF)
|
||||
set(WITH_SYSTEM_GLES OFF)
|
||||
set(WITH_SYSTEM_FREETYPE OFF)
|
||||
set(WITH_SYSTEM_EIGEN3 OFF)
|
||||
endif()
|
||||
|
||||
|
||||
if(UNIX AND NOT APPLE)
|
||||
option(WITH_SYSTEM_EIGEN3 "Use the systems Eigen3 library" OFF)
|
||||
endif()
|
||||
|
||||
|
||||
@@ -447,7 +444,7 @@ endif()
|
||||
if(NOT APPLE)
|
||||
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
|
||||
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
|
||||
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 CACHE STRING "AMD HIP architectures to build binaries for")
|
||||
set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 CACHE STRING "AMD HIP architectures to build binaries for")
|
||||
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
|
||||
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
|
||||
endif()
|
||||
@@ -457,21 +454,6 @@ if(APPLE)
|
||||
option(WITH_CYCLES_DEVICE_METAL "Enable Cycles Apple Metal compute support" ON)
|
||||
endif()
|
||||
|
||||
# oneAPI
|
||||
if(NOT APPLE)
|
||||
option(WITH_CYCLES_DEVICE_ONEAPI "Enable Cycles oneAPI compute support" OFF)
|
||||
option(WITH_CYCLES_ONEAPI_BINARIES "Enable Ahead-Of-Time compilation for Cycles oneAPI device" OFF)
|
||||
option(WITH_CYCLES_ONEAPI_SYCL_HOST_ENABLED "Enable use of SYCL host (CPU) device execution by oneAPI implementation. This option is for debugging purposes and impacts GPU execution." OFF)
|
||||
|
||||
# https://www.intel.com/content/www/us/en/develop/documentation/oneapi-dpcpp-cpp-compiler-dev-guide-and-reference/top/compilation/ahead-of-time-compilation.html
|
||||
SET (CYCLES_ONEAPI_SPIR64_GEN_DEVICES "dg2" CACHE STRING "oneAPI Intel GPU architectures to build binaries for")
|
||||
SET (CYCLES_ONEAPI_SYCL_TARGETS spir64 spir64_gen CACHE STRING "oneAPI targets to build AOT binaries for")
|
||||
|
||||
mark_as_advanced(WITH_CYCLES_ONEAPI_SYCL_HOST_ENABLED)
|
||||
mark_as_advanced(CYCLES_ONEAPI_SPIR64_GEN_DEVICES)
|
||||
mark_as_advanced(CYCLES_ONEAPI_SYCL_TARGETS)
|
||||
endif()
|
||||
|
||||
# Draw Manager
|
||||
option(WITH_DRAW_DEBUG "Add extra debug capabilities to Draw Manager" OFF)
|
||||
mark_as_advanced(WITH_DRAW_DEBUG)
|
||||
@@ -536,48 +518,20 @@ endif()
|
||||
|
||||
# OpenGL
|
||||
|
||||
# Experimental EGL option.
|
||||
option(WITH_GL_EGL "Use the EGL OpenGL system library instead of the platform specific OpenGL system library (CGL, GLX or WGL)" OFF)
|
||||
mark_as_advanced(WITH_GL_EGL)
|
||||
|
||||
if(WITH_GHOST_WAYLAND)
|
||||
# Wayland can only use EGL to create OpenGL contexts, not GLX.
|
||||
set(WITH_GL_EGL ON)
|
||||
endif()
|
||||
|
||||
if(UNIX AND NOT APPLE)
|
||||
if(WITH_GL_EGL)
|
||||
# GLEW can only be built with either GLX or EGL support. Most binary distributions are
|
||||
# built with GLX support and we have no automated way to detect this. So always build
|
||||
# GLEW from source to be sure it has EGL support.
|
||||
set(WITH_SYSTEM_GLEW OFF)
|
||||
else()
|
||||
option(WITH_SYSTEM_GLEW "Use GLEW OpenGL wrapper library provided by the operating system" OFF)
|
||||
endif()
|
||||
|
||||
option(WITH_SYSTEM_GLES "Use OpenGL ES library provided by the operating system" ON)
|
||||
else()
|
||||
# System GLEW and GLES not an option on other platforms.
|
||||
set(WITH_SYSTEM_GLEW OFF)
|
||||
set(WITH_SYSTEM_GLES OFF)
|
||||
endif()
|
||||
|
||||
option(WITH_OPENGL "When off limits visibility of the opengl headers to just bf_gpu and gawain (temporary option for development purposes)" ON)
|
||||
option(WITH_GLEW_ES "Switches to experimental copy of GLEW that has support for OpenGL ES. (temporary option for development purposes)" OFF)
|
||||
option(WITH_GL_EGL "Use the EGL OpenGL system library instead of the platform specific OpenGL system library (CGL, glX, or WGL)" OFF)
|
||||
option(WITH_GL_PROFILE_ES20 "Support using OpenGL ES 2.0. (through either EGL or the AGL/WGL/XGL 'es20' profile)" OFF)
|
||||
option(WITH_GPU_BUILDTIME_SHADER_BUILDER "Shader builder is a developer option enabling linting on GLSL during compilation" OFF)
|
||||
option(WITH_GPU_SHADER_BUILDER "Shader builder is a developer option enabling linting on GLSL during compilation" OFF)
|
||||
|
||||
mark_as_advanced(
|
||||
WITH_OPENGL
|
||||
WITH_GLEW_ES
|
||||
WITH_GL_EGL
|
||||
WITH_GL_PROFILE_ES20
|
||||
WITH_GPU_BUILDTIME_SHADER_BUILDER
|
||||
WITH_GPU_SHADER_BUILDER
|
||||
)
|
||||
|
||||
if(WITH_HEADLESS)
|
||||
set(WITH_OPENGL OFF)
|
||||
endif()
|
||||
|
||||
# Metal
|
||||
|
||||
if (APPLE)
|
||||
|
@@ -29,12 +29,10 @@ cmake_minimum_required(VERSION 3.5)
|
||||
|
||||
include(ExternalProject)
|
||||
include(cmake/check_software.cmake)
|
||||
include(cmake/options.cmake)
|
||||
# versions.cmake needs to be included after options.cmake due to the BLENDER_PLATFORM_ARM variable being needed.
|
||||
include(cmake/versions.cmake)
|
||||
include(cmake/options.cmake)
|
||||
include(cmake/boost_build_options.cmake)
|
||||
include(cmake/download.cmake)
|
||||
include(cmake/macros.cmake)
|
||||
|
||||
if(ENABLE_MINGW64)
|
||||
include(cmake/setup_mingw64.cmake)
|
||||
@@ -98,15 +96,6 @@ include(cmake/fmt.cmake)
|
||||
include(cmake/robinmap.cmake)
|
||||
if(NOT APPLE)
|
||||
include(cmake/xr_openxr.cmake)
|
||||
if(NOT WIN32 OR BUILD_MODE STREQUAL Release)
|
||||
include(cmake/dpcpp.cmake)
|
||||
include(cmake/dpcpp_deps.cmake)
|
||||
endif()
|
||||
if(NOT WIN32)
|
||||
include(cmake/igc.cmake)
|
||||
include(cmake/gmmlib.cmake)
|
||||
include(cmake/ocloc.cmake)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# OpenColorIO and dependencies.
|
||||
|
@@ -56,7 +56,10 @@ if(UNIX)
|
||||
"On Debian and Ubuntu:\n"
|
||||
" apt install autoconf automake libtool yasm tcl ninja-build meson python3-mako\n"
|
||||
"\n"
|
||||
"On macOS (with homebrew):\n"
|
||||
"On macOS Intel (with homebrew):\n"
|
||||
" brew install autoconf automake bison libtool pkg-config yasm\n"
|
||||
"\n"
|
||||
"On macOS ARM (with homebrew):\n"
|
||||
" brew install autoconf automake bison flex libtool pkg-config yasm\n"
|
||||
"\n"
|
||||
"Other platforms:\n"
|
||||
|
@@ -101,18 +101,3 @@ download_source(ROBINMAP)
|
||||
download_source(IMATH)
|
||||
download_source(PYSTRING)
|
||||
download_source(LEVEL_ZERO)
|
||||
download_source(DPCPP)
|
||||
download_source(VCINTRINSICS)
|
||||
download_source(OPENCLHEADERS)
|
||||
download_source(ICDLOADER)
|
||||
download_source(MP11)
|
||||
download_source(SPIRV_HEADERS)
|
||||
download_source(IGC)
|
||||
download_source(IGC_LLVM)
|
||||
download_source(IGC_OPENCL_CLANG)
|
||||
download_source(IGC_VCINTRINSICS)
|
||||
download_source(IGC_SPIRV_HEADERS)
|
||||
download_source(IGC_SPIRV_TOOLS)
|
||||
download_source(IGC_SPIRV_TRANSLATOR)
|
||||
download_source(GMMLIB)
|
||||
download_source(OCLOC)
|
||||
|
@@ -1,109 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
|
||||
if(WIN32)
|
||||
set(LLVM_GENERATOR "Ninja")
|
||||
else()
|
||||
set(LLVM_GENERATOR "Unix Makefiles")
|
||||
endif()
|
||||
|
||||
set(DPCPP_CONFIGURE_ARGS
|
||||
# When external deps dpcpp needs are not found it will automatically
|
||||
# download them during the configure stage using FetchContent. Given
|
||||
# we need to keep an archive of all source used during build for compliance
|
||||
# reasons it CANNOT download anything we do not know about. By setting
|
||||
# this property to ON, all downloads are disabled, and we will have to
|
||||
# provide the missing deps some other way, a build error beats a compliance
|
||||
# violation
|
||||
--cmake-opt FETCHCONTENT_FULLY_DISCONNECTED=ON
|
||||
)
|
||||
set(DPCPP_SOURCE_ROOT ${BUILD_DIR}/dpcpp/src/external_dpcpp/)
|
||||
set(DPCPP_EXTRA_ARGS
|
||||
# When external deps dpcpp needs are not found it will automatically
|
||||
# download them during the configure stage using FetchContent. Given
|
||||
# we need to keep an archive of all source used during build for compliance
|
||||
# reasons it CANNOT download anything we do not know about. By setting
|
||||
# this property to ON, all downloads are disabled, and we will have to
|
||||
# provide the missing deps some other way, a build or configure error
|
||||
# beats a compliance violation
|
||||
-DFETCHCONTENT_FULLY_DISCONNECTED=ON
|
||||
-DLLVMGenXIntrinsics_SOURCE_DIR=${BUILD_DIR}/vcintrinsics/src/external_vcintrinsics/
|
||||
-DOpenCL_HEADERS=file://${PACKAGE_DIR}/${OPENCLHEADERS_FILE}
|
||||
-DOpenCL_LIBRARY_SRC=file://${PACKAGE_DIR}/${ICDLOADER_FILE}
|
||||
-DBOOST_MP11_SOURCE_DIR=${BUILD_DIR}/mp11/src/external_mp11/
|
||||
-DLEVEL_ZERO_LIBRARY=${LIBDIR}/level-zero/lib/${LIBPREFIX}ze_loader${SHAREDLIBEXT}
|
||||
-DLEVEL_ZERO_INCLUDE_DIR=${LIBDIR}/level-zero/include
|
||||
-DLLVM_EXTERNAL_SPIRV_HEADERS_SOURCE_DIR=${BUILD_DIR}/spirvheaders/src/external_spirvheaders/
|
||||
# Below here is copied from an invocation of buildbot/config.py
|
||||
-DLLVM_ENABLE_ASSERTIONS=ON
|
||||
-DLLVM_TARGETS_TO_BUILD=X86
|
||||
-DLLVM_EXTERNAL_PROJECTS=sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
|
||||
-DLLVM_EXTERNAL_SYCL_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/sycl
|
||||
-DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/llvm-spirv
|
||||
-DLLVM_EXTERNAL_XPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
|
||||
-DXPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
|
||||
-DLLVM_EXTERNAL_XPTIFW_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xptifw
|
||||
-DLLVM_EXTERNAL_LIBDEVICE_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/libdevice
|
||||
-DLLVM_ENABLE_PROJECTS=clang^^sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
|
||||
-DLIBCLC_TARGETS_TO_BUILD=
|
||||
-DLIBCLC_GENERATE_REMANGLED_VARIANTS=OFF
|
||||
-DSYCL_BUILD_PI_HIP_PLATFORM=AMD
|
||||
-DLLVM_BUILD_TOOLS=ON
|
||||
-DSYCL_ENABLE_WERROR=OFF
|
||||
-DSYCL_INCLUDE_TESTS=ON
|
||||
-DLLVM_ENABLE_DOXYGEN=OFF
|
||||
-DLLVM_ENABLE_SPHINX=OFF
|
||||
-DBUILD_SHARED_LIBS=OFF
|
||||
-DSYCL_ENABLE_XPTI_TRACING=ON
|
||||
-DLLVM_ENABLE_LLD=OFF
|
||||
-DXPTI_ENABLE_WERROR=OFF
|
||||
-DSYCL_CLANG_EXTRA_FLAGS=
|
||||
-DSYCL_ENABLE_PLUGINS=level_zero
|
||||
-DCMAKE_INSTALL_RPATH=\$ORIGIN
|
||||
-DPython3_ROOT_DIR=${LIBDIR}/python/
|
||||
-DPython3_EXECUTABLE=${PYTHON_BINARY}
|
||||
-DPYTHON_EXECUTABLE=${PYTHON_BINARY}
|
||||
-DLLDB_ENABLE_CURSES=OFF
|
||||
-DLLVM_ENABLE_TERMINFO=OFF
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
list(APPEND DPCPP_EXTRA_ARGS -DPython3_FIND_REGISTRY=NEVER)
|
||||
endif()
|
||||
|
||||
ExternalProject_Add(external_dpcpp
|
||||
URL file://${PACKAGE_DIR}/${DPCPP_FILE}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
URL_HASH ${DPCPP_HASH_TYPE}=${DPCPP_HASH}
|
||||
PREFIX ${BUILD_DIR}/dpcpp
|
||||
CMAKE_GENERATOR ${LLVM_GENERATOR}
|
||||
SOURCE_SUBDIR llvm
|
||||
LIST_SEPARATOR ^^
|
||||
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/dpcpp ${DEFAULT_CMAKE_FLAGS} ${DPCPP_EXTRA_ARGS}
|
||||
#CONFIGURE_COMMAND ${PYTHON_BINARY} ${BUILD_DIR}/dpcpp/src/external_dpcpp/buildbot/configure.py ${DPCPP_CONFIGURE_ARGS}
|
||||
#BUILD_COMMAND echo "." #${PYTHON_BINARY} ${BUILD_DIR}/dpcpp/src/external_dpcpp/buildbot/compile.py
|
||||
INSTALL_COMMAND ${CMAKE_COMMAND} --build . -- deploy-sycl-toolchain
|
||||
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/dpcpp/src/external_dpcpp < ${PATCH_DIR}/dpcpp.diff
|
||||
INSTALL_DIR ${LIBDIR}/dpcpp
|
||||
)
|
||||
|
||||
add_dependencies(
|
||||
external_dpcpp
|
||||
external_python
|
||||
external_python_site_packages
|
||||
external_vcintrinsics
|
||||
external_openclheaders
|
||||
external_icdloader
|
||||
external_mp11
|
||||
external_level-zero
|
||||
external_spirvheaders
|
||||
)
|
||||
|
||||
if(BUILD_MODE STREQUAL Release AND WIN32)
|
||||
ExternalProject_Add_Step(external_dpcpp after_install
|
||||
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cl.exe
|
||||
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cpp.exe
|
||||
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang.exe
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/dpcpp ${HARVEST_TARGET}/dpcpp
|
||||
)
|
||||
endif()
|
@@ -1,61 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
# These are build time requirements for dpcpp
|
||||
# We only have to unpack these; dpcpp will build
|
||||
# them.
|
||||
|
||||
ExternalProject_Add(external_vcintrinsics
|
||||
URL file://${PACKAGE_DIR}/${VCINTRINSICS_FILE}
|
||||
URL_HASH ${VCINTRINSICS_HASH_TYPE}=${VCINTRINSICS_HASH}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
PREFIX ${BUILD_DIR}/vcintrinsics
|
||||
CONFIGURE_COMMAND echo .
|
||||
BUILD_COMMAND echo .
|
||||
INSTALL_COMMAND echo .
|
||||
)
|
||||
|
||||
# opencl headers do not have to be unpacked, dpcpp will do it
|
||||
# but it wouldn't hurt to do it anyway as an opportunity to validate
|
||||
# the hash is correct.
|
||||
ExternalProject_Add(external_openclheaders
|
||||
URL file://${PACKAGE_DIR}/${OPENCLHEADERS_FILE}
|
||||
URL_HASH ${OPENCLHEADERS_HASH_TYPE}=${OPENCLHEADERS_HASH}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
PREFIX ${BUILD_DIR}/openclheaders
|
||||
CONFIGURE_COMMAND echo .
|
||||
BUILD_COMMAND echo .
|
||||
INSTALL_COMMAND echo .
|
||||
)
|
||||
|
||||
# icdloader does not have to be unpacked, dpcpp will do it
|
||||
# but it wouldn't hurt to do it anyway as an opportunity to validate
|
||||
# the hash is correct.
|
||||
ExternalProject_Add(external_icdloader
|
||||
URL file://${PACKAGE_DIR}/${ICDLOADER_FILE}
|
||||
URL_HASH ${ICDLOADER_HASH_TYPE}=${ICDLOADER_HASH}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
PREFIX ${BUILD_DIR}/icdloader
|
||||
CONFIGURE_COMMAND echo .
|
||||
BUILD_COMMAND echo .
|
||||
INSTALL_COMMAND echo .
|
||||
)
|
||||
|
||||
ExternalProject_Add(external_mp11
|
||||
URL file://${PACKAGE_DIR}/${MP11_FILE}
|
||||
URL_HASH ${MP11_HASH_TYPE}=${MP11_HASH}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
PREFIX ${BUILD_DIR}/mp11
|
||||
CONFIGURE_COMMAND echo .
|
||||
BUILD_COMMAND echo .
|
||||
INSTALL_COMMAND echo .
|
||||
)
|
||||
|
||||
ExternalProject_Add(external_spirvheaders
|
||||
URL file://${PACKAGE_DIR}/${SPIRV_HEADERS_FILE}
|
||||
URL_HASH ${SPIRV_HEADERS_HASH_TYPE}=${SPIRV_HEADERS_HASH}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
PREFIX ${BUILD_DIR}/spirvheaders
|
||||
CONFIGURE_COMMAND echo .
|
||||
BUILD_COMMAND echo .
|
||||
INSTALL_COMMAND echo .
|
||||
)
|
@@ -10,12 +10,18 @@ set(EMBREE_EXTRA_ARGS
|
||||
-DEMBREE_RAY_MASK=ON
|
||||
-DEMBREE_FILTER_FUNCTION=ON
|
||||
-DEMBREE_BACKFACE_CULLING=OFF
|
||||
-DEMBREE_MAX_ISA=AVX2
|
||||
-DEMBREE_TASKING_SYSTEM=TBB
|
||||
-DEMBREE_TBB_ROOT=${LIBDIR}/tbb
|
||||
-DTBB_ROOT=${LIBDIR}/tbb
|
||||
-DTBB_STATIC_LIB=${TBB_STATIC_LIBRARY}
|
||||
)
|
||||
|
||||
if (NOT BLENDER_PLATFORM_ARM)
|
||||
if(BLENDER_PLATFORM_ARM)
|
||||
set(EMBREE_EXTRA_ARGS
|
||||
${EMBREE_EXTRA_ARGS}
|
||||
-DEMBREE_MAX_ISA=NEON)
|
||||
else()
|
||||
set(EMBREE_EXTRA_ARGS
|
||||
${EMBREE_EXTRA_ARGS}
|
||||
-DEMBREE_MAX_ISA=AVX2)
|
||||
@@ -24,10 +30,23 @@ endif()
|
||||
if(TBB_STATIC_LIBRARY)
|
||||
set(EMBREE_EXTRA_ARGS
|
||||
${EMBREE_EXTRA_ARGS}
|
||||
-DEMBREE_TBB_COMPONENT=tbb_static
|
||||
-DEMBREE_TBB_LIBRARY_NAME=tbb_static
|
||||
-DEMBREE_TBBMALLOC_LIBRARY_NAME=tbbmalloc_static
|
||||
)
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
set(EMBREE_BUILD_DIR ${BUILD_MODE}/)
|
||||
if(BUILD_MODE STREQUAL Debug)
|
||||
list(APPEND EMBREE_EXTRA_ARGS
|
||||
-DEMBREE_TBBMALLOC_LIBRARY_NAME=tbbmalloc_debug
|
||||
-DEMBREE_TBB_LIBRARY_NAME=tbb_debug
|
||||
)
|
||||
endif()
|
||||
else()
|
||||
set(EMBREE_BUILD_DIR)
|
||||
endif()
|
||||
|
||||
ExternalProject_Add(external_embree
|
||||
URL file://${PACKAGE_DIR}/${EMBREE_FILE}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
|
@@ -5,8 +5,6 @@ ExternalProject_Add(external_flex
|
||||
URL_HASH ${FLEX_HASH_TYPE}=${FLEX_HASH}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
PREFIX ${BUILD_DIR}/flex
|
||||
# This patch fixes build with some versions of glibc (https://github.com/westes/flex/commit/24fd0551333e7eded87b64dd36062da3df2f6380)
|
||||
PATCH_COMMAND ${PATCH_CMD} -d ${BUILD_DIR}/flex/src/external_flex < ${PATCH_DIR}/flex.diff
|
||||
CONFIGURE_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/flex/src/external_flex/ && ${CONFIGURE_COMMAND} --prefix=${LIBDIR}/flex
|
||||
BUILD_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/flex/src/external_flex/ && make -j${MAKE_THREADS}
|
||||
INSTALL_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/flex/src/external_flex/ && make install
|
||||
|
@@ -1,13 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
set(GMMLIB_EXTRA_ARGS
|
||||
)
|
||||
|
||||
ExternalProject_Add(external_gmmlib
|
||||
URL file://${PACKAGE_DIR}/${GMMLIB_FILE}
|
||||
URL_HASH ${GMMLIB_HASH_TYPE}=${GMMLIB_HASH}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
PREFIX ${BUILD_DIR}/gmmlib
|
||||
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/gmmlib ${DEFAULT_CMAKE_FLAGS} ${GMMLIB_EXTRA_ARGS}
|
||||
INSTALL_DIR ${LIBDIR}/gmmlib
|
||||
)
|
@@ -192,10 +192,6 @@ harvest(zstd/lib zstd/lib "*.a")
|
||||
if(UNIX AND NOT APPLE)
|
||||
harvest(libglu/lib mesa/lib "*.so*")
|
||||
harvest(mesa/lib64 mesa/lib "*.so*")
|
||||
|
||||
harvest(dpcpp dpcpp "*")
|
||||
harvest(igc dpcpp/lib/igc "*")
|
||||
harvest(ocloc dpcpp/lib/ocloc "*")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
endif()
|
||||
|
@@ -1,126 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
unpack_only(igc_vcintrinsics)
|
||||
unpack_only(igc_spirv_headers)
|
||||
unpack_only(igc_spirv_tools)
|
||||
|
||||
#
|
||||
# igc_opencl_clang contains patches that need to be applied
|
||||
# to external_igc_llvm and igc_spirv_translator, we unpack
|
||||
# igc_opencl_clang first, then have the patch stages of
|
||||
# external_igc_llvm and igc_spirv_translator apply them.
|
||||
#
|
||||
|
||||
ExternalProject_Add(external_igc_opencl_clang
|
||||
URL file://${PACKAGE_DIR}/${IGC_OPENCL_CLANG_FILE}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
URL_HASH ${IGC_OPENCL_CLANG_HASH_TYPE}=${IGC_OPENCL_CLANG_HASH}
|
||||
PREFIX ${BUILD_DIR}/igc_opencl_clang
|
||||
CONFIGURE_COMMAND echo .
|
||||
BUILD_COMMAND echo .
|
||||
INSTALL_COMMAND echo .
|
||||
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/igc_opencl_clang/src/external_igc_opencl_clang/ < ${PATCH_DIR}/igc_opencl_clang.diff
|
||||
)
|
||||
|
||||
set(IGC_OPENCL_CLANG_PATCH_DIR ${BUILD_DIR}/igc_opencl_clang/src/external_igc_opencl_clang/patches)
|
||||
set(IGC_LLVM_SOURCE_DIR ${BUILD_DIR}/igc_llvm/src/external_igc_llvm)
|
||||
set(IGC_SPIRV_TRANSLATOR_SOURCE_DIR ${BUILD_DIR}/igc_spirv_translator/src/external_igc_spirv_translator)
|
||||
|
||||
ExternalProject_Add(external_igc_llvm
|
||||
URL file://${PACKAGE_DIR}/${IGC_LLVM_FILE}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
URL_HASH ${IGC_LLVM_HASH_TYPE}=${IGC_LLVM_HASH}
|
||||
PREFIX ${BUILD_DIR}/igc_llvm
|
||||
CONFIGURE_COMMAND echo .
|
||||
BUILD_COMMAND echo .
|
||||
INSTALL_COMMAND echo .
|
||||
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0001-OpenCL-3.0-support.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0002-Remove-__IMAGE_SUPPORT__-macro-for-SPIR.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0003-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0004-OpenCL-support-cl_ext_float_atomics.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0005-OpenCL-Add-cl_khr_integer_dot_product.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0002-Remove-repo-name-in-LLVM-IR.patch
|
||||
)
|
||||
add_dependencies(
|
||||
external_igc_llvm
|
||||
external_igc_opencl_clang
|
||||
)
|
||||
|
||||
ExternalProject_Add(external_igc_spirv_translator
|
||||
URL file://${PACKAGE_DIR}/${IGC_SPIRV_TRANSLATOR_FILE}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
URL_HASH ${IGC_SPIRV_TRANSLATOR_HASH_TYPE}=${IGC_SPIRV_TRANSLATOR_HASH}
|
||||
PREFIX ${BUILD_DIR}/igc_spirv_translator
|
||||
CONFIGURE_COMMAND echo .
|
||||
BUILD_COMMAND echo .
|
||||
INSTALL_COMMAND echo .
|
||||
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0001-update-SPIR-V-headers-for-SPV_INTEL_split_barrier.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0002-Add-support-for-split-barriers-extension-SPV_INTEL_s.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0003-Support-cl_bf16_conversions.patch
|
||||
)
|
||||
add_dependencies(
|
||||
external_igc_spirv_translator
|
||||
external_igc_opencl_clang
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
set(IGC_GENERATOR "Ninja")
|
||||
set(IGC_TARGET Windows64)
|
||||
else()
|
||||
set(IGC_GENERATOR "Unix Makefiles")
|
||||
set(IGC_TARGET Linux64)
|
||||
endif()
|
||||
|
||||
set(IGC_EXTRA_ARGS
|
||||
-DIGC_OPTION__ARCHITECTURE_TARGET=${IGC_TARGET}
|
||||
-DIGC_OPTION__ARCHITECTURE_HOST=${IGC_TARGET}
|
||||
)
|
||||
|
||||
if(UNIX AND NOT APPLE)
|
||||
list(APPEND IGC_EXTRA_ARGS
|
||||
-DFLEX_EXECUTABLE=${LIBDIR}/flex/bin/flex
|
||||
-DFLEX_INCLUDE_DIR=${LIBDIR}/flex/include
|
||||
)
|
||||
endif()
|
||||
|
||||
ExternalProject_Add(external_igc
|
||||
URL file://${PACKAGE_DIR}/${IGC_FILE}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
URL_HASH ${IGC_HASH_TYPE}=${IGC_HASH}
|
||||
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/igc ${DEFAULT_CMAKE_FLAGS} ${IGC_EXTRA_ARGS}
|
||||
|
||||
# IGC is pretty set in its way where sub projects ought to live, for some it offers
|
||||
# hooks to supply alternative folders, others are just hardcoded with no way to configure
|
||||
# we symlink everything here, since it's less work than trying to convince the cmake
|
||||
# scripts to accept alternative locations.
|
||||
#
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E create_symlink ${BUILD_DIR}/igc_llvm/src/external_igc_llvm/ ${BUILD_DIR}/igc/src/llvm-project &&
|
||||
${CMAKE_COMMAND} -E create_symlink ${BUILD_DIR}/igc_opencl_clang/src/external_igc_opencl_clang/ ${BUILD_DIR}/igc/src/llvm-project/llvm/projects/opencl-clang &&
|
||||
${CMAKE_COMMAND} -E create_symlink ${BUILD_DIR}/igc_spirv_translator/src/external_igc_spirv_translator/ ${BUILD_DIR}/igc/src/llvm-project/llvm/projects/llvm-spirv &&
|
||||
${CMAKE_COMMAND} -E create_symlink ${BUILD_DIR}/igc_spirv_tools/src/external_igc_spirv_tools/ ${BUILD_DIR}/igc/src/SPIRV-Tools &&
|
||||
${CMAKE_COMMAND} -E create_symlink ${BUILD_DIR}/igc_spirv_headers/src/external_igc_spirv_headers/ ${BUILD_DIR}/igc/src/SPIRV-Headers &&
|
||||
${CMAKE_COMMAND} -E create_symlink ${BUILD_DIR}/igc_vcintrinsics/src/external_igc_vcintrinsics/ ${BUILD_DIR}/igc/src/vc-intrinsics
|
||||
PREFIX ${BUILD_DIR}/igc
|
||||
INSTALL_DIR ${LIBDIR}/igc
|
||||
INSTALL_COMMAND ${CMAKE_COMMAND} --install . --strip
|
||||
CMAKE_GENERATOR ${IGC_GENERATOR}
|
||||
)
|
||||
|
||||
add_dependencies(
|
||||
external_igc
|
||||
external_igc_vcintrinsics
|
||||
external_igc_llvm
|
||||
external_igc_opencl_clang
|
||||
external_igc_vcintrinsics
|
||||
external_igc_spirv_headers
|
||||
external_igc_spirv_tools
|
||||
external_igc_spirv_translator
|
||||
)
|
||||
|
||||
if(UNIX AND NOT APPLE)
|
||||
add_dependencies(
|
||||
external_igc
|
||||
external_flex
|
||||
)
|
||||
endif()
|
@@ -6,7 +6,6 @@ if(WIN32)
|
||||
-DBISON_EXECUTABLE=${LIBDIR}/flexbison/win_bison.exe
|
||||
-DM4_EXECUTABLE=${DOWNLOAD_DIR}/mingw/mingw64/msys/1.0/bin/m4.exe
|
||||
-DARM_ENABLED=Off
|
||||
-DPython3_FIND_REGISTRY=NEVER
|
||||
)
|
||||
elseif(APPLE)
|
||||
# Use bison and flex installed via Homebrew.
|
||||
@@ -28,7 +27,7 @@ elseif(UNIX)
|
||||
set(ISPC_EXTRA_ARGS_UNIX
|
||||
-DCMAKE_C_COMPILER=${LIBDIR}/llvm/bin/clang
|
||||
-DCMAKE_CXX_COMPILER=${LIBDIR}/llvm/bin/clang++
|
||||
-DARM_ENABLED=${BLENDER_PLATFORM_ARM}
|
||||
-DARM_ENABLED=Off
|
||||
-DFLEX_EXECUTABLE=${LIBDIR}/flex/bin/flex
|
||||
)
|
||||
endif()
|
||||
@@ -44,8 +43,6 @@ set(ISPC_EXTRA_ARGS
|
||||
-DISPC_INCLUDE_TESTS=Off
|
||||
-DCLANG_LIBRARY_DIR=${LIBDIR}/llvm/lib
|
||||
-DCLANG_INCLUDE_DIRS=${LIBDIR}/llvm/include
|
||||
-DPython3_ROOT_DIR=${LIBDIR}/python/
|
||||
-DPython3_EXECUTABLE=${PYTHON_BINARY}
|
||||
${ISPC_EXTRA_ARGS_WIN}
|
||||
${ISPC_EXTRA_ARGS_APPLE}
|
||||
${ISPC_EXTRA_ARGS_UNIX}
|
||||
@@ -64,7 +61,6 @@ ExternalProject_Add(external_ispc
|
||||
add_dependencies(
|
||||
external_ispc
|
||||
ll
|
||||
external_python
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
|
@@ -82,3 +82,4 @@ add_dependencies(
|
||||
ll
|
||||
external_python
|
||||
)
|
||||
|
||||
|
@@ -1,18 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
# shorthand to only unpack a certain dependency
|
||||
macro(unpack_only name)
|
||||
string(TOUPPER ${name} UPPER_NAME)
|
||||
set(TARGET_FILE ${${UPPER_NAME}_FILE})
|
||||
set(TARGET_HASH_TYPE ${${UPPER_NAME}_HASH_TYPE})
|
||||
set(TARGET_HASH ${${UPPER_NAME}_HASH})
|
||||
ExternalProject_Add(external_${name}
|
||||
URL file://${PACKAGE_DIR}/${TARGET_FILE}
|
||||
URL_HASH ${TARGET_HASH_TYPE}=${TARGET_HASH}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
PREFIX ${BUILD_DIR}/${name}
|
||||
CONFIGURE_COMMAND echo .
|
||||
BUILD_COMMAND echo .
|
||||
INSTALL_COMMAND echo .
|
||||
)
|
||||
endmacro()
|
@@ -1,24 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
set(OCLOC_EXTRA_ARGS
|
||||
-DNEO_SKIP_UNIT_TESTS=1
|
||||
-DNEO_BUILD_WITH_OCL=0
|
||||
-DBUILD_WITH_L0=0
|
||||
-DIGC_DIR=${LIBDIR}/igc
|
||||
-DGMM_DIR=${LIBDIR}/gmmlib
|
||||
)
|
||||
|
||||
ExternalProject_Add(external_ocloc
|
||||
URL file://${PACKAGE_DIR}/${OCLOC_FILE}
|
||||
URL_HASH ${OCLOC_HASH_TYPE}=${OCLOC_HASH}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
PREFIX ${BUILD_DIR}/ocloc
|
||||
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/ocloc ${DEFAULT_CMAKE_FLAGS} ${OCLOC_EXTRA_ARGS}
|
||||
INSTALL_DIR ${LIBDIR}/ocloc
|
||||
)
|
||||
|
||||
add_dependencies(
|
||||
external_ocloc
|
||||
external_igc
|
||||
external_gmmlib
|
||||
)
|
@@ -9,7 +9,6 @@ set(OIDN_EXTRA_ARGS
|
||||
-DOIDN_STATIC_RUNTIME=OFF
|
||||
-DISPC_EXECUTABLE=${LIBDIR}/ispc/bin/ispc
|
||||
-DOIDN_FILTER_RTLIGHTMAP=OFF
|
||||
-DPYTHON_EXECUTABLE=${PYTHON_BINARY}
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
@@ -39,7 +38,6 @@ add_dependencies(
|
||||
external_openimagedenoise
|
||||
external_tbb
|
||||
external_ispc
|
||||
external_python
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
|
@@ -38,7 +38,6 @@ message("BUILD_DIR = ${BUILD_DIR}")
|
||||
if(WIN32)
|
||||
set(PATCH_CMD ${DOWNLOAD_DIR}/mingw/mingw64/msys/1.0/bin/patch.exe)
|
||||
set(LIBEXT ".lib")
|
||||
set(SHAREDLIBEXT ".lib")
|
||||
set(LIBPREFIX "")
|
||||
|
||||
# For OIIO and OSL
|
||||
@@ -97,7 +96,6 @@ if(WIN32)
|
||||
else()
|
||||
set(PATCH_CMD patch)
|
||||
set(LIBEXT ".a")
|
||||
set(SHAREDLIBEXT ".so")
|
||||
set(LIBPREFIX "lib")
|
||||
|
||||
if(APPLE)
|
||||
|
@@ -147,7 +147,7 @@ set(OPENIMAGEIO_HASH de45fb38501c4581062b522b53b6141c)
|
||||
set(OPENIMAGEIO_HASH_TYPE MD5)
|
||||
set(OPENIMAGEIO_FILE OpenImageIO-${OPENIMAGEIO_VERSION}.tar.gz)
|
||||
|
||||
# 8.0.0 is currently oiio's preferred version although never versions may be available.
|
||||
# 8.0.0 is currently oiio's preferred vesion although never versions may be available.
|
||||
# the preferred version can be found in oiio's externalpackages.cmake
|
||||
set(FMT_VERSION 8.0.0)
|
||||
set(FMT_URI https://github.com/fmtlib/fmt/archive/refs/tags/${FMT_VERSION}.tar.gz)
|
||||
@@ -155,7 +155,7 @@ set(FMT_HASH 7bce0e9e022e586b178b150002e7c2339994e3c2bbe44027e9abb0d60f9cce83)
|
||||
set(FMT_HASH_TYPE SHA256)
|
||||
set(FMT_FILE fmt-${FMT_VERSION}.tar.gz)
|
||||
|
||||
# 0.6.2 is currently oiio's preferred version although never versions may be available.
|
||||
# 0.6.2 is currently oiio's preferred vesion although never versions may be available.
|
||||
# the preferred version can be found in oiio's externalpackages.cmake
|
||||
set(ROBINMAP_VERSION v0.6.2)
|
||||
set(ROBINMAP_URI https://github.com/Tessil/robin-map/archive/refs/tags/${ROBINMAP_VERSION}.tar.gz)
|
||||
@@ -410,9 +410,9 @@ set(SQLITE_HASH fb558c49ee21a837713c4f1e7e413309aabdd9c7)
|
||||
set(SQLITE_HASH_TYPE SHA1)
|
||||
set(SQLITE_FILE sqlite-src-3240000.zip)
|
||||
|
||||
set(EMBREE_VERSION 3.13.4)
|
||||
set(EMBREE_VERSION 3.13.3)
|
||||
set(EMBREE_URI https://github.com/embree/embree/archive/v${EMBREE_VERSION}.zip)
|
||||
set(EMBREE_HASH 52d0be294d6c88ba7a6c9e046796e7be)
|
||||
set(EMBREE_HASH f62766ba54e48a2f327c3a22596e7133)
|
||||
set(EMBREE_HASH_TYPE MD5)
|
||||
set(EMBREE_FILE embree-v${EMBREE_VERSION}.zip)
|
||||
|
||||
@@ -502,134 +502,3 @@ set(LEVEL_ZERO_URI https://github.com/oneapi-src/level-zero/archive/refs/tags/${
|
||||
set(LEVEL_ZERO_HASH c39bb05a8e5898aa6c444e1704105b93d3f1888b9c333f8e7e73825ffbfb2617)
|
||||
set(LEVEL_ZERO_HASH_TYPE SHA256)
|
||||
set(LEVEL_ZERO_FILE level-zero-${LEVEL_ZERO_VERSION}.tar.gz)
|
||||
|
||||
set(DPCPP_VERSION 20220620)
|
||||
set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/sycl-nightly/${DPCPP_VERSION}.tar.gz)
|
||||
set(DPCPP_HASH a5f41abd5229d28afa92cbd8a5d8d786ee698bf239f722929fd686276bad692c)
|
||||
set(DPCPP_HASH_TYPE SHA256)
|
||||
set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)
|
||||
|
||||
########################
|
||||
### DPCPP DEPS BEGIN ###
|
||||
########################
|
||||
# The following deps are build time requirements for dpcpp, when possible
|
||||
# the source in the dpcpp source tree for the version chosen is documented
|
||||
# by each dep, these will only have to be downloaded and unpacked, dpcpp
|
||||
# will take care of building them, unpack is being done in dpcpp_deps.cmake
|
||||
|
||||
# Source llvm/lib/SYCLLowerIR/CMakeLists.txt
|
||||
set(VCINTRINSICS_VERSION 984bb27baacce6ee5c716c2e64845f2a1928025b)
|
||||
set(VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/${VCINTRINSICS_VERSION}.tar.gz)
|
||||
set(VCINTRINSICS_HASH abea415a15a0dd11fdc94dee8fb462910f2548311b787e02f42509789e1b0d7b)
|
||||
set(VCINTRINSICS_HASH_TYPE SHA256)
|
||||
set(VCINTRINSICS_FILE vc-intrinsics-${VCINTRINSICS_VERSION}.tar.gz)
|
||||
|
||||
# Source opencl/CMakeLists.txt
|
||||
set(OPENCLHEADERS_VERSION dcd5bede6859d26833cd85f0d6bbcee7382dc9b3)
|
||||
set(OPENCLHEADERS_URI https://github.com/KhronosGroup/OpenCL-Headers/archive/${OPENCLHEADERS_VERSION}.tar.gz)
|
||||
set(OPENCLHEADERS_HASH ca8090359654e94f2c41e946b7e9d826253d795ae809ce7c83a7d3c859624693)
|
||||
set(OPENCLHEADERS_HASH_TYPE SHA256)
|
||||
set(OPENCLHEADERS_FILE opencl_headers-${OPENCLHEADERS_VERSION}.tar.gz)
|
||||
|
||||
# Source opencl/CMakeLists.txt
|
||||
set(ICDLOADER_VERSION aec3952654832211636fc4af613710f80e203b0a)
|
||||
set(ICDLOADER_URI https://github.com/KhronosGroup/OpenCL-ICD-Loader/archive/${ICDLOADER_VERSION}.tar.gz)
|
||||
set(ICDLOADER_HASH e1880551d67bd8dc31d13de63b94bbfd6b1f315b6145dad1ffcd159b89bda93c)
|
||||
set(ICDLOADER_HASH_TYPE SHA256)
|
||||
set(ICDLOADER_FILE icdloader-${ICDLOADER_VERSION}.tar.gz)
|
||||
|
||||
# Source sycl/cmake/modules/AddBoostMp11Headers.cmake
|
||||
# Using external MP11 here, getting AddBoostMp11Headers.cmake to recognize
|
||||
# our copy in boost directly was more trouble than it was worth.
|
||||
set(MP11_VERSION 7bc4e1ae9b36ec8ee635c3629b59ec525bbe82b9)
|
||||
set(MP11_URI https://github.com/boostorg/mp11/archive/${MP11_VERSION}.tar.gz)
|
||||
set(MP11_HASH 071ee2bd3952ec89882edb3af25dd1816f6b61723f66e42eea32f4d02ceef426)
|
||||
set(MP11_HASH_TYPE SHA256)
|
||||
set(MP11_FILE mp11-${MP11_VERSION}.tar.gz)
|
||||
|
||||
# Source llvm-spirv/CMakeLists.txt (repo)
|
||||
# Source llvm-spirv/spirv-headers-tag.conf (hash)
|
||||
set(SPIRV_HEADERS_VERSION 36c0c1596225e728bd49abb7ef56a3953e7ed468)
|
||||
set(SPIRV_HEADERS_URI https://github.com/KhronosGroup/SPIRV-Headers/archive/${SPIRV_HEADERS_VERSION}.tar.gz)
|
||||
set(SPIRV_HEADERS_HASH 7a5c89633f8740456fe8adee052033e134476d267411d1336c0cb1e587a9229a)
|
||||
set(SPIRV_HEADERS_HASH_TYPE SHA256)
|
||||
set(SPIRV_HEADERS_FILE SPIR-V-Headers-${SPIRV_HEADERS_VERSION}.tar.gz)
|
||||
|
||||
######################
|
||||
### DPCPP DEPS END ###
|
||||
######################
|
||||
|
||||
##########################################
|
||||
### Intel Graphics Compiler DEPS BEGIN ###
|
||||
##########################################
|
||||
# The following deps are build time requirements for the intel graphics
|
||||
# compiler, the versions used are taken from the following location
|
||||
# https://github.com/intel/intel-graphics-compiler/releases
|
||||
|
||||
set(IGC_VERSION 1.0.11222)
|
||||
set(IGC_URI https://github.com/intel/intel-graphics-compiler/archive/refs/tags/igc-${IGC_VERSION}.tar.gz)
|
||||
set(IGC_HASH d92f0608dcbb52690855685f9447282e5c09c0ba98ae35fabf114fcf8b1e9fcf)
|
||||
set(IGC_HASH_TYPE SHA256)
|
||||
set(IGC_FILE igc-${IGC_VERSION}.tar.gz)
|
||||
|
||||
set(IGC_LLVM_VERSION llvmorg-11.1.0)
|
||||
set(IGC_LLVM_URI https://github.com/llvm/llvm-project/archive/refs/tags/${IGC_LLVM_VERSION}.tar.gz)
|
||||
set(IGC_LLVM_HASH 53a0719f3f4b0388013cfffd7b10c7d5682eece1929a9553c722348d1f866e79)
|
||||
set(IGC_LLVM_HASH_TYPE SHA256)
|
||||
set(IGC_LLVM_FILE ${IGC_LLVM_VERSION}.tar.gz)
|
||||
|
||||
# WARNING WARNING WARNING
|
||||
#
|
||||
# IGC_OPENCL_CLANG contains patches for some of its dependencies.
|
||||
#
|
||||
# Whenever IGC_OPENCL_CLANG_VERSION changes, one *MUST* inspect
|
||||
# IGC_OPENCL_CLANG's patches folder and update igc.cmake to account for
|
||||
# any added or removed patches.
|
||||
#
|
||||
# WARNING WARNING WARNING
|
||||
|
||||
set(IGC_OPENCL_CLANG_VERSION bbdd1587f577397a105c900be114b56755d1f7dc)
|
||||
set(IGC_OPENCL_CLANG_URI https://github.com/intel/opencl-clang/archive/${IGC_OPENCL_CLANG_VERSION}.tar.gz)
|
||||
set(IGC_OPENCL_CLANG_HASH d08315f1b0d8a6fef33de2b3e6aa7356534c324910634962c72523d970773efc)
|
||||
set(IGC_OPENCL_CLANG_HASH_TYPE SHA256)
|
||||
set(IGC_OPENCL_CLANG_FILE opencl-clang-${IGC_OPENCL_CLANG_VERSION}.tar.gz)
|
||||
|
||||
set(IGC_VCINTRINSICS_VERSION v0.4.0)
|
||||
set(IGC_VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/refs/tags/${IGC_VCINTRINSICS_VERSION}.tar.gz)
|
||||
set(IGC_VCINTRINSICS_HASH c8b92682ad5031cf9d5b82a40e7d5c0e763cd9278660adbcaa69aab988e4b589)
|
||||
set(IGC_VCINTRINSICS_HASH_TYPE SHA256)
|
||||
set(IGC_VCINTRINSICS_FILE vc-intrinsics-${IGC_VCINTRINSICS_VERSION}.tar.gz)
|
||||
|
||||
set(IGC_SPIRV_HEADERS_VERSION sdk-1.3.204.1)
|
||||
set(IGC_SPIRV_HEADERS_URI https://github.com/KhronosGroup/SPIRV-Headers/archive/refs/tags/${IGC_SPIRV_HEADERS_VERSION}.tar.gz)
|
||||
set(IGC_SPIRV_HEADERS_HASH 262864053968c217d45b24b89044a7736a32361894743dd6cfe788df258c746c)
|
||||
set(IGC_SPIRV_HEADERS_HASH_TYPE SHA256)
|
||||
set(IGC_SPIRV_HEADERS_FILE SPIR-V-Headers-${IGC_SPIRV_HEADERS_VERSION}.tar.gz)
|
||||
|
||||
set(IGC_SPIRV_TOOLS_VERSION sdk-1.3.204.1)
|
||||
set(IGC_SPIRV_TOOLS_URI https://github.com/KhronosGroup/SPIRV-Tools/archive/refs/tags/${IGC_SPIRV_TOOLS_VERSION}.tar.gz)
|
||||
set(IGC_SPIRV_TOOLS_HASH 6e19900e948944243024aedd0a201baf3854b377b9cc7a386553bc103b087335)
|
||||
set(IGC_SPIRV_TOOLS_HASH_TYPE SHA256)
|
||||
set(IGC_SPIRV_TOOLS_FILE SPIR-V-Tools-${IGC_SPIRV_TOOLS_VERSION}.tar.gz)
|
||||
|
||||
set(IGC_SPIRV_TRANSLATOR_VERSION 99420daab98998a7e36858befac9c5ed109d4920)
|
||||
set(IGC_SPIRV_TRANSLATOR_URI https://github.com/KhronosGroup/SPIRV-LLVM-Translator/archive/${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
|
||||
set(IGC_SPIRV_TRANSLATOR_HASH 77dfb4ddb6bfb993535562c02ddea23f0a0d1c5a0258c1afe7e27c894ff783a8)
|
||||
set(IGC_SPIRV_TRANSLATOR_HASH_TYPE SHA256)
|
||||
set(IGC_SPIRV_TRANSLATOR_FILE SPIR-V-Translator-${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
|
||||
|
||||
########################################
|
||||
### Intel Graphics Compiler DEPS END ###
|
||||
########################################
|
||||
|
||||
set(GMMLIB_VERSION intel-gmmlib-22.1.2)
|
||||
set(GMMLIB_URI https://github.com/intel/gmmlib/archive/refs/tags/${GMMLIB_VERSION}.tar.gz)
|
||||
set(GMMLIB_HASH 3b9a6d5e7e3f5748b3d0a2fb0e980ae943907fece0980bd9c0508e71c838e334)
|
||||
set(GMMLIB_HASH_TYPE SHA256)
|
||||
set(GMMLIB_FILE ${GMMLIB_VERSION}.tar.gz)
|
||||
|
||||
set(OCLOC_VERSION 22.20.23198)
|
||||
set(OCLOC_URI https://github.com/intel/compute-runtime/archive/refs/tags/${OCLOC_VERSION}.tar.gz)
|
||||
set(OCLOC_HASH ab22b8bf2560a57fdd3def0e35a62ca75991406f959c0263abb00cd6cd9ae998)
|
||||
set(OCLOC_HASH_TYPE SHA256)
|
||||
set(OCLOC_FILE ocloc-${OCLOC_VERSION}.tar.gz)
|
||||
|
@@ -465,7 +465,7 @@ TBB_VERSION="2020"
|
||||
TBB_VERSION_SHORT="2020"
|
||||
TBB_VERSION_UPDATE="_U3" # Used for source packages...
|
||||
TBB_VERSION_MIN="2018"
|
||||
TBB_VERSION_MEX="2021" # 2021 introduces 'oneTBB', which has lots of compatibility breakage with previous versions
|
||||
TBB_VERSION_MEX="2022"
|
||||
TBB_FORCE_BUILD=false
|
||||
TBB_FORCE_REBUILD=false
|
||||
TBB_SKIP=false
|
||||
@@ -567,7 +567,7 @@ OPENCOLLADA_FORCE_BUILD=false
|
||||
OPENCOLLADA_FORCE_REBUILD=false
|
||||
OPENCOLLADA_SKIP=false
|
||||
|
||||
EMBREE_VERSION="3.13.4"
|
||||
EMBREE_VERSION="3.13.3"
|
||||
EMBREE_VERSION_SHORT="3.13"
|
||||
EMBREE_VERSION_MIN="3.13"
|
||||
EMBREE_VERSION_MEX="4.0"
|
||||
@@ -635,6 +635,9 @@ MP3LAME_DEV=""
|
||||
OPENJPEG_USE=false
|
||||
OPENJPEG_DEV=""
|
||||
|
||||
# Whether to use system GLEW or not (OpenSubDiv needs recent glew to work).
|
||||
NO_SYSTEM_GLEW=false
|
||||
|
||||
# Switch to english language, else some things (like check_package_DEB()) won't work!
|
||||
LANG_BACK=$LANG
|
||||
LANG=""
|
||||
@@ -1190,7 +1193,7 @@ Those libraries should be available as packages in all recent distributions (opt
|
||||
* libx11, libxcursor, libxi, libxrandr, libxinerama (and other libx... as needed).
|
||||
* libwayland-client0, libwayland-cursor0, libwayland-egl1, libxkbcommon0, libdbus-1-3, libegl1 (Wayland)
|
||||
* libsqlite3, libzstd, libbz2, libssl, libfftw3, libxml2, libtinyxml, yasm, libyaml-cpp, flex.
|
||||
* libsdl2, libglew, libpugixml, libpotrace, [libgmp], fontconfig, [libharu/libhpdf].\""
|
||||
* libsdl2, libglew, libpugixml, libpotrace, [libgmp], [libglewmx], fontconfig, [libharu/libhpdf].\""
|
||||
|
||||
DEPS_SPECIFIC_INFO="\"BUILDABLE DEPENDENCIES:
|
||||
|
||||
@@ -1684,7 +1687,7 @@ compile_TBB() {
|
||||
fi
|
||||
|
||||
# To be changed each time we make edits that would modify the compiled result!
|
||||
tbb_magic=1
|
||||
tbb_magic=0
|
||||
_init_tbb
|
||||
|
||||
# Force having own builds for the dependencies.
|
||||
@@ -2693,13 +2696,14 @@ compile_OSD() {
|
||||
mkdir build
|
||||
cd build
|
||||
|
||||
cmake_d="-D CMAKE_BUILD_TYPE=Release"
|
||||
if [ -d $INST/tbb ]; then
|
||||
cmake_d="$cmake_d -D TBB_LOCATION=$INST/tbb"
|
||||
cmake_d="$cmake_d $cmake_d -D TBB_LOCATION=$INST/tbb"
|
||||
fi
|
||||
cmake_d="-D CMAKE_BUILD_TYPE=Release"
|
||||
cmake_d="$cmake_d -D CMAKE_INSTALL_PREFIX=$_inst"
|
||||
# ptex is only needed when nicholas bishop is ready
|
||||
cmake_d="$cmake_d -D NO_PTEX=1"
|
||||
cmake_d="$cmake_d -D NO_CLEW=1 -D NO_CUDA=1 -D NO_OPENCL=1 -D NO_GLEW=1"
|
||||
cmake_d="$cmake_d -D NO_CLEW=1 -D NO_CUDA=1 -D NO_OPENCL=1"
|
||||
# maya plugin, docs, tutorials, regression tests and examples are not needed
|
||||
cmake_d="$cmake_d -D NO_MAYA=1 -D NO_DOC=1 -D NO_TUTORIALS=1 -D NO_REGRESSION=1 -DNO_EXAMPLES=1"
|
||||
|
||||
@@ -3322,7 +3326,7 @@ compile_Embree() {
|
||||
fi
|
||||
|
||||
# To be changed each time we make edits that would modify the compiled results!
|
||||
embree_magic=11
|
||||
embree_magic=10
|
||||
_init_embree
|
||||
|
||||
# Force having own builds for the dependencies.
|
||||
@@ -3382,7 +3386,7 @@ compile_Embree() {
|
||||
|
||||
cmake_d="$cmake_d -D EMBREE_TASKING_SYSTEM=TBB"
|
||||
if [ -d $INST/tbb ]; then
|
||||
cmake_d="$cmake_d -D EMBREE_TBB_ROOT=$INST/tbb"
|
||||
make_d="$make_d EMBREE_TBB_ROOT=$INST/tbb"
|
||||
fi
|
||||
|
||||
cmake $cmake_d ../
|
||||
@@ -3521,7 +3525,7 @@ compile_OIDN() {
|
||||
install_ISPC
|
||||
|
||||
# To be changed each time we make edits that would modify the compiled results!
|
||||
oidn_magic=10
|
||||
oidn_magic=9
|
||||
_init_oidn
|
||||
|
||||
# Force having own builds for the dependencies.
|
||||
@@ -3577,7 +3581,7 @@ compile_OIDN() {
|
||||
cmake_d="$cmake_d -D ISPC_DIR_HINT=$_ispc_path_bin"
|
||||
|
||||
if [ -d $INST/tbb ]; then
|
||||
cmake_d="$cmake_d -D TBB_ROOT=$INST/tbb"
|
||||
make_d="$make_d TBB_ROOT=$INST/tbb"
|
||||
fi
|
||||
|
||||
cmake $cmake_d ../
|
||||
@@ -4058,6 +4062,7 @@ install_DEB() {
|
||||
libopenal-dev libglew-dev yasm \
|
||||
libsdl2-dev libfftw3-dev patch bzip2 libxml2-dev libtinyxml-dev libjemalloc-dev \
|
||||
libgmp-dev libpugixml-dev libpotrace-dev libhpdf-dev libzstd-dev libpystring-dev"
|
||||
# libglewmx-dev (broken in deb testing currently...)
|
||||
|
||||
VORBIS_USE=true
|
||||
OGG_USE=true
|
||||
@@ -4166,7 +4171,7 @@ install_DEB() {
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check cmake version and disable features for older distros.
|
||||
# Check cmake/glew versions and disable features for older distros.
|
||||
# This is so Blender can at least compile.
|
||||
PRINT ""
|
||||
_cmake=`get_package_version_DEB cmake`
|
||||
@@ -4183,6 +4188,28 @@ install_DEB() {
|
||||
fi
|
||||
fi
|
||||
|
||||
PRINT ""
|
||||
_glew=`get_package_version_DEB libglew-dev`
|
||||
if [ -z $_glew ]; then
|
||||
# Stupid virtual package in Ubuntu 12.04 doesn't show version number...
|
||||
_glew=`apt-cache showpkg libglew-dev|tail -n1|awk '{print $2}'|sed 's/-.*//'`
|
||||
fi
|
||||
version_ge $_glew "1.9.0"
|
||||
if [ $? -eq 1 ]; then
|
||||
version_ge $_glew "1.7.0"
|
||||
if [ $? -eq 1 ]; then
|
||||
WARNING "OpenSubdiv disabled because GLEW-$_glew is not enough"
|
||||
WARNING "Blender will not use system GLEW library"
|
||||
OSD_SKIP=true
|
||||
NO_SYSTEM_GLEW=true
|
||||
else
|
||||
WARNING "OpenSubdiv will compile with GLEW-$_glew but with limited capability"
|
||||
WARNING "Blender will not use system GLEW library"
|
||||
NO_SYSTEM_GLEW=true
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
PRINT ""
|
||||
_do_compile_python=false
|
||||
if [ "$PYTHON_SKIP" = true ]; then
|
||||
@@ -6263,6 +6290,12 @@ print_info() {
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "$NO_SYSTEM_GLEW" = true ]; then
|
||||
_1="-D WITH_SYSTEM_GLEW=OFF"
|
||||
PRINT " $_1"
|
||||
_buildargs="$_buildargs $_1"
|
||||
fi
|
||||
|
||||
if [ "$FFMPEG_SKIP" = false ]; then
|
||||
_1="-D WITH_CODEC_FFMPEG=ON"
|
||||
PRINT " $_1"
|
||||
|
@@ -1,54 +0,0 @@
|
||||
diff -Naur external_dpcpp.orig/sycl/source/CMakeLists.txt external_dpcpp/sycl/source/CMakeLists.txt
|
||||
--- external_dpcpp.orig/sycl/source/CMakeLists.txt 2022-05-20 04:19:45.067771362 +0000
|
||||
+++ external_dpcpp/sycl/source/CMakeLists.txt 2022-05-20 04:21:49.708025048 +0000
|
||||
@@ -66,10 +66,10 @@
|
||||
target_compile_options(${LIB_OBJ_NAME} PUBLIC
|
||||
-fvisibility=hidden -fvisibility-inlines-hidden)
|
||||
set(linker_script "${CMAKE_CURRENT_SOURCE_DIR}/ld-version-script.txt")
|
||||
- set(abi_linker_script "${CMAKE_CURRENT_SOURCE_DIR}/abi_replacements_linux.txt")
|
||||
- target_link_libraries(
|
||||
- ${LIB_NAME} PRIVATE "-Wl,${abi_linker_script}")
|
||||
- set_target_properties(${LIB_NAME} PROPERTIES LINK_DEPENDS ${abi_linker_script})
|
||||
+# set(abi_linker_script "${CMAKE_CURRENT_SOURCE_DIR}/abi_replacements_linux.txt")
|
||||
+# target_link_libraries(
|
||||
+# ${LIB_NAME} PRIVATE "-Wl,${abi_linker_script}")
|
||||
+# set_target_properties(${LIB_NAME} PROPERTIES LINK_DEPENDS ${abi_linker_script})
|
||||
target_link_libraries(
|
||||
${LIB_NAME} PRIVATE "-Wl,--version-script=${linker_script}")
|
||||
set_target_properties(${LIB_NAME} PROPERTIES LINK_DEPENDS ${linker_script})
|
||||
diff -Naur llvm-sycl-nightly-20220501.orig\opencl/CMakeLists.txt llvm-sycl-nightly-20220501\opencl/CMakeLists.txt
|
||||
--- llvm-sycl-nightly-20220501.orig/opencl/CMakeLists.txt 2022-04-29 13:47:11 -0600
|
||||
+++ llvm-sycl-nightly-20220501/opencl/CMakeLists.txt 2022-05-21 15:25:06 -0600
|
||||
@@ -11,6 +11,11 @@
|
||||
)
|
||||
endif()
|
||||
|
||||
+# Blender code below is determined to use FetchContent_Declare
|
||||
+# temporarily allow it (but feed it our downloaded tarball
|
||||
+# in the OpenCL_HEADERS variable
|
||||
+set(FETCHCONTENT_FULLY_DISCONNECTED OFF)
|
||||
+
|
||||
# Repo URLs
|
||||
|
||||
set(OCL_HEADERS_REPO
|
||||
@@ -77,5 +82,6 @@
|
||||
|
||||
FetchContent_MakeAvailable(ocl-icd)
|
||||
add_library(OpenCL-ICD ALIAS OpenCL)
|
||||
+set(FETCHCONTENT_FULLY_DISCONNECTED ON)
|
||||
|
||||
add_subdirectory(opencl-aot)
|
||||
diff -Naur llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice.cmake llvm-sycl-nightly-20220208/libdevice/cmake/modules/SYCLLibdevice.cmake
|
||||
--- llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice.cmake 2022-02-08 09:17:24 -0700
|
||||
+++ llvm-sycl-nightly-20220208/libdevice/cmake/modules/SYCLLibdevice.cmake 2022-05-24 11:35:51 -0600
|
||||
@@ -36,7 +36,9 @@
|
||||
add_custom_target(libsycldevice-obj)
|
||||
add_custom_target(libsycldevice-spv)
|
||||
|
||||
-add_custom_target(libsycldevice DEPENDS
|
||||
+# Blender: add ALL here otherwise this target will not build
|
||||
+# and cause an error due to missing files during the install phase.
|
||||
+add_custom_target(libsycldevice ALL DEPENDS
|
||||
libsycldevice-obj
|
||||
libsycldevice-spv)
|
||||
|
@@ -1,37 +1,30 @@
|
||||
diff -Naur org/kernels/rtcore_config.h.in embree-3.13.4/kernels/rtcore_config.h.in
|
||||
--- org/kernels/rtcore_config.h.in 2022-06-14 22:13:52 -0600
|
||||
+++ embree-3.13.4/kernels/rtcore_config.h.in 2022-06-24 15:20:12 -0600
|
||||
@@ -14,6 +14,7 @@
|
||||
#cmakedefine01 EMBREE_MIN_WIDTH
|
||||
#define RTC_MIN_WIDTH EMBREE_MIN_WIDTH
|
||||
|
||||
+#cmakedefine EMBREE_STATIC_LIB
|
||||
#cmakedefine EMBREE_API_NAMESPACE
|
||||
|
||||
#if defined(EMBREE_API_NAMESPACE)
|
||||
diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt
|
||||
index 7c2f43d..106b1d5 100644
|
||||
--- a/kernels/CMakeLists.txt
|
||||
+++ b/kernels/CMakeLists.txt
|
||||
@@ -201,6 +201,12 @@ embree_files(EMBREE_LIBRARY_FILES_AVX512 ${AVX512})
|
||||
#message("AVX2: ${EMBREE_LIBRARY_FILES_AVX2}")
|
||||
#message("AVX512: ${EMBREE_LIBRARY_FILES_AVX512}")
|
||||
diff -Naur orig/common/sys/platform.h external_embree/common/sys/platform.h
|
||||
--- orig/common/sys/platform.h 2020-05-13 23:08:53 -0600
|
||||
+++ external_embree/common/sys/platform.h 2020-06-13 17:40:26 -0600
|
||||
@@ -84,8 +84,8 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
+# Bundle Neon2x into the main static library.
|
||||
+IF(EMBREE_ISA_NEON2X AND EMBREE_STATIC_LIB)
|
||||
+ LIST(APPEND EMBREE_LIBRARY_FILES ${EMBREE_LIBRARY_FILES_AVX2})
|
||||
+ LIST(REMOVE_DUPLICATES EMBREE_LIBRARY_FILES)
|
||||
+ENDIF()
|
||||
+
|
||||
# replaces all .cpp files with a dummy file that includes that .cpp file
|
||||
# this is to work around an ICC name mangling issue related to lambda functions under windows
|
||||
MACRO (CreateISADummyFiles list isa)
|
||||
@@ -277,7 +283,7 @@ IF (EMBREE_ISA_AVX AND EMBREE_LIBRARY_FILES_AVX)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
-IF (EMBREE_ISA_AVX2 AND EMBREE_LIBRARY_FILES_AVX2)
|
||||
+IF (EMBREE_ISA_AVX2 AND EMBREE_LIBRARY_FILES_AVX2 AND NOT (EMBREE_ISA_NEON2X AND EMBREE_STATIC_LIB))
|
||||
DISABLE_STACK_PROTECTOR_FOR_INTERSECTORS(${EMBREE_LIBRARY_FILES_AVX2})
|
||||
ADD_LIBRARY(embree_avx2 STATIC ${EMBREE_LIBRARY_FILES_AVX2})
|
||||
TARGET_LINK_LIBRARIES(embree_avx2 PRIVATE tasking)
|
||||
#ifdef __WIN32__
|
||||
-#define dll_export __declspec(dllexport)
|
||||
-#define dll_import __declspec(dllimport)
|
||||
+#define dll_export
|
||||
+#define dll_import
|
||||
#else
|
||||
#define dll_export __attribute__ ((visibility ("default")))
|
||||
#define dll_import
|
||||
diff --git orig/common/tasking/CMakeLists.txt external_embree/common/tasking/CMakeLists.txt
|
||||
--- orig/common/tasking/CMakeLists.txt
|
||||
+++ external_embree/common/tasking/CMakeLists.txt
|
||||
@@ -27,7 +27,11 @@
|
||||
else()
|
||||
# If not found try getting older TBB via module (FindTBB.cmake)
|
||||
unset(TBB_DIR CACHE)
|
||||
- find_package(TBB 4.1 REQUIRED tbb)
|
||||
+ if (TBB_STATIC_LIB)
|
||||
+ find_package(TBB 4.1 REQUIRED tbb_static)
|
||||
+ else()
|
||||
+ find_package(TBB 4.1 REQUIRED tbb)
|
||||
+ endif()
|
||||
if (TBB_FOUND)
|
||||
TARGET_LINK_LIBRARIES(tasking PUBLIC TBB)
|
||||
TARGET_INCLUDE_DIRECTORIES(tasking PUBLIC "${TBB_INCLUDE_DIRS}")
|
||||
|
@@ -1,15 +0,0 @@
|
||||
diff --git a/configure.ac b/configure.ac
|
||||
index c6f12d644..3c977a4e3 100644
|
||||
--- a/configure.ac
|
||||
+++ b/configure.ac
|
||||
@@ -25,8 +25,10 @@
|
||||
# autoconf requirements and initialization
|
||||
|
||||
AC_INIT([the fast lexical analyser generator],[2.6.4],[flex-help@lists.sourceforge.net],[flex])
|
||||
+AC_PREREQ([2.60])
|
||||
AC_CONFIG_SRCDIR([src/scan.l])
|
||||
AC_CONFIG_AUX_DIR([build-aux])
|
||||
+AC_USE_SYSTEM_EXTENSIONS
|
||||
LT_INIT
|
||||
AM_INIT_AUTOMAKE([1.15 -Wno-portability foreign std-options dist-lzip parallel-tests subdir-objects])
|
||||
AC_CONFIG_HEADER([src/config.h])
|
@@ -1,44 +0,0 @@
|
||||
diff -Naur external_igc_opencl_clang.orig/CMakeLists.txt external_igc_opencl_clang/CMakeLists.txt
|
||||
--- external_igc_opencl_clang.orig/CMakeLists.txt 2022-03-16 05:51:10 -0600
|
||||
+++ external_igc_opencl_clang/CMakeLists.txt 2022-05-23 10:40:09 -0600
|
||||
@@ -126,22 +126,24 @@
|
||||
)
|
||||
endif()
|
||||
|
||||
-
|
||||
- set(SPIRV_BASE_REVISION llvm_release_110)
|
||||
- set(TARGET_BRANCH "ocl-open-110")
|
||||
- get_filename_component(LLVM_MONOREPO_DIR ${LLVM_SOURCE_DIR} DIRECTORY)
|
||||
- set(LLVM_PATCHES_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/patches/llvm
|
||||
- ${CMAKE_CURRENT_SOURCE_DIR}/patches/clang)
|
||||
- apply_patches(${LLVM_MONOREPO_DIR}
|
||||
- "${LLVM_PATCHES_DIRS}"
|
||||
- ${LLVM_BASE_REVISION}
|
||||
- ${TARGET_BRANCH}
|
||||
- ret)
|
||||
- apply_patches(${SPIRV_SOURCE_DIR}
|
||||
- ${CMAKE_CURRENT_SOURCE_DIR}/patches/spirv
|
||||
- ${SPIRV_BASE_REVISION}
|
||||
- ${TARGET_BRANCH}
|
||||
- ret)
|
||||
+ #
|
||||
+ # Blender: We apply these manually in igc.cmake
|
||||
+ #
|
||||
+ #set(SPIRV_BASE_REVISION llvm_release_110)
|
||||
+ #set(TARGET_BRANCH "ocl-open-110")
|
||||
+ #get_filename_component(LLVM_MONOREPO_DIR ${LLVM_SOURCE_DIR} DIRECTORY)
|
||||
+ #set(LLVM_PATCHES_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/patches/llvm
|
||||
+ # ${CMAKE_CURRENT_SOURCE_DIR}/patches/clang)
|
||||
+ #apply_patches(${LLVM_MONOREPO_DIR}
|
||||
+ # "${LLVM_PATCHES_DIRS}"
|
||||
+ # ${LLVM_BASE_REVISION}
|
||||
+ # ${TARGET_BRANCH}
|
||||
+ # ret)
|
||||
+ #apply_patches(${SPIRV_SOURCE_DIR}
|
||||
+ # ${CMAKE_CURRENT_SOURCE_DIR}/patches/spirv
|
||||
+ # ${SPIRV_BASE_REVISION}
|
||||
+ # ${TARGET_BRANCH}
|
||||
+ # ret)
|
||||
endif(NOT USE_PREBUILT_LLVM)
|
||||
|
||||
#
|
@@ -1,56 +0,0 @@
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
# Copyright 2021-2022 Intel Corporation
|
||||
|
||||
# - Find Level Zero library
|
||||
# Find Level Zero headers and libraries needed by oneAPI implementation
|
||||
# This module defines
|
||||
# LEVEL_ZERO_LIBRARY, libraries to link against in order to use L0.
|
||||
# LEVEL_ZERO_INCLUDE_DIR, directories where L0 headers can be found.
|
||||
# LEVEL_ZERO_ROOT_DIR, The base directory to search for L0 files.
|
||||
# This can also be an environment variable.
|
||||
# LEVEL_ZERO_FOUND, If false, then don't try to use L0.
|
||||
|
||||
IF(NOT LEVEL_ZERO_ROOT_DIR AND NOT $ENV{LEVEL_ZERO_ROOT_DIR} STREQUAL "")
|
||||
SET(LEVEL_ZERO_ROOT_DIR $ENV{LEVEL_ZERO_ROOT_DIR})
|
||||
ENDIF()
|
||||
|
||||
SET(_level_zero_search_dirs
|
||||
${LEVEL_ZERO_ROOT_DIR}
|
||||
/usr/lib
|
||||
/usr/local/lib
|
||||
)
|
||||
|
||||
FIND_LIBRARY(_LEVEL_ZERO_LIBRARY
|
||||
NAMES
|
||||
ze_loader
|
||||
HINTS
|
||||
${_level_zero_search_dirs}
|
||||
PATH_SUFFIXES
|
||||
lib64 lib
|
||||
)
|
||||
|
||||
FIND_PATH(_LEVEL_ZERO_INCLUDE_DIR
|
||||
NAMES
|
||||
level_zero/ze_api.h
|
||||
HINTS
|
||||
${_level_zero_search_dirs}
|
||||
PATH_SUFFIXES
|
||||
include
|
||||
)
|
||||
|
||||
INCLUDE(FindPackageHandleStandardArgs)
|
||||
|
||||
FIND_PACKAGE_HANDLE_STANDARD_ARGS(LevelZero DEFAULT_MSG _LEVEL_ZERO_LIBRARY _LEVEL_ZERO_INCLUDE_DIR)
|
||||
|
||||
IF(LevelZero_FOUND)
|
||||
SET(LEVEL_ZERO_LIBRARY ${_LEVEL_ZERO_LIBRARY})
|
||||
SET(LEVEL_ZERO_INCLUDE_DIR ${_LEVEL_ZERO_INCLUDE_DIR} ${_LEVEL_ZERO_INCLUDE_PARENT_DIR})
|
||||
SET(LEVEL_ZERO_FOUND TRUE)
|
||||
ELSE()
|
||||
SET(LEVEL_ZERO_FOUND FALSE)
|
||||
ENDIF()
|
||||
|
||||
MARK_AS_ADVANCED(
|
||||
LEVEL_ZERO_LIBRARY
|
||||
LEVEL_ZERO_INCLUDE_DIR
|
||||
)
|
@@ -1,88 +0,0 @@
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
# Copyright 2021-2022 Intel Corporation
|
||||
|
||||
# - Find SYCL library
|
||||
# Find the native SYCL header and libraries needed by oneAPI implementation
|
||||
# This module defines
|
||||
# SYCL_COMPILER, compiler which will be used for compilation of SYCL code
|
||||
# SYCL_LIBRARY, libraries to link against in order to use SYCL.
|
||||
# SYCL_INCLUDE_DIR, directories where SYCL headers can be found
|
||||
# SYCL_ROOT_DIR, The base directory to search for SYCL files.
|
||||
# This can also be an environment variable.
|
||||
# SYCL_FOUND, If false, then don't try to use SYCL.
|
||||
|
||||
IF(NOT SYCL_ROOT_DIR AND NOT $ENV{SYCL_ROOT_DIR} STREQUAL "")
|
||||
SET(SYCL_ROOT_DIR $ENV{SYCL_ROOT_DIR})
|
||||
ENDIF()
|
||||
|
||||
SET(_sycl_search_dirs
|
||||
${SYCL_ROOT_DIR}
|
||||
/usr/lib
|
||||
/usr/local/lib
|
||||
/opt/intel/oneapi/compiler/latest/linux/
|
||||
C:/Program\ Files\ \(x86\)/Intel/oneAPI/compiler/latest/windows
|
||||
)
|
||||
|
||||
# Find DPC++ compiler.
|
||||
# Since the compiler name is possibly conflicting with the system-wide
|
||||
# CLang start with looking for either dpcpp or clang binary in the given
|
||||
# list of search paths only. If that fails, try to look for a system-wide
|
||||
# dpcpp binary.
|
||||
FIND_PROGRAM(SYCL_COMPILER
|
||||
NAMES
|
||||
dpcpp
|
||||
clang++
|
||||
HINTS
|
||||
${_sycl_search_dirs}
|
||||
PATH_SUFFIXES
|
||||
bin
|
||||
NO_CMAKE_FIND_ROOT_PATH
|
||||
NAMES_PER_DIR
|
||||
)
|
||||
|
||||
# NOTE: No clang++ here so that we do not pick up a system-wide CLang
|
||||
# compiler.
|
||||
if(NOT SYCL_COMPILER)
|
||||
FIND_PROGRAM(SYCL_COMPILER
|
||||
NAMES
|
||||
dpcpp
|
||||
HINTS
|
||||
${_sycl_search_dirs}
|
||||
PATH_SUFFIXES
|
||||
bin
|
||||
)
|
||||
endif()
|
||||
|
||||
FIND_LIBRARY(SYCL_LIBRARY
|
||||
NAMES
|
||||
sycl
|
||||
HINTS
|
||||
${_sycl_search_dirs}
|
||||
PATH_SUFFIXES
|
||||
lib64 lib
|
||||
)
|
||||
|
||||
FIND_PATH(SYCL_INCLUDE_DIR
|
||||
NAMES
|
||||
CL/sycl.hpp
|
||||
HINTS
|
||||
${_sycl_search_dirs}
|
||||
PATH_SUFFIXES
|
||||
include
|
||||
include/sycl
|
||||
)
|
||||
|
||||
INCLUDE(FindPackageHandleStandardArgs)
|
||||
|
||||
FIND_PACKAGE_HANDLE_STANDARD_ARGS(SYCL DEFAULT_MSG SYCL_LIBRARY SYCL_INCLUDE_DIR)
|
||||
|
||||
IF(SYCL_FOUND)
|
||||
get_filename_component(_SYCL_INCLUDE_PARENT_DIR ${SYCL_INCLUDE_DIR} DIRECTORY)
|
||||
SET(SYCL_INCLUDE_DIR ${SYCL_INCLUDE_DIR} ${_SYCL_INCLUDE_PARENT_DIR})
|
||||
ELSE()
|
||||
SET(SYCL_SYCL_FOUND FALSE)
|
||||
ENDIF()
|
||||
|
||||
MARK_AS_ADVANCED(
|
||||
_SYCL_INCLUDE_PARENT_DIR
|
||||
)
|
@@ -70,7 +70,7 @@ if(NOT WIN32)
|
||||
set(WITH_JACK ON CACHE BOOL "" FORCE)
|
||||
endif()
|
||||
if(WIN32)
|
||||
set(WITH_WASAPI ON CACHE BOOL "" FORCE)
|
||||
set(WITH_WASAPI ON CACHE BOOL "" FORCE)
|
||||
endif()
|
||||
if(UNIX AND NOT APPLE)
|
||||
set(WITH_DOC_MANPAGE ON CACHE BOOL "" FORCE)
|
||||
@@ -78,11 +78,6 @@ if(UNIX AND NOT APPLE)
|
||||
set(WITH_PULSEAUDIO ON CACHE BOOL "" FORCE)
|
||||
set(WITH_X11_XINPUT ON CACHE BOOL "" FORCE)
|
||||
set(WITH_X11_XF86VMODE ON CACHE BOOL "" FORCE)
|
||||
|
||||
# Disable oneAPI on Linux for the time being.
|
||||
# The AoT compilation takes too long to be used officially in the buildbot CI/CD and the JIT
|
||||
# compilation has ABI compatibility issues when running builds made on centOS on Ubuntu.
|
||||
set(WITH_CYCLES_DEVICE_ONEAPI OFF CACHE BOOL "" FORCE)
|
||||
endif()
|
||||
if(NOT APPLE)
|
||||
set(WITH_XR_OPENXR ON CACHE BOOL "" FORCE)
|
||||
@@ -91,8 +86,4 @@ if(NOT APPLE)
|
||||
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_DEVICE_ONEAPI ON CACHE BOOL "" FORCE)
|
||||
|
||||
# Disable AoT kernel compilation until buildbot can deliver the kernels in a reasonable time.
|
||||
set(WITH_CYCLES_ONEAPI_BINARIES OFF CACHE BOOL "" FORCE)
|
||||
endif()
|
||||
|
@@ -38,15 +38,9 @@ if(EXISTS ${LIBDIR})
|
||||
message(STATUS "Using pre-compiled LIBDIR: ${LIBDIR}")
|
||||
|
||||
file(GLOB LIB_SUBDIRS ${LIBDIR}/*)
|
||||
|
||||
# Ignore Mesa software OpenGL libraries, they are not intended to be
|
||||
# linked against but to optionally override at runtime.
|
||||
list(REMOVE_ITEM LIB_SUBDIRS ${LIBDIR}/mesa)
|
||||
|
||||
# Ignore DPC++ as it contains its own copy of LLVM/CLang which we do
|
||||
# not need to be ever discovered for the Blender linking.
|
||||
list(REMOVE_ITEM LIB_SUBDIRS ${LIBDIR}/dpcpp)
|
||||
|
||||
# NOTE: Make sure "proper" compiled zlib comes first before the one
|
||||
# which is a part of OpenCollada. They have different ABI, and we
|
||||
# do need to use the official one.
|
||||
@@ -277,18 +271,6 @@ if(WITH_CYCLES AND WITH_CYCLES_OSL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_DEVICE_ONEAPI)
|
||||
set(CYCLES_LEVEL_ZERO ${LIBDIR}/level-zero CACHE PATH "Path to Level Zero installation")
|
||||
if(EXISTS ${CYCLES_LEVEL_ZERO} AND NOT LEVEL_ZERO_ROOT_DIR)
|
||||
set(LEVEL_ZERO_ROOT_DIR ${CYCLES_LEVEL_ZERO})
|
||||
endif()
|
||||
|
||||
set(CYCLES_SYCL ${LIBDIR}/dpcpp CACHE PATH "Path to DPC++ and SYCL installation")
|
||||
if(EXISTS ${CYCLES_SYCL} AND NOT SYCL_ROOT_DIR)
|
||||
set(SYCL_ROOT_DIR ${CYCLES_SYCL})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_OPENVDB)
|
||||
find_package_wrapper(OpenVDB)
|
||||
find_package_wrapper(Blosc)
|
||||
@@ -631,42 +613,17 @@ if(WITH_GHOST_WAYLAND)
|
||||
pkg_check_modules(wayland-scanner REQUIRED wayland-scanner)
|
||||
pkg_check_modules(xkbcommon REQUIRED xkbcommon)
|
||||
pkg_check_modules(wayland-cursor REQUIRED wayland-cursor)
|
||||
pkg_check_modules(dbus REQUIRED dbus-1)
|
||||
|
||||
if(WITH_GHOST_WAYLAND_DBUS)
|
||||
pkg_check_modules(dbus REQUIRED dbus-1)
|
||||
endif()
|
||||
|
||||
if(WITH_GHOST_WAYLAND_LIBDECOR)
|
||||
pkg_check_modules(libdecor REQUIRED libdecor-0>=0.1)
|
||||
endif()
|
||||
set(WITH_GL_EGL ON)
|
||||
|
||||
list(APPEND PLATFORM_LINKLIBS
|
||||
${wayland-client_LINK_LIBRARIES}
|
||||
${wayland-egl_LINK_LIBRARIES}
|
||||
${xkbcommon_LINK_LIBRARIES}
|
||||
${wayland-cursor_LINK_LIBRARIES}
|
||||
${dbus_LINK_LIBRARIES}
|
||||
)
|
||||
|
||||
if(NOT WITH_GHOST_WAYLAND_DYNLOAD)
|
||||
list(APPEND PLATFORM_LINKLIBS
|
||||
${wayland-client_LINK_LIBRARIES}
|
||||
${wayland-egl_LINK_LIBRARIES}
|
||||
${wayland-cursor_LINK_LIBRARIES}
|
||||
)
|
||||
endif()
|
||||
|
||||
if(WITH_GHOST_WAYLAND_DBUS)
|
||||
list(APPEND PLATFORM_LINKLIBS
|
||||
${dbus_LINK_LIBRARIES}
|
||||
)
|
||||
add_definitions(-DWITH_GHOST_WAYLAND_DBUS)
|
||||
endif()
|
||||
|
||||
if(WITH_GHOST_WAYLAND_LIBDECOR)
|
||||
if(NOT WITH_GHOST_WAYLAND_DYNLOAD)
|
||||
list(APPEND PLATFORM_LINKLIBS
|
||||
${libdecor_LIBRARIES}
|
||||
)
|
||||
endif()
|
||||
add_definitions(-DWITH_GHOST_WAYLAND_LIBDECOR)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_GHOST_X11)
|
||||
|
@@ -950,6 +950,3 @@ endif()
|
||||
|
||||
set(ZSTD_INCLUDE_DIRS ${LIBDIR}/zstd/include)
|
||||
set(ZSTD_LIBRARIES ${LIBDIR}/zstd/lib/zstd_static.lib)
|
||||
|
||||
set(LEVEL_ZERO_ROOT_DIR ${LIBDIR}/level_zero)
|
||||
set(SYCL_ROOT_DIR ${LIBDIR}/dpcpp)
|
||||
|
@@ -54,8 +54,6 @@ buildbot:
|
||||
version: '10.1.243'
|
||||
cuda11:
|
||||
version: '11.4.1'
|
||||
hip:
|
||||
version: '5.2.21440'
|
||||
optix:
|
||||
version: '7.3.0'
|
||||
cmake:
|
||||
|
@@ -1,111 +1,59 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
"""
|
||||
---------------
|
||||
Dump the python API into a text file so we can generate changelogs.
|
||||
|
||||
Dump the python API into a JSON file, or generate changelogs from those JSON API dumps.
|
||||
output from this tool should be added into "doc/python_api/rst/change_log.rst"
|
||||
|
||||
Typically, changelog output from this tool should be added into "doc/python_api/rst/change_log.rst"
|
||||
# dump api blender_version.py in CWD
|
||||
blender --background --python doc/python_api/sphinx_changelog_gen.py -- --dump
|
||||
|
||||
API dump files are saved together with the generated API doc on the server, with a general index file.
|
||||
This way the changelog generation simply needs to re-download the previous version's dump for the diffing process.
|
||||
|
||||
---------------
|
||||
|
||||
# Dump api blender_version.json in CWD:
|
||||
blender --background --factory-startup --python doc/python_api/sphinx_changelog_gen.py -- \
|
||||
--indexpath="path/to/api/docs/api_dump_index.json" \
|
||||
dump --filepath-out="path/to/api/docs/<version>/api_dump.json"
|
||||
|
||||
# Create changelog:
|
||||
# create changelog
|
||||
blender --background --factory-startup --python doc/python_api/sphinx_changelog_gen.py -- \
|
||||
--indexpath="path/to/api/docs/api_dump_index.json" \
|
||||
changelog --filepath-out doc/python_api/rst/change_log.rst
|
||||
--api_from blender_2_63_0.py \
|
||||
--api_to blender_2_64_0.py \
|
||||
--api_out changes.rst
|
||||
|
||||
# Api comparison can also run without blender,
|
||||
# will by default generate changelog between the last two available versions listed in the index,
|
||||
# unless input files are provided explicitly:
|
||||
|
||||
# Api comparison can also run without blender
|
||||
python doc/python_api/sphinx_changelog_gen.py -- \
|
||||
--indexpath="path/to/api/docs/api_dump_index.json" \
|
||||
changelog --filepath-in-from blender_api_2_63_0.json \
|
||||
--filepath-in-to blender_api_2_64_0.json \
|
||||
--filepath-out changes.rst
|
||||
--api_from blender_api_2_63_0.py \
|
||||
--api_to blender_api_2_64_0.py \
|
||||
--api_out changes.rst
|
||||
|
||||
--------------
|
||||
|
||||
API dump index format:
|
||||
|
||||
{[version_main, version_sub]: "<version>/api_dump.json", ...
|
||||
}
|
||||
|
||||
API dump format:
|
||||
|
||||
[
|
||||
[version_main, version_sub, version_path],
|
||||
{"module.name":
|
||||
{"parent.class":
|
||||
{"basic_type", "member_name":
|
||||
["Name", type, range, length, default, descr, f_args, f_arg_types, f_ret_types]}, ...
|
||||
}, ...
|
||||
}
|
||||
]
|
||||
# Save the latest API dump in this folder, renaming it with its revision.
|
||||
# This way the next person updating it doesn't need to build an old Blender only for that
|
||||
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
# format
|
||||
'''
|
||||
{"module.name":
|
||||
{"parent.class":
|
||||
{"basic_type", "member_name":
|
||||
("Name", type, range, length, default, descr, f_args, f_arg_types, f_ret_types)}, ...
|
||||
}, ...
|
||||
}
|
||||
'''
|
||||
|
||||
# Field names of one API member entry, in positional order.
# Every name needs its own comma: adjacent string literals are silently concatenated,
# which would merge two names and shift every following index by one.
api_names = "basic_type", "name", "type", "range", "length", "default", "descr", "f_args", "f_arg_types", "f_ret_types"

# Positions of selected fields inside `api_names`.
API_BASIC_TYPE = 0
API_F_ARGS = 7
|
||||
|
||||
|
||||
def api_version():
    """
    Return the version of the running Blender as ``(version, version_key)``.

    ``version`` is the ``(major, minor)`` tuple taken from ``bpy.app.version`` and
    ``version_key`` is the matching ``"major.minor"`` string.

    When ``bpy`` cannot be imported, ``(None, None)`` is returned instead.
    """
    try:
        import bpy
    except ImportError:
        # Only a failed import means "no Blender available"; a bare `except` would also
        # swallow KeyboardInterrupt/SystemExit and hide unrelated errors.
        return None, None
    version = tuple(bpy.app.version[:2])
    version_key = "%d.%d" % (version[0], version[1])
    return version, version_key
|
||||
def api_dunp_fname():
|
||||
import bpy
|
||||
return "blender_api_%s.py" % "_".join([str(i) for i in bpy.app.version])
|
||||
|
||||
|
||||
def api_version_previous_in_index(index, version):
    """
    Find the closest version below ``version`` that has an entry in ``index``.

    :arg index: Mapping whose keys are ``"major.minor"`` strings.
    :arg version: Sequence starting with ``(major, minor)``.
    :return: ``((major, minor), "major.minor")`` for the first match found while
       counting downwards, or ``(None, None)`` once the major version drops below zero.
    """
    print("Searching for previous version to %s in %r" % (version, index))
    major, minor = version[0], version[1]
    while True:
        minor -= 1
        if minor < 0:
            # Minor underflow: continue from minor 99 of the previous major version.
            major, minor = major - 1, 99
        if major < 0:
            return None, None
        candidate_key = "%d.%d" % (major, minor)
        if candidate_key in index:
            candidate = (major, minor)
            print("Found previous version %s: %r" % (candidate, index[candidate_key]))
            return candidate, candidate_key
|
||||
|
||||
|
||||
class JSONEncoderAPIDump(json.JSONEncoder):
    """
    JSON encoder that additionally accepts ``Ellipsis`` and ``set`` values.
    """

    def default(self, o):
        # Sets have no JSON counterpart, hand them over as a sequence.
        if isinstance(o, set):
            return tuple(o)
        # `...` is written out as its literal spelling.
        if o is Ellipsis:
            return "..."
        # Anything else is left to the base class (which raises TypeError).
        return super().default(o)
|
||||
|
||||
|
||||
def api_dump(args):
|
||||
import rna_info
|
||||
import inspect
|
||||
|
||||
version, version_key = api_version()
|
||||
if version is None:
|
||||
raise(ValueError("API dumps can only be generated from within Blender."))
|
||||
|
||||
def api_dump():
|
||||
dump = {}
|
||||
dump_module = dump["bpy.types"] = {}
|
||||
|
||||
import rna_info
|
||||
import inspect
|
||||
|
||||
struct = rna_info.BuildRNAInfo()[0]
|
||||
for struct_id, struct_info in sorted(struct.items()):
|
||||
|
||||
@@ -207,25 +155,17 @@ def api_dump(args):
|
||||
)
|
||||
del funcs
|
||||
|
||||
filepath_out = args.filepath_out
|
||||
with open(filepath_out, 'w', encoding='utf-8') as file_handle:
|
||||
json.dump((version, dump), file_handle, cls=JSONEncoderAPIDump)
|
||||
import pprint
|
||||
|
||||
indexpath = args.indexpath
|
||||
rootpath = os.path.dirname(indexpath)
|
||||
if os.path.exists(indexpath):
|
||||
with open(indexpath, 'r', encoding='utf-8') as file_handle:
|
||||
index = json.load(file_handle)
|
||||
else:
|
||||
index = {}
|
||||
index[version_key] = os.path.relpath(filepath_out, rootpath)
|
||||
with open(indexpath, 'w', encoding='utf-8') as file_handle:
|
||||
json.dump(index, file_handle)
|
||||
|
||||
print("API version %s dumped into %r, and index %r has been updated" % (version_key, filepath_out, indexpath))
|
||||
filename = api_dunp_fname()
|
||||
filehandle = open(filename, 'w', encoding='utf-8')
|
||||
tot = filehandle.write(pprint.pformat(dump, width=1))
|
||||
filehandle.close()
|
||||
print("%s, %d bytes written" % (filename, tot))
|
||||
|
||||
|
||||
def compare_props(a, b, fuzz=0.75):
|
||||
|
||||
# must be same basic_type, function != property
|
||||
if a[0] != b[0]:
|
||||
return False
|
||||
@@ -240,44 +180,15 @@ def compare_props(a, b, fuzz=0.75):
|
||||
return ((tot / totlen) >= fuzz)
|
||||
|
||||
|
||||
def api_changelog(args):
|
||||
indexpath = args.indexpath
|
||||
filepath_in_from = args.filepath_in_from
|
||||
filepath_in_to = args.filepath_in_to
|
||||
filepath_out = args.filepath_out
|
||||
def api_changelog(api_from, api_to, api_out):
|
||||
|
||||
rootpath = os.path.dirname(indexpath)
|
||||
file_handle = open(api_from, 'r', encoding='utf-8')
|
||||
dict_from = eval(file_handle.read())
|
||||
file_handle.close()
|
||||
|
||||
version, version_key = api_version()
|
||||
if version is None and (filepath_in_from is None or filepath_in_to is None):
|
||||
raise(ValueError("API dumps files must be given when ran outside of Blender."))
|
||||
|
||||
with open(indexpath, 'r', encoding='utf-8') as file_handle:
|
||||
index = json.load(file_handle)
|
||||
|
||||
if filepath_in_to is None:
|
||||
filepath_in_to = index.get(version_key, None)
|
||||
if filepath_in_to is None:
|
||||
raise(ValueError("Cannot find API dump file for Blender version " + str(version) + " in index file."))
|
||||
|
||||
print("Found to file: %r" % filepath_in_to)
|
||||
|
||||
if filepath_in_from is None:
|
||||
version_from, version_from_key = api_version_previous_in_index(index, version)
|
||||
if version_from is None:
|
||||
raise(ValueError("No previous version of Blender could be found in the index."))
|
||||
filepath_in_from = index.get(version_from_key, None)
|
||||
if filepath_in_from is None:
|
||||
raise(ValueError("Cannot find API dump file for previous Blender version " + str(version_from) + " in index file."))
|
||||
|
||||
print("Found from file: %r" % filepath_in_from)
|
||||
|
||||
with open(os.path.join(rootpath, filepath_in_from), 'r', encoding='utf-8') as file_handle:
|
||||
_, dict_from = json.load(file_handle)
|
||||
|
||||
with open(os.path.join(rootpath, filepath_in_to), 'r', encoding='utf-8') as file_handle:
|
||||
dump_version, dict_to = json.load(file_handle)
|
||||
assert(tuple(dump_version) == version)
|
||||
file_handle = open(api_to, 'r', encoding='utf-8')
|
||||
dict_to = eval(file_handle.read())
|
||||
file_handle.close()
|
||||
|
||||
api_changes = []
|
||||
|
||||
@@ -338,66 +249,63 @@ def api_changelog(args):
|
||||
|
||||
# also document function argument changes
|
||||
|
||||
with open(filepath_out, 'w', encoding='utf-8') as fout:
|
||||
fw = fout.write
|
||||
fout = open(api_out, 'w', encoding='utf-8')
|
||||
fw = fout.write
|
||||
# print(api_changes)
|
||||
|
||||
# Write header.
|
||||
fw(""
|
||||
":tocdepth: 2\n"
|
||||
"\n"
|
||||
"Blender API Change Log\n"
|
||||
"**********************\n"
|
||||
"\n"
|
||||
".. note, this document is auto generated by sphinx_changelog_gen.py\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"%s to %s\n"
|
||||
"============\n"
|
||||
"\n" % (version_from_key, version_key))
|
||||
# :class:`bpy_struct.id_data`
|
||||
|
||||
def write_title(title, title_char):
|
||||
fw("%s\n%s\n\n" % (title, title_char * len(title)))
|
||||
def write_title(title, title_char):
|
||||
fw("%s\n%s\n\n" % (title, title_char * len(title)))
|
||||
|
||||
for mod_id, class_id, props_moved, props_new, props_old, func_args in api_changes:
|
||||
class_name = class_id.split(".")[-1]
|
||||
title = mod_id + "." + class_name
|
||||
write_title(title, "-")
|
||||
for mod_id, class_id, props_moved, props_new, props_old, func_args in api_changes:
|
||||
class_name = class_id.split(".")[-1]
|
||||
title = mod_id + "." + class_name
|
||||
write_title(title, "-")
|
||||
|
||||
if props_new:
|
||||
write_title("Added", "^")
|
||||
for prop_id in props_new:
|
||||
fw("* :class:`%s.%s.%s`\n" % (mod_id, class_name, prop_id))
|
||||
fw("\n")
|
||||
if props_new:
|
||||
write_title("Added", "^")
|
||||
for prop_id in props_new:
|
||||
fw("* :class:`%s.%s.%s`\n" % (mod_id, class_name, prop_id))
|
||||
fw("\n")
|
||||
|
||||
if props_old:
|
||||
write_title("Removed", "^")
|
||||
for prop_id in props_old:
|
||||
fw("* **%s**\n" % prop_id) # can't link to removed docs
|
||||
fw("\n")
|
||||
if props_old:
|
||||
write_title("Removed", "^")
|
||||
for prop_id in props_old:
|
||||
fw("* **%s**\n" % prop_id) # can't link to removed docs
|
||||
fw("\n")
|
||||
|
||||
if props_moved:
|
||||
write_title("Renamed", "^")
|
||||
for prop_id_old, prop_id in props_moved:
|
||||
fw("* **%s** -> :class:`%s.%s.%s`\n" % (prop_id_old, mod_id, class_name, prop_id))
|
||||
fw("\n")
|
||||
if props_moved:
|
||||
write_title("Renamed", "^")
|
||||
for prop_id_old, prop_id in props_moved:
|
||||
fw("* **%s** -> :class:`%s.%s.%s`\n" % (prop_id_old, mod_id, class_name, prop_id))
|
||||
fw("\n")
|
||||
|
||||
if func_args:
|
||||
write_title("Function Arguments", "^")
|
||||
for func_id, args_old, args_new in func_args:
|
||||
args_new = ", ".join(args_new)
|
||||
args_old = ", ".join(args_old)
|
||||
fw("* :class:`%s.%s.%s` (%s), *was (%s)*\n" % (mod_id, class_name, func_id, args_new, args_old))
|
||||
fw("\n")
|
||||
if func_args:
|
||||
write_title("Function Arguments", "^")
|
||||
for func_id, args_old, args_new in func_args:
|
||||
args_new = ", ".join(args_new)
|
||||
args_old = ", ".join(args_old)
|
||||
fw("* :class:`%s.%s.%s` (%s), *was (%s)*\n" % (mod_id, class_name, func_id, args_new, args_old))
|
||||
fw("\n")
|
||||
|
||||
print("Written: %r" % filepath_out)
|
||||
fout.close()
|
||||
|
||||
print("Written: %r" % api_out)
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
def main():
|
||||
import sys
|
||||
import argparse
|
||||
import os
|
||||
|
||||
if argv is None:
|
||||
argv = sys.argv
|
||||
try:
|
||||
import argparse
|
||||
except ImportError:
|
||||
print("Old Blender, just dumping")
|
||||
api_dump()
|
||||
return
|
||||
|
||||
argv = sys.argv
|
||||
|
||||
if "--" not in argv:
|
||||
argv = [] # as if no args are passed
|
||||
@@ -408,42 +316,42 @@ def main(argv=None):
|
||||
usage_text = "Run blender in background mode with this script: "
|
||||
"blender --background --factory-startup --python %s -- [options]" % os.path.basename(__file__)
|
||||
|
||||
parser = argparse.ArgumentParser(description=usage_text,
|
||||
epilog=__doc__,
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter)
|
||||
epilog = "Run this before releases"
|
||||
|
||||
parser = argparse.ArgumentParser(description=usage_text, epilog=epilog)
|
||||
|
||||
parser.add_argument(
|
||||
"--indexpath", dest="indexpath", metavar='FILE', required=True,
|
||||
help="Path of the JSON file containing the index of all available API dumps.")
|
||||
"--dump", dest="dump", action='store_true',
|
||||
help="When set the api will be dumped into blender_version.py")
|
||||
|
||||
parser_commands = parser.add_subparsers(required=True)
|
||||
parser.add_argument(
|
||||
"--api_from", dest="api_from", metavar='FILE',
|
||||
help="File to compare from (previous version)")
|
||||
parser.add_argument(
|
||||
"--api_to", dest="api_to", metavar='FILE',
|
||||
help="File to compare from (current)")
|
||||
parser.add_argument(
|
||||
"--api_out", dest="api_out", metavar='FILE',
|
||||
help="Output sphinx changelog")
|
||||
|
||||
parser_dump = parser_commands.add_parser('dump', help="Dump the current Blender Python API into a JSON file.")
|
||||
parser_dump.add_argument(
|
||||
"--filepath-out", dest="filepath_out", metavar='FILE', required=True,
|
||||
help="Path of the JSON file containing the dump of the API.")
|
||||
parser_dump.set_defaults(func=api_dump)
|
||||
args = parser.parse_args(argv) # In this example we won't use the args
|
||||
|
||||
parser_changelog = parser_commands.add_parser(
|
||||
'changelog',
|
||||
help="Generate the RST changelog page based on two Blender Python API JSON dumps.",
|
||||
)
|
||||
if not argv:
|
||||
print("No args given!")
|
||||
parser.print_help()
|
||||
return
|
||||
|
||||
parser_changelog.add_argument(
|
||||
"--filepath-in-from", dest="filepath_in_from", metavar='FILE', default=None,
|
||||
help="JSON dump file to compare from (typically, previous version). "
|
||||
"If not given, will be automatically determined from current Blender version and index file.")
|
||||
parser_changelog.add_argument(
|
||||
"--filepath-in-to", dest="filepath_in_to", metavar='FILE', default=None,
|
||||
help="JSON dump file to compare to (typically, current version). "
|
||||
"If not given, will be automatically determined from current Blender version and index file.")
|
||||
parser_changelog.add_argument(
|
||||
"--filepath-out", dest="filepath_out", metavar='FILE', required=True,
|
||||
help="Output sphinx changelog RST file.")
|
||||
parser_changelog.set_defaults(func=api_changelog)
|
||||
if args.dump:
|
||||
api_dump()
|
||||
else:
|
||||
if args.api_from and args.api_to and args.api_out:
|
||||
api_changelog(args.api_from, args.api_to, args.api_out)
|
||||
else:
|
||||
print("Error: --api_from/api_to/api_out args needed")
|
||||
parser.print_help()
|
||||
return
|
||||
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
args.func(args)
|
||||
print("batch job finished, exiting")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@@ -141,26 +141,6 @@ def handle_args():
|
||||
required=False,
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--api-changelog-generate",
|
||||
dest="changelog",
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Generate the API changelog RST file "
|
||||
"(default=False, requires `--api-dump-index-path` parameter)",
|
||||
required=False,
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--api-dump-index-path",
|
||||
dest="api_dump_index_path",
|
||||
metavar='FILE',
|
||||
default=None,
|
||||
help="Path to the API dump index JSON file "
|
||||
"(required when `--api-changelog-generate` is True)",
|
||||
required=False,
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-o", "--output",
|
||||
dest="output_dir",
|
||||
@@ -534,42 +514,6 @@ if ARGS.sphinx_build_pdf:
|
||||
sphinx_make_pdf_log = os.path.join(ARGS.output_dir, ".latex_make.log")
|
||||
SPHINX_MAKE_PDF_STDOUT = open(sphinx_make_pdf_log, "w", encoding="utf-8")
|
||||
|
||||
|
||||
# --------------------------------CHANGELOG GENERATION--------------------------------------
|
||||
|
||||
def generate_changelog():
    """
    Run ``sphinx_changelog_gen.py`` twice: once to dump the API, once to write the changelog.

    The script is loaded by file path from ``SCRIPT_DIR`` and its ``main()`` is called first
    with the ``dump`` sub-command (output: ``<index dir>/<BLENDER_VERSION_DOTS>/api_dump.json``)
    and then with the ``changelog`` sub-command (output: ``change_log.rst`` in ``SPHINX_IN_TMP``).
    Both calls use the index file given by ``ARGS.api_dump_index_path``.
    """
    import importlib.util

    module_path = os.path.abspath(os.path.join(SCRIPT_DIR, "sphinx_changelog_gen.py"))
    module_spec = importlib.util.spec_from_file_location("sphinx_changelog_gen", module_path)
    changelog_gen = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(changelog_gen)

    index_filepath = ARGS.api_dump_index_path
    dump_root = os.path.dirname(index_filepath)
    dump_filepath = os.path.abspath(os.path.join(dump_root, BLENDER_VERSION_DOTS, "api_dump.json"))
    changelog_filepath = os.path.abspath(os.path.join(SPHINX_IN_TMP, "change_log.rst"))

    # Leading arguments shared by both invocations.
    common_args = ("--", "--indexpath", index_filepath)

    changelog_gen.main(common_args + ("dump", "--filepath-out", dump_filepath))
    changelog_gen.main(common_args + ("changelog", "--filepath-out", changelog_filepath))
|
||||
|
||||
|
||||
# --------------------------------API DUMP--------------------------------------
|
||||
|
||||
# Lame, python won't give some access.
|
||||
@@ -1529,8 +1473,7 @@ def pyrna2sphinx(basepath):
|
||||
else:
|
||||
fw(".. class:: %s\n\n" % struct_id)
|
||||
|
||||
write_indented_lines(" ", fw, struct.description, False)
|
||||
fw("\n")
|
||||
fw(" %s\n\n" % struct.description)
|
||||
|
||||
# Properties sorted in alphabetical order.
|
||||
sorted_struct_properties = struct.properties[:]
|
||||
@@ -2530,9 +2473,6 @@ def main():
|
||||
|
||||
rna2sphinx(SPHINX_IN_TMP)
|
||||
|
||||
if ARGS.changelog:
|
||||
generate_changelog()
|
||||
|
||||
if ARGS.full_rebuild:
|
||||
# Only for full updates.
|
||||
shutil.rmtree(SPHINX_IN, True)
|
||||
|
22
extern/audaspace/bindings/C/AUD_Special.cpp
vendored
22
extern/audaspace/bindings/C/AUD_Special.cpp
vendored
@@ -270,7 +270,7 @@ AUD_API int AUD_readSound(AUD_Sound* sound, float* buffer, int length, int sampl
|
||||
return length;
|
||||
}
|
||||
|
||||
AUD_API int AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data, char* error, size_t errorsize)
|
||||
AUD_API const char* AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -282,20 +282,15 @@ AUD_API int AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int lengt
|
||||
std::shared_ptr<IWriter> writer = FileWriter::createWriter(filename, convCToDSpec(specs), static_cast<Container>(format), static_cast<Codec>(codec), bitrate);
|
||||
FileWriter::writeReader(reader, writer, length, buffersize, callback, data);
|
||||
|
||||
return true;
|
||||
return nullptr;
|
||||
}
|
||||
catch(Exception& e)
|
||||
{
|
||||
if(error && errorsize)
|
||||
{
|
||||
std::strncpy(error, e.getMessage().c_str(), errorsize);
|
||||
error[errorsize - 1] = '\0';
|
||||
}
|
||||
return false;
|
||||
return e.getMessage().c_str();
|
||||
}
|
||||
}
|
||||
|
||||
AUD_API int AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data, char* error, size_t errorsize)
|
||||
AUD_API const char* AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -333,16 +328,11 @@ AUD_API int AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsign
|
||||
reader->seek(start);
|
||||
FileWriter::writeReader(reader, writers, length, buffersize, callback, data);
|
||||
|
||||
return true;
|
||||
return nullptr;
|
||||
}
|
||||
catch(Exception& e)
|
||||
{
|
||||
if(error && errorsize)
|
||||
{
|
||||
std::strncpy(error, e.getMessage().c_str(), errorsize);
|
||||
error[errorsize - 1] = '\0';
|
||||
}
|
||||
return false;
|
||||
return e.getMessage().c_str();
|
||||
}
|
||||
}
|
||||
|
||||
|
16
extern/audaspace/bindings/C/AUD_Special.h
vendored
16
extern/audaspace/bindings/C/AUD_Special.h
vendored
@@ -70,15 +70,13 @@ extern AUD_API int AUD_readSound(AUD_Sound* sound, float* buffer, int length, in
|
||||
* \param bitrate The bitrate for encoding.
|
||||
* \param callback A callback function that is called periodically during mixdown, reporting progress if length > 0. Can be NULL.
|
||||
* \param data Pass through parameter that is passed to the callback.
|
||||
* \param error String buffer to copy the error message to in case of failure.
|
||||
* \param errorsize The size of the error buffer.
|
||||
* \return Whether or not the operation succeeded.
|
||||
* \return An error message or NULL in case of success.
|
||||
*/
|
||||
extern AUD_API int AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length,
|
||||
extern AUD_API const char* AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length,
|
||||
unsigned int buffersize, const char* filename,
|
||||
AUD_DeviceSpecs specs, AUD_Container format,
|
||||
AUD_Codec codec, unsigned int bitrate,
|
||||
void(*callback)(float, void*), void* data, char* error, size_t errorsize);
|
||||
void(*callback)(float, void*), void* data);
|
||||
|
||||
/**
|
||||
* Mixes a sound down into multiple files.
|
||||
@@ -93,15 +91,13 @@ extern AUD_API int AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned in
|
||||
* \param bitrate The bitrate for encoding.
|
||||
* \param callback A callback function that is called periodically during mixdown, reporting progress if length > 0. Can be NULL.
|
||||
* \param data Pass through parameter that is passed to the callback.
|
||||
* \param error String buffer to copy the error message to in case of failure.
|
||||
* \param errorsize The size of the error buffer.
|
||||
* \return Whether or not the operation succeeded.
|
||||
* \return An error message or NULL in case of success.
|
||||
*/
|
||||
extern AUD_API int AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length,
|
||||
extern AUD_API const char* AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length,
|
||||
unsigned int buffersize, const char* filename,
|
||||
AUD_DeviceSpecs specs, AUD_Container format,
|
||||
AUD_Codec codec, unsigned int bitrate,
|
||||
void(*callback)(float, void*), void* data, char* error, size_t errorsize);
|
||||
void(*callback)(float, void*), void* data);
|
||||
|
||||
/**
|
||||
* Opens a read device and prepares it for mixdown of the sound scene.
|
||||
|
@@ -41,7 +41,7 @@ double PulseAudioDevice::PulseAudioSynchronizer::getPosition(std::shared_ptr<IHa
|
||||
|
||||
void PulseAudioDevice::updateRingBuffer()
|
||||
{
|
||||
unsigned int samplesize = AUD_DEVICE_SAMPLE_SIZE(m_specs);
|
||||
unsigned int samplesize = AUD_SAMPLE_SIZE(m_specs);
|
||||
|
||||
std::unique_lock<std::mutex> lock(m_mixingLock);
|
||||
|
||||
|
2
extern/curve_fit_nd/README.blender
vendored
2
extern/curve_fit_nd/README.blender
vendored
@@ -1,5 +1,5 @@
|
||||
Project: Curve-Fit-nD
|
||||
URL: https://github.com/ideasman42/curve-fit-nd
|
||||
License: BSD 3-Clause
|
||||
Upstream version: ae32da9de264c3ed399673e2bc1bc09003799416 (Last Release)
|
||||
Upstream version: ddcd5bd (Last Release)
|
||||
Local modifications: None
|
||||
|
12
extern/curve_fit_nd/curve_fit_nd.h
vendored
12
extern/curve_fit_nd/curve_fit_nd.h
vendored
@@ -39,7 +39,7 @@
|
||||
* Takes a flat array of points and evaluates that to calculate a bezier spline.
|
||||
*
|
||||
* \param points, points_len: The array of points to calculate a cubics from.
|
||||
* \param dims: The number of dimensions for each element in \a points.
|
||||
* \param dims: The number of dimensions for for each element in \a points.
|
||||
* \param error_threshold: the error threshold to allow for,
|
||||
* the curve will be within this distance from \a points.
|
||||
* \param corners, corners_len: indices for points which will not have aligned tangents (optional).
|
||||
@@ -47,10 +47,10 @@
|
||||
* to evaluate a line to detect corner indices.
|
||||
*
|
||||
* \param r_cubic_array, r_cubic_array_len: Resulting array of tangents and knots, formatted as follows:
|
||||
* `r_cubic_array[r_cubic_array_len][3][dims]`,
|
||||
* ``r_cubic_array[r_cubic_array_len][3][dims]``,
|
||||
* where each point has 0 and 2 for the tangents and the middle index 1 for the knot.
|
||||
* The size of the *flat* array will be `r_cubic_array_len * 3 * dims`.
|
||||
* \param r_corner_index_array, r_corner_index_len: Corner indices in \a r_cubic_array (optional).
|
||||
* The size of the *flat* array will be ``r_cubic_array_len * 3 * dims``.
|
||||
* \param r_corner_index_array, r_corner_index_len: Corner indices in in \a r_cubic_array (optional).
|
||||
* This allows you to access corners on the resulting curve.
|
||||
*
|
||||
* \returns zero on success, nonzero is reserved for error values.
|
||||
@@ -85,7 +85,7 @@ int curve_fit_cubic_to_points_fl(
|
||||
* Takes a flat array of points and evaluates that to calculate handle lengths.
|
||||
*
|
||||
* \param points, points_len: The array of points to calculate a cubics from.
|
||||
* \param dims: The number of dimensions for each element in \a points.
|
||||
* \param dims: The number of dimensions for for each element in \a points.
|
||||
* \param points_length_cache: Optional pre-calculated lengths between points.
|
||||
* \param error_threshold: the error threshold to allow for,
|
||||
* \param tan_l, tan_r: Normalized tangents the handles will be aligned to.
|
||||
@@ -166,7 +166,7 @@ int curve_fit_cubic_to_points_refit_fl(
|
||||
* A helper function that takes a line and outputs its corner indices.
|
||||
*
|
||||
* \param points, points_len: Curve to evaluate.
|
||||
* \param dims: The number of dimensions for each element in \a points.
|
||||
* \param dims: The number of dimensions for for each element in \a points.
|
||||
* \param radius_min: Corners on the curve between points below this radius are ignored.
|
||||
* \param radius_max: Corners on the curve above this radius are ignored.
|
||||
* \param samples_max: Prevent testing corners beyond this many points
|
||||
|
126
extern/curve_fit_nd/intern/curve_fit_cubic.c
vendored
126
extern/curve_fit_nd/intern/curve_fit_cubic.c
vendored
@@ -43,24 +43,20 @@
|
||||
|
||||
#include "../curve_fit_nd.h"
|
||||
|
||||
/** Take curvature into account when calculating the least square solution isn't usable. */
|
||||
/* Take curvature into account when calculating the least square solution isn't usable. */
|
||||
#define USE_CIRCULAR_FALLBACK
|
||||
|
||||
/**
|
||||
* Use the maximum distance of any points from the direct line between 2 points
|
||||
/* Use the maximum distance of any points from the direct line between 2 points
|
||||
* to calculate how long the handles need to be.
|
||||
* Can do a 'perfect' reversal of subdivision when for curve has symmetrical handles and doesn't change direction
|
||||
* (as with an 'S' shape).
|
||||
*/
|
||||
* (as with an 'S' shape). */
|
||||
#define USE_OFFSET_FALLBACK
|
||||
|
||||
/** Avoid re-calculating lengths multiple times. */
|
||||
/* avoid re-calculating lengths multiple times */
|
||||
#define USE_LENGTH_CACHE
|
||||
|
||||
/**
|
||||
* Store the indices in the cubic data so we can return the original indices,
|
||||
* useful when the caller has data associated with the curve.
|
||||
*/
|
||||
/* store the indices in the cubic data so we can return the original indices,
|
||||
* useful when the caller has data associated with the curve. */
|
||||
#define USE_ORIG_INDEX_DATA
|
||||
|
||||
typedef unsigned int uint;
|
||||
@@ -99,15 +95,13 @@ typedef unsigned int uint;
|
||||
* \{ */
|
||||
|
||||
typedef struct Cubic {
|
||||
/** Single linked lists. */
|
||||
/* single linked lists */
|
||||
struct Cubic *next;
|
||||
#ifdef USE_ORIG_INDEX_DATA
|
||||
uint orig_span;
|
||||
#endif
|
||||
/**
|
||||
* 0: point_0, 1: handle_0, 2: handle_1, 3: point_1,
|
||||
* each one is offset by 'dims'.
|
||||
*/
|
||||
/* 0: point_0, 1: handle_0, 2: handle_1, 3: point_1,
|
||||
* each one is offset by 'dims' */
|
||||
double pt_data[0];
|
||||
} Cubic;
|
||||
|
||||
@@ -201,7 +195,7 @@ static double *cubic_list_as_array(
|
||||
bool use_orig_index = (r_orig_index != NULL);
|
||||
#endif
|
||||
|
||||
/* Fill the array backwards. */
|
||||
/* fill the array backwards */
|
||||
const size_t array_chunk = 3 * dims;
|
||||
double *array_iter = array + array_flat_len;
|
||||
for (Cubic *citer = clist->items; citer; citer = citer->next) {
|
||||
@@ -227,15 +221,15 @@ static double *cubic_list_as_array(
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Flip tangent for first and last (we could leave at zero, but set to something useful). */
|
||||
/* flip tangent for first and last (we could leave at zero, but set to something useful) */
|
||||
|
||||
/* First. */
|
||||
/* first */
|
||||
array_iter -= array_chunk;
|
||||
memcpy(&array_iter[dims], handle_prev, sizeof(double) * 2 * dims);
|
||||
flip_vn_vnvn(&array_iter[0 * dims], &array_iter[1 * dims], &array_iter[2 * dims], dims);
|
||||
assert(array == array_iter);
|
||||
|
||||
/* Last. */
|
||||
/* last */
|
||||
array_iter += array_flat_len - (3 * dims);
|
||||
flip_vn_vnvn(&array_iter[2 * dims], &array_iter[1 * dims], &array_iter[0 * dims], dims);
|
||||
|
||||
@@ -461,7 +455,7 @@ static double points_calc_circumference_factor(
|
||||
const double dot = dot_vnvn(tan_l, tan_r, dims);
|
||||
const double len_tangent = dot < 0.0 ? len_vnvn(tan_l, tan_r, dims) : len_negated_vnvn(tan_l, tan_r, dims);
|
||||
if (len_tangent > DBL_EPSILON) {
|
||||
/* Only clamp to avoid precision error. */
|
||||
/* only clamp to avoid precision error */
|
||||
double angle = acos(max(-fabs(dot), -1.0));
|
||||
/* Angle may be less than the length when the tangents define >180 degrees of the circle,
|
||||
* (tangents that point away from each other).
|
||||
@@ -472,7 +466,7 @@ static double points_calc_circumference_factor(
|
||||
return factor;
|
||||
}
|
||||
else {
|
||||
/* Tangents are exactly aligned (think two opposite sides of a circle). */
|
||||
/* tangents are exactly aligned (think two opposite sides of a circle). */
|
||||
return (M_PI / 2);
|
||||
}
|
||||
}
|
||||
@@ -491,18 +485,18 @@ static double points_calc_circle_tangent_factor(
|
||||
const double eps = 1e-8;
|
||||
const double tan_dot = dot_vnvn(tan_l, tan_r, dims);
|
||||
if (tan_dot > 1.0 - eps) {
|
||||
/* No angle difference (use fallback, length won't make any difference). */
|
||||
/* no angle difference (use fallback, length wont make any difference) */
|
||||
return (1.0 / 3.0) * 0.75;
|
||||
}
|
||||
else if (tan_dot < -1.0 + eps) {
|
||||
/* Parallel tangents (half-circle). */
|
||||
/* parallel tangents (half-circle) */
|
||||
return (1.0 / 2.0);
|
||||
}
|
||||
else {
|
||||
/* Non-aligned tangents, calculate handle length. */
|
||||
/* non-aligned tangents, calculate handle length */
|
||||
const double angle = acos(tan_dot) / 2.0;
|
||||
|
||||
/* Could also use `angle_sin = len_vnvn(tan_l, tan_r, dims) / 2.0`. */
|
||||
/* could also use 'angle_sin = len_vnvn(tan_l, tan_r, dims) / 2.0' */
|
||||
const double angle_sin = sin(angle);
|
||||
const double angle_cos = cos(angle);
|
||||
return ((1.0 - angle_cos) / (angle_sin * 2.0)) / angle_sin;
|
||||
@@ -522,15 +516,15 @@ static double points_calc_cubic_scale(
|
||||
const double len_direct = len_vnvn(v_l, v_r, dims);
|
||||
const double len_circle_factor = points_calc_circle_tangent_factor(tan_l, tan_r, dims);
|
||||
|
||||
/* If this curve is a circle, this value doesn't need modification. */
|
||||
/* if this curve is a circle, this value doesn't need modification */
|
||||
const double len_circle_handle = (len_direct * (len_circle_factor / 0.75));
|
||||
|
||||
/* Scale by the difference from the circumference distance. */
|
||||
/* scale by the difference from the circumference distance */
|
||||
const double len_circle = len_direct * points_calc_circumference_factor(tan_l, tan_r, dims);
|
||||
double scale_handle = (coords_length / len_circle);
|
||||
|
||||
/* Could investigate an accurate calculation here,
|
||||
* though this gives close results. */
|
||||
* though this gives close results */
|
||||
scale_handle = ((scale_handle - 1.0) * 1.75) + 1.0;
|
||||
|
||||
return len_circle_handle * scale_handle;
|
||||
@@ -560,8 +554,9 @@ static void cubic_from_points_fallback(
|
||||
r_cubic->orig_span = (points_offset_len - 1);
|
||||
#endif
|
||||
|
||||
/* `p1 = p0 - (tan_l * alpha);`
|
||||
* `p2 = p3 + (tan_r * alpha);` */
|
||||
/* p1 = p0 - (tan_l * alpha);
|
||||
* p2 = p3 + (tan_r * alpha);
|
||||
*/
|
||||
msub_vn_vnvn_fl(p1, p0, tan_l, alpha, dims);
|
||||
madd_vn_vnvn_fl(p2, p3, tan_r, alpha, dims);
|
||||
}
|
||||
@@ -599,7 +594,7 @@ static void cubic_from_points_offset_fallback(
|
||||
project_plane_vn_vnvn_normalized(a[0], tan_l, dir_unit, dims);
|
||||
project_plane_vn_vnvn_normalized(a[1], tan_r, dir_unit, dims);
|
||||
|
||||
/* Only for better accuracy, not essential. */
|
||||
/* only for better accuracy, not essential */
|
||||
normalize_vn(a[0], dims);
|
||||
normalize_vn(a[1], dims);
|
||||
|
||||
@@ -625,7 +620,7 @@ static void cubic_from_points_offset_fallback(
|
||||
*
|
||||
* The 'dists[..] + dir_dirs' limit is just a rough approximation.
|
||||
* While a more exact value could be calculated,
|
||||
* in this case the error values approach divide by zero (infinite)
|
||||
* in this case the error values approach divide by zero (inf)
|
||||
* so there is no need to be too precise when checking if limits have been exceeded. */
|
||||
|
||||
double alpha_l = (dists[0] / 0.75) / fabs(dot_vnvn(tan_l, a[0], dims));
|
||||
@@ -649,8 +644,9 @@ static void cubic_from_points_offset_fallback(
|
||||
r_cubic->orig_span = (points_offset_len - 1);
|
||||
#endif
|
||||
|
||||
/* `p1 = p0 - (tan_l * alpha_l);`
|
||||
* `p2 = p3 + (tan_r * alpha_r);` */
|
||||
/* p1 = p0 - (tan_l * alpha_l);
|
||||
* p2 = p3 + (tan_r * alpha_r);
|
||||
*/
|
||||
msub_vn_vnvn_fl(p1, p0, tan_l, alpha_l, dims);
|
||||
madd_vn_vnvn_fl(p2, p3, tan_r, alpha_r, dims);
|
||||
}
|
||||
@@ -678,7 +674,7 @@ static void cubic_from_points(
|
||||
const double *p0 = &points_offset[0];
|
||||
const double *p3 = &points_offset[(points_offset_len - 1) * dims];
|
||||
|
||||
/* Point Pairs. */
|
||||
/* Point Pairs */
|
||||
double alpha_l, alpha_r;
|
||||
#ifdef USE_VLA
|
||||
double a[2][dims];
|
||||
@@ -700,7 +696,7 @@ static void cubic_from_points(
|
||||
const double b0_plus_b1 = B0plusB1(u_prime[i]);
|
||||
const double b2_plus_b3 = B2plusB3(u_prime[i]);
|
||||
|
||||
/* Inline dot product. */
|
||||
/* inline dot product */
|
||||
for (uint j = 0; j < dims; j++) {
|
||||
const double tmp = (pt[j] - (p0[j] * b0_plus_b1)) + (p3[j] * b2_plus_b3);
|
||||
|
||||
@@ -723,7 +719,7 @@ static void cubic_from_points(
|
||||
det_C0_C1 = c[0][0] * c[1][1] * 10e-12;
|
||||
}
|
||||
|
||||
/* May still divide-by-zero, check below will catch NAN values. */
|
||||
/* may still divide-by-zero, check below will catch nan values */
|
||||
alpha_l = det_X_C1 / det_C0_C1;
|
||||
alpha_r = det_C_0X / det_C0_C1;
|
||||
}
|
||||
@@ -740,7 +736,7 @@ static void cubic_from_points(
|
||||
|
||||
bool use_clamp = true;
|
||||
|
||||
/* Flip check to catch NAN values. */
|
||||
/* flip check to catch nan values */
|
||||
if (!(alpha_l >= 0.0) ||
|
||||
!(alpha_r >= 0.0))
|
||||
{
|
||||
@@ -754,7 +750,7 @@ static void cubic_from_points(
|
||||
alpha_l = alpha_r = len_vnvn(p0, p3, dims) / 3.0;
|
||||
#endif
|
||||
|
||||
/* Skip clamping when we're using default handles. */
|
||||
/* skip clamping when we're using default handles */
|
||||
use_clamp = false;
|
||||
}
|
||||
|
||||
@@ -768,8 +764,9 @@ static void cubic_from_points(
|
||||
r_cubic->orig_span = (points_offset_len - 1);
|
||||
#endif
|
||||
|
||||
/* `p1 = p0 - (tan_l * alpha_l);`
|
||||
* `p2 = p3 + (tan_r * alpha_r);` */
|
||||
/* p1 = p0 - (tan_l * alpha_l);
|
||||
* p2 = p3 + (tan_r * alpha_r);
|
||||
*/
|
||||
msub_vn_vnvn_fl(p1, p0, tan_l, alpha_l, dims);
|
||||
madd_vn_vnvn_fl(p2, p3, tan_r, alpha_r, dims);
|
||||
|
||||
@@ -784,7 +781,7 @@ static void cubic_from_points(
|
||||
#endif
|
||||
points_calc_center_weighted(points_offset, points_offset_len, dims, center);
|
||||
|
||||
const double clamp_scale = 3.0; /* Clamp to 3x. */
|
||||
const double clamp_scale = 3.0; /* clamp to 3x */
|
||||
double dist_sq_max = 0.0;
|
||||
|
||||
{
|
||||
@@ -793,7 +790,7 @@ static void cubic_from_points(
|
||||
#if 0
|
||||
double dist_sq_test = sq(len_vnvn(center, pt, dims) * clamp_scale);
|
||||
#else
|
||||
/* Do inline. */
|
||||
/* do inline */
|
||||
double dist_sq_test = 0.0;
|
||||
for (uint j = 0; j < dims; j++) {
|
||||
dist_sq_test += sq((pt[j] - center[j]) * clamp_scale);
|
||||
@@ -819,8 +816,10 @@ static void cubic_from_points(
|
||||
alpha_l = alpha_r = len_vnvn(p0, p3, dims) / 3.0;
|
||||
#endif
|
||||
|
||||
/* `p1 = p0 - (tan_l * alpha_l);`
|
||||
* `p2 = p3 + (tan_r * alpha_r);` */
|
||||
/*
|
||||
* p1 = p0 - (tan_l * alpha_l);
|
||||
* p2 = p3 + (tan_r * alpha_r);
|
||||
*/
|
||||
for (uint j = 0; j < dims; j++) {
|
||||
p1[j] = p0[j] - (tan_l[j] * alpha_l);
|
||||
p2[j] = p3[j] + (tan_r[j] * alpha_r);
|
||||
@@ -830,7 +829,7 @@ static void cubic_from_points(
|
||||
p2_dist_sq = len_squared_vnvn(center, p2, dims);
|
||||
}
|
||||
|
||||
/* Clamp within the 3x radius. */
|
||||
/* clamp within the 3x radius */
|
||||
if (p1_dist_sq > dist_sq_max) {
|
||||
isub_vnvn(p1, center, dims);
|
||||
imul_vn_fl(p1, sqrt(dist_sq_max) / sqrt(p1_dist_sq), dims);
|
||||
@@ -842,7 +841,7 @@ static void cubic_from_points(
|
||||
iadd_vnvn(p2, center, dims);
|
||||
}
|
||||
}
|
||||
/* End clamping. */
|
||||
/* end clamping */
|
||||
}
|
||||
|
||||
#ifdef USE_LENGTH_CACHE
|
||||
@@ -918,7 +917,7 @@ static double cubic_find_root(
|
||||
const uint dims)
|
||||
{
|
||||
/* Newton-Raphson Method. */
|
||||
/* All vectors. */
|
||||
/* all vectors */
|
||||
#ifdef USE_VLA
|
||||
double q0_u[dims];
|
||||
double q1_u[dims];
|
||||
@@ -933,8 +932,8 @@ static double cubic_find_root(
|
||||
cubic_calc_speed(cubic, u, dims, q1_u);
|
||||
cubic_calc_acceleration(cubic, u, dims, q2_u);
|
||||
|
||||
/* May divide-by-zero, caller must check for that case. */
|
||||
/* `u - ((q0_u - p) * q1_u) / (q1_u.length_squared() + (q0_u - p) * q2_u)` */
|
||||
/* may divide-by-zero, caller must check for that case */
|
||||
/* u - ((q0_u - p) * q1_u) / (q1_u.length_squared() + (q0_u - p) * q2_u) */
|
||||
isub_vnvn(q0_u, p, dims);
|
||||
return u - dot_vnvn(q0_u, q1_u, dims) /
|
||||
(len_squared_vn(q1_u, dims) + dot_vnvn(q0_u, q2_u, dims));
|
||||
@@ -1033,7 +1032,7 @@ static bool fit_cubic_to_points(
|
||||
double error_max_sq;
|
||||
uint split_index;
|
||||
|
||||
/* Parameterize points, and attempt to fit curve. */
|
||||
/* Parameterize points, and attempt to fit curve */
|
||||
cubic_from_points(
|
||||
points_offset, points_offset_len,
|
||||
#ifdef USE_CIRCULAR_FALLBACK
|
||||
@@ -1041,7 +1040,7 @@ static bool fit_cubic_to_points(
|
||||
#endif
|
||||
u, tan_l, tan_r, dims, r_cubic);
|
||||
|
||||
/* Find max deviation of points to fitted curve. */
|
||||
/* Find max deviation of points to fitted curve */
|
||||
error_max_sq = cubic_calc_error(
|
||||
r_cubic, points_offset, points_offset_len, u, dims,
|
||||
&split_index);
|
||||
@@ -1063,7 +1062,7 @@ static bool fit_cubic_to_points(
|
||||
cubic_test, points_offset, points_offset_len, u, dims,
|
||||
&split_index);
|
||||
|
||||
/* Intentionally use the newly calculated 'split_index',
|
||||
/* intentionally use the newly calculated 'split_index',
|
||||
* even if the 'error_max_sq_test' is worse. */
|
||||
if (error_max_sq > error_max_sq_test) {
|
||||
error_max_sq = error_max_sq_test;
|
||||
@@ -1072,7 +1071,7 @@ static bool fit_cubic_to_points(
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Test the offset fallback. */
|
||||
/* Test the offset fallback */
|
||||
#ifdef USE_OFFSET_FALLBACK
|
||||
if (!(error_max_sq < error_threshold_sq)) {
|
||||
/* Using the offset from the curve to calculate cubic handle length may give better results
|
||||
@@ -1096,7 +1095,7 @@ static bool fit_cubic_to_points(
|
||||
if (!(error_max_sq < error_threshold_sq)) {
|
||||
cubic_copy(cubic_test, r_cubic, dims);
|
||||
|
||||
/* If error not too large, try some re-parameterization and iteration. */
|
||||
/* If error not too large, try some reparameterization and iteration */
|
||||
double *u_prime = malloc(sizeof(double) * points_offset_len);
|
||||
for (uint iter = 0; iter < iteration_max; iter++) {
|
||||
if (!cubic_reparameterize(
|
||||
@@ -1124,7 +1123,7 @@ static bool fit_cubic_to_points(
|
||||
}
|
||||
|
||||
if (!(error_max_sq < error_threshold_sq)) {
|
||||
/* Continue. */
|
||||
/* continue */
|
||||
}
|
||||
else {
|
||||
assert((error_max_sq < error_threshold_sq));
|
||||
@@ -1157,7 +1156,7 @@ static void fit_cubic_to_points_recursive(
|
||||
const double error_threshold_sq,
|
||||
const uint calc_flag,
|
||||
const uint dims,
|
||||
/* Fill in the list. */
|
||||
/* fill in the list */
|
||||
CubicList *clist)
|
||||
{
|
||||
Cubic *cubic = cubic_alloc(dims);
|
||||
@@ -1181,7 +1180,7 @@ static void fit_cubic_to_points_recursive(
|
||||
cubic_free(cubic);
|
||||
|
||||
|
||||
/* Fitting failed -- split at max error point and fit recursively. */
|
||||
/* Fitting failed -- split at max error point and fit recursively */
|
||||
|
||||
/* Check splinePoint is not an endpoint?
|
||||
*
|
||||
@@ -1213,7 +1212,7 @@ static void fit_cubic_to_points_recursive(
|
||||
#endif
|
||||
const double *pt = &points_offset[split_index * dims];
|
||||
|
||||
/* `tan_center = ((pt_a - pt).normalized() + (pt - pt_b).normalized()).normalized()`. */
|
||||
/* tan_center = ((pt_a - pt).normalized() + (pt - pt_b).normalized()).normalized() */
|
||||
normalize_vn_vnvn(tan_center_a, pt_a, pt, dims);
|
||||
normalize_vn_vnvn(tan_center_b, pt, pt_b, dims);
|
||||
add_vn_vnvn(tan_center, tan_center_a, tan_center_b, dims);
|
||||
@@ -1307,8 +1306,9 @@ int curve_fit_cubic_to_points_db(
|
||||
const double *pt_l_next = pt_l + dims;
|
||||
const double *pt_r_prev = pt_r - dims;
|
||||
|
||||
/* `tan_l = (pt_l - pt_l_next).normalized();`
|
||||
* `tan_r = (pt_r_prev - pt_r).normalized();` */
|
||||
/* tan_l = (pt_l - pt_l_next).normalized()
|
||||
* tan_r = (pt_r_prev - pt_r).normalized()
|
||||
*/
|
||||
normalize_vn_vnvn(tan_l, pt_l, pt_l_next, dims);
|
||||
normalize_vn_vnvn(tan_r, pt_r_prev, pt_r, dims);
|
||||
|
||||
@@ -1362,7 +1362,7 @@ int curve_fit_cubic_to_points_db(
|
||||
*r_cubic_orig_index = NULL;
|
||||
#endif
|
||||
|
||||
/* Allocate a contiguous array and free the linked list. */
|
||||
/* allocate a contiguous array and free the linked list */
|
||||
*r_cubic_array = cubic_list_as_array(
|
||||
&clist
|
||||
#ifdef USE_ORIG_INDEX_DATA
|
||||
@@ -1454,7 +1454,7 @@ int curve_fit_cubic_to_points_single_db(
|
||||
{
|
||||
Cubic *cubic = alloca(cubic_alloc_size(dims));
|
||||
|
||||
/* In this instance there are no advantage in using length cache,
|
||||
/* in this instance theres no advantage in using length cache,
|
||||
* since we're not recursively calculating values. */
|
||||
#ifdef USE_LENGTH_CACHE
|
||||
double *points_length_cache_alloc = NULL;
|
||||
|
@@ -1490,4 +1490,3 @@ int curve_fit_cubic_to_points_refit_fl(
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@@ -37,7 +37,7 @@
|
||||
* - #TPOOL_STRUCT: Name for pool struct name.
|
||||
* - #TPOOL_CHUNK_SIZE: Chunk size (optional), use 64kb when not defined.
|
||||
*
|
||||
* \note #TPOOL_ALLOC_TYPE must be at least `sizeof(void *)`.
|
||||
* \note #TPOOL_ALLOC_TYPE must be at least ``sizeof(void *)``.
|
||||
*
|
||||
* Defines the API, uses #TPOOL_IMPL_PREFIX to prefix each function.
|
||||
*
|
||||
|
2
extern/curve_fit_nd/intern/generic_heap.c
vendored
2
extern/curve_fit_nd/intern/generic_heap.c
vendored
@@ -305,3 +305,5 @@ void *HEAP_node_ptr(HeapNode *node)
|
||||
{
|
||||
return node->ptr;
|
||||
}
|
||||
|
||||
/** \} */
|
||||
|
3
extern/mantaflow/README.blender
vendored
3
extern/mantaflow/README.blender
vendored
@@ -2,5 +2,4 @@ Project: Mantaflow
|
||||
URL: http://mantaflow.com/
|
||||
License: Apache 2.0
|
||||
Upstream version: 0.13
|
||||
Local modifications:
|
||||
* ./patches/local_namespace.diff to support loading MANTA variables into an isolated __main__ name-space.
|
||||
Local modifications: None
|
||||
|
31
extern/mantaflow/helper/pwrapper/registry.cpp
vendored
31
extern/mantaflow/helper/pwrapper/registry.cpp
vendored
@@ -115,7 +115,7 @@ class WrapperRegistry {
|
||||
void construct(const std::string &scriptname, const vector<string> &args);
|
||||
void cleanup();
|
||||
void renameObjects();
|
||||
void runPreInit(PyObject *name_space);
|
||||
void runPreInit();
|
||||
PyObject *initModule();
|
||||
ClassData *lookup(const std::string &name);
|
||||
bool canConvert(ClassData *from, ClassData *to);
|
||||
@@ -505,7 +505,7 @@ void WrapperRegistry::addConstants(PyObject *module)
|
||||
}
|
||||
}
|
||||
|
||||
void WrapperRegistry::runPreInit(PyObject *name_space)
|
||||
void WrapperRegistry::runPreInit()
|
||||
{
|
||||
// add python directories to path
|
||||
PyObject *sys_path = PySys_GetObject((char *)"path");
|
||||
@@ -518,15 +518,7 @@ void WrapperRegistry::runPreInit(PyObject *name_space)
|
||||
}
|
||||
if (!mCode.empty()) {
|
||||
mCode = "from manta import *\n" + mCode;
|
||||
PyObject *return_value = PyRun_String(mCode.c_str(), Py_file_input, name_space, name_space);
|
||||
if (return_value == nullptr) {
|
||||
if (PyErr_Occurred()) {
|
||||
PyErr_Print();
|
||||
}
|
||||
}
|
||||
else {
|
||||
Py_DECREF(return_value);
|
||||
}
|
||||
PyRun_SimpleString(mCode.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -706,23 +698,16 @@ PyObject *WrapperRegistry::initModule()
|
||||
//******************************************************
|
||||
// Register members and exposed functions
|
||||
|
||||
void setup(const bool python_lifecycle,
|
||||
const std::string &filename,
|
||||
const std::vector<std::string> &args,
|
||||
PyObject *name_space)
|
||||
void setup(const std::string &filename, const std::vector<std::string> &args)
|
||||
{
|
||||
WrapperRegistry::instance().construct(filename, args);
|
||||
if (python_lifecycle) {
|
||||
Py_Initialize();
|
||||
}
|
||||
WrapperRegistry::instance().runPreInit(name_space);
|
||||
Py_Initialize();
|
||||
WrapperRegistry::instance().runPreInit();
|
||||
}
|
||||
|
||||
void finalize(const bool python_lifecycle)
|
||||
void finalize()
|
||||
{
|
||||
if (python_lifecycle) {
|
||||
Py_Finalize();
|
||||
}
|
||||
Py_Finalize();
|
||||
WrapperRegistry::instance().cleanup();
|
||||
}
|
||||
|
||||
|
7
extern/mantaflow/helper/pwrapper/registry.h
vendored
7
extern/mantaflow/helper/pwrapper/registry.h
vendored
@@ -48,11 +48,8 @@ template<class T> struct Namify {
|
||||
namespace Pb {
|
||||
|
||||
// internal registry access
|
||||
void setup(bool python_lifecycle,
|
||||
const std::string &filename,
|
||||
const std::vector<std::string> &args,
|
||||
PyObject *name_space);
|
||||
void finalize(bool python_lifecycle);
|
||||
void setup(const std::string &filename, const std::vector<std::string> &args);
|
||||
void finalize();
|
||||
bool canConvert(PyObject *obj, const std::string &to);
|
||||
Manta::PbClass *objFromPy(PyObject *obj);
|
||||
Manta::PbClass *createPy(const std::string &classname,
|
||||
|
86
extern/mantaflow/patches/local_namespace.diff
vendored
86
extern/mantaflow/patches/local_namespace.diff
vendored
@@ -1,86 +0,0 @@
|
||||
diff --git a/extern/mantaflow/helper/pwrapper/registry.cpp b/extern/mantaflow/helper/pwrapper/registry.cpp
|
||||
index 5196c0409f8..b4206a41dea 100644
|
||||
--- a/extern/mantaflow/helper/pwrapper/registry.cpp
|
||||
+++ b/extern/mantaflow/helper/pwrapper/registry.cpp
|
||||
@@ -115,7 +115,7 @@ class WrapperRegistry {
|
||||
void construct(const std::string &scriptname, const vector<string> &args);
|
||||
void cleanup();
|
||||
void renameObjects();
|
||||
- void runPreInit();
|
||||
+ void runPreInit(PyObject *name_space);
|
||||
PyObject *initModule();
|
||||
ClassData *lookup(const std::string &name);
|
||||
bool canConvert(ClassData *from, ClassData *to);
|
||||
@@ -505,7 +505,7 @@ void WrapperRegistry::addConstants(PyObject *module)
|
||||
}
|
||||
}
|
||||
|
||||
-void WrapperRegistry::runPreInit()
|
||||
+void WrapperRegistry::runPreInit(PyObject *name_space)
|
||||
{
|
||||
// add python directories to path
|
||||
PyObject *sys_path = PySys_GetObject((char *)"path");
|
||||
@@ -518,7 +518,15 @@ void WrapperRegistry::runPreInit()
|
||||
}
|
||||
if (!mCode.empty()) {
|
||||
mCode = "from manta import *\n" + mCode;
|
||||
- PyRun_SimpleString(mCode.c_str());
|
||||
+ PyObject *return_value = PyRun_String(mCode.c_str(), Py_file_input, name_space, name_space);
|
||||
+ if (return_value == nullptr) {
|
||||
+ if (PyErr_Occurred()) {
|
||||
+ PyErr_Print();
|
||||
+ }
|
||||
+ }
|
||||
+ else {
|
||||
+ Py_DECREF(return_value);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -698,16 +706,23 @@ PyObject *WrapperRegistry::initModule()
|
||||
//******************************************************
|
||||
// Register members and exposed functions
|
||||
|
||||
-void setup(const std::string &filename, const std::vector<std::string> &args)
|
||||
+void setup(const bool python_lifecycle,
|
||||
+ const std::string &filename,
|
||||
+ const std::vector<std::string> &args,
|
||||
+ PyObject *name_space)
|
||||
{
|
||||
WrapperRegistry::instance().construct(filename, args);
|
||||
- Py_Initialize();
|
||||
- WrapperRegistry::instance().runPreInit();
|
||||
+ if (python_lifecycle) {
|
||||
+ Py_Initialize();
|
||||
+ }
|
||||
+ WrapperRegistry::instance().runPreInit(name_space);
|
||||
}
|
||||
|
||||
-void finalize()
|
||||
+void finalize(const bool python_lifecycle)
|
||||
{
|
||||
- Py_Finalize();
|
||||
+ if (python_lifecycle) {
|
||||
+ Py_Finalize();
|
||||
+ }
|
||||
WrapperRegistry::instance().cleanup();
|
||||
}
|
||||
|
||||
diff --git a/extern/mantaflow/helper/pwrapper/registry.h b/extern/mantaflow/helper/pwrapper/registry.h
|
||||
index d9d2bbb624b..2273d0b9bb1 100644
|
||||
--- a/extern/mantaflow/helper/pwrapper/registry.h
|
||||
+++ b/extern/mantaflow/helper/pwrapper/registry.h
|
||||
@@ -48,8 +48,11 @@ template<class T> struct Namify {
|
||||
namespace Pb {
|
||||
|
||||
// internal registry access
|
||||
-void setup(const std::string &filename, const std::vector<std::string> &args);
|
||||
-void finalize();
|
||||
+void setup(bool python_lifecycle,
|
||||
+ const std::string &filename,
|
||||
+ const std::vector<std::string> &args,
|
||||
+ PyObject *name_space);
|
||||
+void finalize(bool python_lifecycle);
|
||||
bool canConvert(PyObject *obj, const std::string &to);
|
||||
Manta::PbClass *objFromPy(PyObject *obj);
|
||||
Manta::PbClass *createPy(const std::string &classname,
|
@@ -67,10 +67,3 @@ endif()
|
||||
if(UNIX AND NOT APPLE)
|
||||
add_subdirectory(libc_compat)
|
||||
endif()
|
||||
|
||||
if(UNIX AND NOT APPLE)
|
||||
# Important this comes after "ghost" as it uses includes defined by GHOST's CMake.
|
||||
if(WITH_GHOST_WAYLAND AND WITH_GHOST_WAYLAND_DYNLOAD)
|
||||
add_subdirectory(wayland_dynload)
|
||||
endif()
|
||||
endif()
|
||||
|
@@ -263,10 +263,6 @@ if(WITH_CYCLES_DEVICE_OPTIX)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WITH_CYCLES_DEVICE_ONEAPI)
|
||||
add_definitions(-DWITH_ONEAPI)
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_EMBREE)
|
||||
add_definitions(-DWITH_EMBREE)
|
||||
include_directories(
|
||||
|
@@ -128,6 +128,10 @@ if(WITH_OPENIMAGEDENOISE)
|
||||
)
|
||||
endif()
|
||||
|
||||
if(WITH_EXPERIMENTAL_FEATURES)
|
||||
add_definitions(-DWITH_NEW_CURVES_TYPE)
|
||||
endif()
|
||||
|
||||
blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
|
||||
|
||||
add_dependencies(bf_intern_cycles bf_rna)
|
||||
|
@@ -84,36 +84,10 @@ class AddPresetViewportSampling(AddPresetBase, Operator):
|
||||
preset_subdir = "cycles/viewport_sampling"
|
||||
|
||||
|
||||
class AddPresetPerformance(AddPresetBase, Operator):
|
||||
'''Add an Performance Preset'''
|
||||
bl_idname = "render.cycles_performance_preset_add"
|
||||
bl_label = "Add Performance Preset"
|
||||
preset_menu = "CYCLES_PT_performance_presets"
|
||||
|
||||
preset_defines = [
|
||||
"render = bpy.context.scene.render"
|
||||
"cycles = bpy.context.scene.cycles"
|
||||
]
|
||||
|
||||
preset_values = [
|
||||
"render.threads_mode",
|
||||
"render.use_persistent_data",
|
||||
"cycles.debug_use_spatial_splits",
|
||||
"cycles.debug_use_compact_bvh",
|
||||
"cycles.debug_use_hair_bvh",
|
||||
"cycles.debug_bvh_time_steps",
|
||||
"cycles.use_auto_tile",
|
||||
"cycles.tile_size",
|
||||
]
|
||||
|
||||
preset_subdir = "cycles/performance"
|
||||
|
||||
|
||||
classes = (
|
||||
AddPresetIntegrator,
|
||||
AddPresetSampling,
|
||||
AddPresetViewportSampling,
|
||||
AddPresetPerformance,
|
||||
)
|
||||
|
||||
|
||||
|
@@ -118,8 +118,7 @@ enum_device_type = (
|
||||
('CUDA', "CUDA", "CUDA", 1),
|
||||
('OPTIX', "OptiX", "OptiX", 3),
|
||||
('HIP', "HIP", "HIP", 4),
|
||||
('METAL', "Metal", "Metal", 5),
|
||||
('ONEAPI', "oneAPI", "oneAPI", 6)
|
||||
('METAL', "Metal", "Metal", 5)
|
||||
)
|
||||
|
||||
enum_texture_limit = (
|
||||
@@ -693,7 +692,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
debug_use_compact_bvh: BoolProperty(
|
||||
name="Use Compact BVH",
|
||||
description="Use compact BVH structure (uses less ram but renders slower)",
|
||||
default=False,
|
||||
default=True,
|
||||
)
|
||||
debug_bvh_time_steps: IntProperty(
|
||||
name="BVH Time Steps",
|
||||
@@ -1398,8 +1397,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
|
||||
def get_device_types(self, context):
|
||||
import _cycles
|
||||
has_cuda, has_optix, has_hip, has_metal, has_oneapi = _cycles.get_device_types()
|
||||
|
||||
has_cuda, has_optix, has_hip, has_metal = _cycles.get_device_types()
|
||||
list = [('NONE', "None", "Don't use compute device", 0)]
|
||||
if has_cuda:
|
||||
list.append(('CUDA', "CUDA", "Use CUDA for GPU acceleration", 1))
|
||||
@@ -1409,8 +1407,6 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
list.append(('HIP', "HIP", "Use HIP for GPU acceleration", 4))
|
||||
if has_metal:
|
||||
list.append(('METAL', "Metal", "Use Metal for GPU acceleration", 5))
|
||||
if has_oneapi:
|
||||
list.append(('ONEAPI', "oneAPI", "Use oneAPI for GPU acceleration", 6))
|
||||
|
||||
return list
|
||||
|
||||
@@ -1442,7 +1438,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
|
||||
def update_device_entries(self, device_list):
|
||||
for device in device_list:
|
||||
if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP', 'METAL', 'ONEAPI'}:
|
||||
if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP', 'METAL'}:
|
||||
continue
|
||||
# Try to find existing Device entry
|
||||
entry = self.find_existing_device_entry(device)
|
||||
@@ -1486,7 +1482,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
import _cycles
|
||||
# Ensure `self.devices` is not re-allocated when the second call to
|
||||
# get_devices_for_type is made, freeing items from the first list.
|
||||
for device_type in ('CUDA', 'OPTIX', 'HIP', 'METAL', 'ONEAPI'):
|
||||
for device_type in ('CUDA', 'OPTIX', 'HIP', 'METAL'):
|
||||
self.update_device_entries(_cycles.available_devices(device_type))
|
||||
|
||||
# Deprecated: use refresh_devices instead.
|
||||
@@ -1549,31 +1545,18 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
elif device_type == 'HIP':
|
||||
import sys
|
||||
if sys.platform[:3] == "win":
|
||||
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
|
||||
col.label(text="Requires discrete AMD GPU with RDNA architecture", icon='BLANK1')
|
||||
col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
|
||||
elif sys.platform.startswith("linux"):
|
||||
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
|
||||
col.label(text="Requires discrete AMD GPU with RDNA architecture", icon='BLANK1')
|
||||
col.label(text="and AMD driver version 22.10 or newer", icon='BLANK1')
|
||||
elif device_type == 'ONEAPI':
|
||||
import sys
|
||||
col.label(text="Requires Intel GPU with Xe-HPG architecture", icon='BLANK1')
|
||||
if sys.platform.startswith("win"):
|
||||
col.label(text="and Windows driver version 101.1660 or newer", icon='BLANK1')
|
||||
elif sys.platform.startswith("linux"):
|
||||
col.label(text="and Linux driver version xx.xx.23570 or newer", icon='BLANK1')
|
||||
elif device_type == 'METAL':
|
||||
col.label(text="Requires Apple Silicon with macOS 12.2 or newer", icon='BLANK1')
|
||||
col.label(text="or AMD with macOS 12.3 or newer", icon='BLANK1')
|
||||
return
|
||||
|
||||
for device in devices:
|
||||
import unicodedata
|
||||
box.prop(
|
||||
device, "use", text=device.name
|
||||
.replace('(TM)', unicodedata.lookup('TRADE MARK SIGN'))
|
||||
.replace('(R)', unicodedata.lookup('REGISTERED SIGN'))
|
||||
.replace('(C)', unicodedata.lookup('COPYRIGHT SIGN'))
|
||||
)
|
||||
box.prop(device, "use", text=device.name)
|
||||
|
||||
def draw_impl(self, layout, context):
|
||||
row = layout.row()
|
||||
|
@@ -43,12 +43,6 @@ class CYCLES_PT_integrator_presets(CyclesPresetPanel):
|
||||
preset_add_operator = "render.cycles_integrator_preset_add"
|
||||
|
||||
|
||||
class CYCLES_PT_performance_presets(CyclesPresetPanel):
|
||||
bl_label = "Performance Presets"
|
||||
preset_subdir = "cycles/performance"
|
||||
preset_add_operator = "render.cycles_performance_preset_add"
|
||||
|
||||
|
||||
class CyclesButtonsPanel:
|
||||
bl_space_type = "PROPERTIES"
|
||||
bl_region_type = "WINDOW"
|
||||
@@ -117,12 +111,6 @@ def use_optix(context):
|
||||
return (get_device_type(context) == 'OPTIX' and cscene.device == 'GPU')
|
||||
|
||||
|
||||
def use_oneapi(context):
|
||||
cscene = context.scene.cycles
|
||||
|
||||
return (get_device_type(context) == 'ONEAPI' and cscene.device == 'GPU')
|
||||
|
||||
|
||||
def use_multi_device(context):
|
||||
cscene = context.scene.cycles
|
||||
if cscene.device != 'GPU':
|
||||
@@ -630,9 +618,6 @@ class CYCLES_RENDER_PT_performance(CyclesButtonsPanel, Panel):
|
||||
bl_label = "Performance"
|
||||
bl_options = {'DEFAULT_CLOSED'}
|
||||
|
||||
def draw_header_preset(self, context):
|
||||
CYCLES_PT_performance_presets.draw_panel_header(self.layout)
|
||||
|
||||
def draw(self, context):
|
||||
pass
|
||||
|
||||
@@ -952,8 +937,6 @@ class CYCLES_CAMERA_PT_dof(CyclesButtonsPanel, Panel):
|
||||
|
||||
col = split.column()
|
||||
col.prop(dof, "focus_object", text="Focus Object")
|
||||
if dof.focus_object and dof.focus_object.type == 'ARMATURE':
|
||||
col.prop_search(dof, "focus_subtarget", dof.focus_object.data, "bones", text="Focus Bone")
|
||||
|
||||
sub = col.row()
|
||||
sub.active = dof.focus_object is None
|
||||
@@ -1213,7 +1196,7 @@ class CYCLES_OBJECT_PT_lightgroup(CyclesButtonsPanel, Panel):
|
||||
sub.prop_search(ob, "lightgroup", view_layer, "lightgroups", text="Light Group", results_are_suggestions=True)
|
||||
|
||||
sub = row.column(align=True)
|
||||
sub.enabled = bool(ob.lightgroup) and not any(lg.name == ob.lightgroup for lg in view_layer.lightgroups)
|
||||
sub.active = bool(ob.lightgroup) and not any(lg.name == ob.lightgroup for lg in view_layer.lightgroups)
|
||||
sub.operator("scene.view_layer_add_lightgroup", icon='ADD', text="").name = ob.lightgroup
|
||||
|
||||
|
||||
@@ -1651,7 +1634,7 @@ class CYCLES_WORLD_PT_settings_light_group(CyclesButtonsPanel, Panel):
|
||||
)
|
||||
|
||||
sub = row.column(align=True)
|
||||
sub.enabled = bool(world.lightgroup) and not any(lg.name == world.lightgroup for lg in view_layer.lightgroups)
|
||||
sub.active = bool(world.lightgroup) and not any(lg.name == world.lightgroup for lg in view_layer.lightgroups)
|
||||
sub.operator("scene.view_layer_add_lightgroup", icon='ADD', text="").name = world.lightgroup
|
||||
|
||||
|
||||
@@ -2280,7 +2263,6 @@ classes = (
|
||||
CYCLES_PT_sampling_presets,
|
||||
CYCLES_PT_viewport_sampling_presets,
|
||||
CYCLES_PT_integrator_presets,
|
||||
CYCLES_PT_performance_presets,
|
||||
CYCLES_RENDER_PT_sampling,
|
||||
CYCLES_RENDER_PT_sampling_viewport,
|
||||
CYCLES_RENDER_PT_sampling_viewport_denoise,
|
||||
|
@@ -143,20 +143,11 @@ static float blender_camera_focal_distance(BL::RenderEngine &b_engine,
|
||||
if (!b_dof_object)
|
||||
return b_camera.dof().focus_distance();
|
||||
|
||||
Transform dofmat = get_transform(b_dof_object.matrix_world());
|
||||
|
||||
string focus_subtarget = b_camera.dof().focus_subtarget();
|
||||
if (b_dof_object.pose() && !focus_subtarget.empty()) {
|
||||
BL::PoseBone b_bone = b_dof_object.pose().bones[focus_subtarget];
|
||||
if (b_bone) {
|
||||
dofmat = dofmat * get_transform(b_bone.matrix());
|
||||
}
|
||||
}
|
||||
|
||||
/* for dof object, return distance along camera Z direction */
|
||||
BL::Array<float, 16> b_ob_matrix;
|
||||
b_engine.camera_model_matrix(b_ob, bcam->use_spherical_stereo, b_ob_matrix);
|
||||
Transform obmat = transform_clear_scale(get_transform(b_ob_matrix));
|
||||
Transform dofmat = get_transform(b_dof_object.matrix_world());
|
||||
float3 view_dir = normalize(transform_get_column(&obmat, 2));
|
||||
float3 dof_dir = transform_get_column(&obmat, 3) - transform_get_column(&dofmat, 3);
|
||||
return fabsf(dot(view_dir, dof_dir));
|
||||
|
@@ -613,6 +613,8 @@ void BlenderSync::sync_particle_hair(
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef WITH_NEW_CURVES_TYPE
|
||||
|
||||
static std::optional<BL::FloatAttribute> find_curves_radius_attribute(BL::Curves b_curves)
|
||||
{
|
||||
for (BL::Attribute &b_attribute : b_curves.attributes) {
|
||||
@@ -630,25 +632,6 @@ static std::optional<BL::FloatAttribute> find_curves_radius_attribute(BL::Curves
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
static BL::FloatVectorAttribute find_curves_position_attribute(BL::Curves b_curves)
|
||||
{
|
||||
for (BL::Attribute &b_attribute : b_curves.attributes) {
|
||||
if (b_attribute.name() != "position") {
|
||||
continue;
|
||||
}
|
||||
if (b_attribute.domain() != BL::Attribute::domain_POINT) {
|
||||
continue;
|
||||
}
|
||||
if (b_attribute.data_type() != BL::Attribute::data_type_FLOAT_VECTOR) {
|
||||
continue;
|
||||
}
|
||||
return BL::FloatVectorAttribute{b_attribute};
|
||||
}
|
||||
/* The position attribute must exist. */
|
||||
assert(false);
|
||||
return BL::FloatVectorAttribute{b_curves.attributes[0]};
|
||||
}
|
||||
|
||||
template<typename TypeInCycles, typename GetValueAtIndex>
|
||||
static void fill_generic_attribute(BL::Curves &b_curves,
|
||||
TypeInCycles *data,
|
||||
@@ -812,16 +795,16 @@ static void attr_create_generic(Scene *scene,
|
||||
}
|
||||
}
|
||||
|
||||
static float4 hair_point_as_float4(BL::FloatVectorAttribute b_attr_position,
|
||||
static float4 hair_point_as_float4(BL::Curves b_curves,
|
||||
std::optional<BL::FloatAttribute> b_attr_radius,
|
||||
const int index)
|
||||
{
|
||||
float4 mP = float3_to_float4(get_float3(b_attr_position.data[index].vector()));
|
||||
float4 mP = float3_to_float4(get_float3(b_curves.position_data[index].vector()));
|
||||
mP.w = b_attr_radius ? b_attr_radius->data[index].value() : 0.0f;
|
||||
return mP;
|
||||
}
|
||||
|
||||
static float4 interpolate_hair_points(BL::FloatVectorAttribute b_attr_position,
|
||||
static float4 interpolate_hair_points(BL::Curves b_curves,
|
||||
std::optional<BL::FloatAttribute> b_attr_radius,
|
||||
const int first_point_index,
|
||||
const int num_points,
|
||||
@@ -831,8 +814,8 @@ static float4 interpolate_hair_points(BL::FloatVectorAttribute b_attr_position,
|
||||
const int point_a = clamp((int)curve_t, 0, num_points - 1);
|
||||
const int point_b = min(point_a + 1, num_points - 1);
|
||||
const float t = curve_t - (float)point_a;
|
||||
return lerp(hair_point_as_float4(b_attr_position, b_attr_radius, first_point_index + point_a),
|
||||
hair_point_as_float4(b_attr_position, b_attr_radius, first_point_index + point_b),
|
||||
return lerp(hair_point_as_float4(b_curves, b_attr_radius, first_point_index + point_a),
|
||||
hair_point_as_float4(b_curves, b_attr_radius, first_point_index + point_b),
|
||||
t);
|
||||
}
|
||||
|
||||
@@ -865,7 +848,6 @@ static void export_hair_curves(Scene *scene,
|
||||
|
||||
hair->reserve_curves(num_curves, num_keys);
|
||||
|
||||
BL::FloatVectorAttribute b_attr_position = find_curves_position_attribute(b_curves);
|
||||
std::optional<BL::FloatAttribute> b_attr_radius = find_curves_radius_attribute(b_curves);
|
||||
|
||||
/* Export curves and points. */
|
||||
@@ -884,9 +866,9 @@ static void export_hair_curves(Scene *scene,
|
||||
|
||||
/* Position and radius. */
|
||||
for (int i = 0; i < num_points; i++) {
|
||||
const float3 co = get_float3(b_attr_position.data[first_point_index + i].vector());
|
||||
const float3 co = get_float3(b_curves.position_data[first_point_index + i].vector());
|
||||
const float radius = b_attr_radius ? b_attr_radius->data[first_point_index + i].value() :
|
||||
0.005f;
|
||||
0.0f;
|
||||
hair->add_curve_key(co, radius);
|
||||
|
||||
if (attr_intercept) {
|
||||
@@ -941,7 +923,6 @@ static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motio
|
||||
int num_motion_keys = 0;
|
||||
int curve_index = 0;
|
||||
|
||||
BL::FloatVectorAttribute b_attr_position = find_curves_position_attribute(b_curves);
|
||||
std::optional<BL::FloatAttribute> b_attr_radius = find_curves_radius_attribute(b_curves);
|
||||
|
||||
for (int i = 0; i < num_curves; i++) {
|
||||
@@ -957,7 +938,7 @@ static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motio
|
||||
int point_index = first_point_index + i;
|
||||
|
||||
if (point_index < num_keys) {
|
||||
mP[num_motion_keys] = hair_point_as_float4(b_attr_position, b_attr_radius, point_index);
|
||||
mP[num_motion_keys] = hair_point_as_float4(b_curves, b_attr_radius, point_index);
|
||||
num_motion_keys++;
|
||||
|
||||
if (!have_motion) {
|
||||
@@ -977,7 +958,7 @@ static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motio
|
||||
for (int i = 0; i < curve.num_keys; i++) {
|
||||
const float step = i * step_size;
|
||||
mP[num_motion_keys] = interpolate_hair_points(
|
||||
b_attr_position, b_attr_radius, first_point_index, num_points, step);
|
||||
b_curves, b_attr_radius, first_point_index, num_points, step);
|
||||
num_motion_keys++;
|
||||
}
|
||||
have_motion = true;
|
||||
@@ -1009,6 +990,15 @@ void BlenderSync::sync_hair(Hair *hair, BObjectInfo &b_ob_info, bool motion, int
|
||||
export_hair_curves(scene, hair, b_curves, need_motion, motion_scale);
|
||||
}
|
||||
}
|
||||
#else
|
||||
void BlenderSync::sync_hair(Hair *hair, BObjectInfo &b_ob_info, bool motion, int motion_step)
|
||||
{
|
||||
(void)hair;
|
||||
(void)b_ob_info;
|
||||
(void)motion;
|
||||
(void)motion_step;
|
||||
}
|
||||
#endif
|
||||
|
||||
void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, Hair *hair)
|
||||
{
|
||||
@@ -1020,11 +1010,14 @@ void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, H
|
||||
new_hair.set_used_shaders(used_shaders);
|
||||
|
||||
if (view_layer.use_hair) {
|
||||
#ifdef WITH_NEW_CURVES_TYPE
|
||||
if (b_ob_info.object_data.is_a(&RNA_Curves)) {
|
||||
/* Hair object. */
|
||||
sync_hair(&new_hair, b_ob_info, false);
|
||||
}
|
||||
else {
|
||||
else
|
||||
#endif
|
||||
{
|
||||
/* Particle hair. */
|
||||
bool need_undeformed = new_hair.need_attribute(scene, ATTR_STD_GENERATED);
|
||||
BL::Mesh b_mesh = object_to_mesh(
|
||||
@@ -1071,12 +1064,15 @@ void BlenderSync::sync_hair_motion(BL::Depsgraph b_depsgraph,
|
||||
|
||||
/* Export deformed coordinates. */
|
||||
if (ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview)) {
|
||||
#ifdef WITH_NEW_CURVES_TYPE
|
||||
if (b_ob_info.object_data.is_a(&RNA_Curves)) {
|
||||
/* Hair object. */
|
||||
sync_hair(hair, b_ob_info, true, motion_step);
|
||||
return;
|
||||
}
|
||||
else {
|
||||
else
|
||||
#endif
|
||||
{
|
||||
/* Particle hair. */
|
||||
BL::Mesh b_mesh = object_to_mesh(
|
||||
b_data, b_ob_info, b_depsgraph, false, Mesh::SUBDIVISION_NONE);
|
||||
|
@@ -15,7 +15,6 @@ enum ComputeDevice {
|
||||
COMPUTE_DEVICE_OPTIX = 3,
|
||||
COMPUTE_DEVICE_HIP = 4,
|
||||
COMPUTE_DEVICE_METAL = 5,
|
||||
COMPUTE_DEVICE_ONEAPI = 6,
|
||||
|
||||
COMPUTE_DEVICE_NUM
|
||||
};
|
||||
@@ -77,9 +76,6 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
|
||||
else if (compute_device == COMPUTE_DEVICE_METAL) {
|
||||
mask |= DEVICE_MASK_METAL;
|
||||
}
|
||||
else if (compute_device == COMPUTE_DEVICE_ONEAPI) {
|
||||
mask |= DEVICE_MASK_ONEAPI;
|
||||
}
|
||||
vector<DeviceInfo> devices = Device::available_devices(mask);
|
||||
|
||||
/* Match device preferences and available devices. */
|
||||
|
@@ -18,7 +18,11 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
static Geometry::Type determine_geom_type(BObjectInfo &b_ob_info, bool use_particle_hair)
|
||||
{
|
||||
#ifdef WITH_NEW_CURVES_TYPE
|
||||
if (b_ob_info.object_data.is_a(&RNA_Curves) || use_particle_hair) {
|
||||
#else
|
||||
if (use_particle_hair) {
|
||||
#endif
|
||||
return Geometry::HAIR;
|
||||
}
|
||||
|
||||
@@ -213,7 +217,11 @@ void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph,
|
||||
if (progress.get_cancel())
|
||||
return;
|
||||
|
||||
#ifdef WITH_NEW_CURVES_TYPE
|
||||
if (b_ob_info.object_data.is_a(&RNA_Curves) || use_particle_hair) {
|
||||
#else
|
||||
if (use_particle_hair) {
|
||||
#endif
|
||||
Hair *hair = static_cast<Hair *>(geom);
|
||||
sync_hair_motion(b_depsgraph, b_ob_info, hair, motion_step);
|
||||
}
|
||||
|
@@ -1,10 +1,8 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "scene/attribute.h"
|
||||
#include "scene/pointcloud.h"
|
||||
#include "scene/attribute.h"
|
||||
#include "scene/scene.h"
|
||||
|
||||
#include "blender/sync.h"
|
||||
@@ -140,36 +138,6 @@ static void copy_attributes(PointCloud *pointcloud,
|
||||
}
|
||||
}
|
||||
|
||||
/* Look up the optional per-point radius attribute of a point cloud.
 *
 * Returns the first attribute in `b_pointcloud.attributes` that is named "radius" and has
 * float data, wrapped as a `BL::FloatAttribute`. Returns `std::nullopt` when no attribute
 * matches. */
static std::optional<BL::FloatAttribute> find_radius_attribute(BL::PointCloud b_pointcloud)
{
  for (BL::Attribute &b_attr : b_pointcloud.attributes) {
    /* The name is compared first, the data type only for a matching name. */
    if (b_attr.name() == "radius" && b_attr.data_type() == BL::Attribute::data_type_FLOAT) {
      return BL::FloatAttribute{b_attr};
    }
  }
  return std::nullopt;
}
|
||||
|
||||
/* Look up the attribute that stores the point positions of a point cloud.
 *
 * Returns the first attribute in `b_pointcloud.attributes` that is named "position" and has
 * float-vector data, wrapped as a `BL::FloatVectorAttribute`.
 *
 * If nothing matches, a debug assert fires and the first attribute of the collection is
 * wrapped and returned instead. */
static BL::FloatVectorAttribute find_position_attribute(BL::PointCloud b_pointcloud)
{
  for (BL::Attribute &b_attr : b_pointcloud.attributes) {
    /* The name is compared first, the data type only for a matching name. */
    const bool is_position = b_attr.name() == "position" &&
                             b_attr.data_type() == BL::Attribute::data_type_FLOAT_VECTOR;
    if (is_position) {
      return BL::FloatVectorAttribute{b_attr};
    }
  }

  /* The position attribute must exist. */
  assert(false);
  return BL::FloatVectorAttribute{b_pointcloud.attributes[0]};
}
|
||||
|
||||
static void export_pointcloud(Scene *scene,
|
||||
PointCloud *pointcloud,
|
||||
BL::PointCloud b_pointcloud,
|
||||
@@ -188,18 +156,18 @@ static void export_pointcloud(Scene *scene,
|
||||
const int num_points = b_pointcloud.points.length();
|
||||
pointcloud->reserve(num_points);
|
||||
|
||||
BL::FloatVectorAttribute b_attr_position = find_position_attribute(b_pointcloud);
|
||||
std::optional<BL::FloatAttribute> b_attr_radius = find_radius_attribute(b_pointcloud);
|
||||
|
||||
/* Export points. */
|
||||
for (int i = 0; i < num_points; i++) {
|
||||
const float3 co = get_float3(b_attr_position.data[i].vector());
|
||||
const float radius = b_attr_radius ? b_attr_radius->data[i].value() : 0.0f;
|
||||
BL::PointCloud::points_iterator b_point_iter;
|
||||
for (b_pointcloud.points.begin(b_point_iter); b_point_iter != b_pointcloud.points.end();
|
||||
++b_point_iter) {
|
||||
BL::Point b_point = *b_point_iter;
|
||||
const float3 co = get_float3(b_point.co());
|
||||
const float radius = b_point.radius();
|
||||
pointcloud->add_point(co, radius);
|
||||
|
||||
/* Random number per point. */
|
||||
if (attr_random != NULL) {
|
||||
attr_random->add(hash_uint2_to_float(i, 0));
|
||||
attr_random->add(hash_uint2_to_float(b_point.index(), 0));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -227,15 +195,14 @@ static void export_pointcloud_motion(PointCloud *pointcloud,
|
||||
int num_motion_points = 0;
|
||||
const array<float3> &pointcloud_points = pointcloud->get_points();
|
||||
|
||||
BL::FloatVectorAttribute b_attr_position = find_position_attribute(b_pointcloud);
|
||||
std::optional<BL::FloatAttribute> b_attr_radius = find_radius_attribute(b_pointcloud);
|
||||
BL::PointCloud::points_iterator b_point_iter;
|
||||
for (b_pointcloud.points.begin(b_point_iter); b_point_iter != b_pointcloud.points.end();
|
||||
++b_point_iter) {
|
||||
BL::Point b_point = *b_point_iter;
|
||||
|
||||
for (int i = 0; i < num_points; i++) {
|
||||
if (num_motion_points < num_points) {
|
||||
const float3 co = get_float3(b_attr_position.data[i].vector());
|
||||
const float radius = b_attr_radius ? b_attr_radius->data[i].value() : 0.0f;
|
||||
float3 P = co;
|
||||
P.w = radius;
|
||||
float3 P = get_float3(b_point.co());
|
||||
P.w = b_point.radius();
|
||||
mP[num_motion_points] = P;
|
||||
have_motion = have_motion || (P != pointcloud_points[num_motion_points]);
|
||||
num_motion_points++;
|
||||
|
@@ -871,20 +871,18 @@ static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*
|
||||
static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
|
||||
{
|
||||
vector<DeviceType> device_types = Device::available_types();
|
||||
bool has_cuda = false, has_optix = false, has_hip = false, has_metal = false, has_oneapi = false;
|
||||
bool has_cuda = false, has_optix = false, has_hip = false, has_metal = false;
|
||||
foreach (DeviceType device_type, device_types) {
|
||||
has_cuda |= (device_type == DEVICE_CUDA);
|
||||
has_optix |= (device_type == DEVICE_OPTIX);
|
||||
has_hip |= (device_type == DEVICE_HIP);
|
||||
has_metal |= (device_type == DEVICE_METAL);
|
||||
has_oneapi |= (device_type == DEVICE_ONEAPI);
|
||||
}
|
||||
PyObject *list = PyTuple_New(5);
|
||||
PyObject *list = PyTuple_New(4);
|
||||
PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda));
|
||||
PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix));
|
||||
PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip));
|
||||
PyTuple_SET_ITEM(list, 3, PyBool_FromLong(has_metal));
|
||||
PyTuple_SET_ITEM(list, 4, PyBool_FromLong(has_oneapi));
|
||||
return list;
|
||||
}
|
||||
|
||||
@@ -916,9 +914,6 @@ static PyObject *set_device_override_func(PyObject * /*self*/, PyObject *arg)
|
||||
else if (override == "METAL") {
|
||||
BlenderSession::device_override = DEVICE_MASK_METAL;
|
||||
}
|
||||
else if (override == "ONEAPI") {
|
||||
BlenderSession::device_override = DEVICE_MASK_ONEAPI;
|
||||
}
|
||||
else {
|
||||
printf("\nError: %s is not a valid Cycles device.\n", override.c_str());
|
||||
Py_RETURN_FALSE;
|
||||
|
@@ -129,7 +129,7 @@ class BVHParams {
|
||||
|
||||
top_level = false;
|
||||
bvh_layout = BVH_LAYOUT_BVH2;
|
||||
use_compact_structure = false;
|
||||
use_compact_structure = true;
|
||||
use_unaligned_nodes = false;
|
||||
|
||||
num_motion_curve_steps = 0;
|
||||
|
@@ -91,8 +91,6 @@ if(CYCLES_STANDALONE_REPOSITORY)
|
||||
_set_default(USD_ROOT_DIR "${_cycles_lib_dir}/usd")
|
||||
_set_default(WEBP_ROOT_DIR "${_cycles_lib_dir}/webp")
|
||||
_set_default(ZLIB_ROOT "${_cycles_lib_dir}/zlib")
|
||||
_set_default(LEVEL_ZERO_ROOT_DIR "${_cycles_lib_dir}/level-zero")
|
||||
_set_default(SYCL_ROOT_DIR "${_cycles_lib_dir}/dpcpp")
|
||||
|
||||
# Ignore system libraries
|
||||
set(CMAKE_IGNORE_PATH "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES};${CMAKE_SYSTEM_INCLUDE_PATH};${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES};${CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES}")
|
||||
@@ -649,22 +647,3 @@ if(WITH_CYCLES_DEVICE_METAL)
|
||||
message(STATUS "Found Metal: ${METAL_LIBRARY}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
###########################################################################
|
||||
# oneAPI
|
||||
###########################################################################
|
||||
|
||||
# Look for the SYCL and Level Zero packages when the oneAPI device is requested.
# If either one is not found, report it and turn WITH_CYCLES_DEVICE_ONEAPI off;
# otherwise print the libraries that were found.
if(WITH_CYCLES_DEVICE_ONEAPI)
  find_package(SYCL)
  find_package(LevelZero)

  if(NOT (SYCL_FOUND AND LEVEL_ZERO_FOUND))
    message(STATUS "oneAPI or Level Zero not found, disabling oneAPI device from Cycles")
    set(WITH_CYCLES_DEVICE_ONEAPI OFF)
  else()
    message(STATUS "Found oneAPI: ${SYCL_LIBRARY}")
    message(STATUS "Found Level Zero: ${LEVEL_ZERO_LIBRARY}")
  endif()
endif()
|
||||
|
||||
unset(_cycles_lib_dir)
|
||||
|
@@ -82,15 +82,6 @@ set(SRC_HIP
|
||||
hip/util.h
|
||||
)
|
||||
|
||||
set(SRC_ONEAPI
|
||||
oneapi/device_impl.cpp
|
||||
oneapi/device_impl.h
|
||||
oneapi/device.cpp
|
||||
oneapi/device.h
|
||||
oneapi/queue.cpp
|
||||
oneapi/queue.h
|
||||
)
|
||||
|
||||
set(SRC_DUMMY
|
||||
dummy/device.cpp
|
||||
dummy/device.h
|
||||
@@ -143,7 +134,6 @@ set(SRC
|
||||
${SRC_DUMMY}
|
||||
${SRC_MULTI}
|
||||
${SRC_OPTIX}
|
||||
${SRC_ONEAPI}
|
||||
${SRC_HEADERS}
|
||||
)
|
||||
|
||||
@@ -191,9 +181,6 @@ if(WITH_CYCLES_DEVICE_METAL)
|
||||
${SRC_METAL}
|
||||
)
|
||||
endif()
|
||||
if (WITH_CYCLES_DEVICE_ONEAPI)
|
||||
add_definitions(-DWITH_ONEAPI)
|
||||
endif()
|
||||
|
||||
if(WITH_OPENIMAGEDENOISE)
|
||||
list(APPEND LIB
|
||||
@@ -206,11 +193,6 @@ include_directories(SYSTEM ${INC_SYS})
|
||||
|
||||
cycles_add_library(cycles_device "${LIB}" ${SRC})
|
||||
|
||||
if(WITH_CYCLES_DEVICE_ONEAPI)
|
||||
# Need to have proper rebuilding in case of changes in cycles_kernel_oneapi due to external project behaviour
|
||||
add_dependencies(cycles_device cycles_kernel_oneapi)
|
||||
endif()
|
||||
|
||||
source_group("cpu" FILES ${SRC_CPU})
|
||||
source_group("cuda" FILES ${SRC_CUDA})
|
||||
source_group("dummy" FILES ${SRC_DUMMY})
|
||||
@@ -218,5 +200,4 @@ source_group("hip" FILES ${SRC_HIP})
|
||||
source_group("multi" FILES ${SRC_MULTI})
|
||||
source_group("metal" FILES ${SRC_METAL})
|
||||
source_group("optix" FILES ${SRC_OPTIX})
|
||||
source_group("oneapi" FILES ${SRC_ONEAPI})
|
||||
source_group("common" FILES ${SRC_BASE} ${SRC_HEADERS})
|
||||
|
@@ -197,7 +197,7 @@ void CPUDevice::const_copy_to(const char *name, void *host, size_t size)
|
||||
|
||||
// Update scene handle (since it is different for each device on multi devices)
|
||||
KernelData *const data = (KernelData *)host;
|
||||
data->device_bvh = embree_scene;
|
||||
data->bvh.scene = embree_scene;
|
||||
}
|
||||
#endif
|
||||
kernel_const_copy(&kernel_globals, name, host, size);
|
||||
|
@@ -16,7 +16,6 @@
|
||||
#include "device/hip/device.h"
|
||||
#include "device/metal/device.h"
|
||||
#include "device/multi/device.h"
|
||||
#include "device/oneapi/device.h"
|
||||
#include "device/optix/device.h"
|
||||
|
||||
#include "util/foreach.h"
|
||||
@@ -40,7 +39,6 @@ vector<DeviceInfo> Device::optix_devices;
|
||||
vector<DeviceInfo> Device::cpu_devices;
|
||||
vector<DeviceInfo> Device::hip_devices;
|
||||
vector<DeviceInfo> Device::metal_devices;
|
||||
vector<DeviceInfo> Device::oneapi_devices;
|
||||
uint Device::devices_initialized_mask = 0;
|
||||
|
||||
/* Device */
|
||||
@@ -103,13 +101,6 @@ Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
device = device_metal_create(info, stats, profiler);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#ifdef WITH_ONEAPI
|
||||
case DEVICE_ONEAPI:
|
||||
device = device_oneapi_create(info, stats, profiler);
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -135,8 +126,6 @@ DeviceType Device::type_from_string(const char *name)
|
||||
return DEVICE_HIP;
|
||||
else if (strcmp(name, "METAL") == 0)
|
||||
return DEVICE_METAL;
|
||||
else if (strcmp(name, "ONEAPI") == 0)
|
||||
return DEVICE_ONEAPI;
|
||||
|
||||
return DEVICE_NONE;
|
||||
}
|
||||
@@ -155,8 +144,6 @@ string Device::string_from_type(DeviceType type)
|
||||
return "HIP";
|
||||
else if (type == DEVICE_METAL)
|
||||
return "METAL";
|
||||
else if (type == DEVICE_ONEAPI)
|
||||
return "ONEAPI";
|
||||
|
||||
return "";
|
||||
}
|
||||
@@ -176,9 +163,6 @@ vector<DeviceType> Device::available_types()
|
||||
#endif
|
||||
#ifdef WITH_METAL
|
||||
types.push_back(DEVICE_METAL);
|
||||
#endif
|
||||
#ifdef WITH_ONEAPI
|
||||
types.push_back(DEVICE_ONEAPI);
|
||||
#endif
|
||||
return types;
|
||||
}
|
||||
@@ -235,20 +219,6 @@ vector<DeviceInfo> Device::available_devices(uint mask)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef WITH_ONEAPI
|
||||
if (mask & DEVICE_MASK_ONEAPI) {
|
||||
if (!(devices_initialized_mask & DEVICE_MASK_ONEAPI)) {
|
||||
if (device_oneapi_init()) {
|
||||
device_oneapi_info(oneapi_devices);
|
||||
}
|
||||
devices_initialized_mask |= DEVICE_MASK_ONEAPI;
|
||||
}
|
||||
foreach (DeviceInfo &info, oneapi_devices) {
|
||||
devices.push_back(info);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (mask & DEVICE_MASK_CPU) {
|
||||
if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
|
||||
device_cpu_info(cpu_devices);
|
||||
@@ -312,15 +282,6 @@ string Device::device_capabilities(uint mask)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef WITH_ONEAPI
|
||||
if (mask & DEVICE_MASK_ONEAPI) {
|
||||
if (device_oneapi_init()) {
|
||||
capabilities += "\noneAPI device capabilities:\n";
|
||||
capabilities += device_oneapi_capabilities();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef WITH_METAL
|
||||
if (mask & DEVICE_MASK_METAL) {
|
||||
if (device_metal_init()) {
|
||||
@@ -419,7 +380,6 @@ void Device::free_memory()
|
||||
cuda_devices.free_memory();
|
||||
optix_devices.free_memory();
|
||||
hip_devices.free_memory();
|
||||
oneapi_devices.free_memory();
|
||||
cpu_devices.free_memory();
|
||||
metal_devices.free_memory();
|
||||
}
|
||||
|
@@ -29,7 +29,6 @@ class DeviceQueue;
|
||||
class Progress;
|
||||
class CPUKernels;
|
||||
class CPUKernelThreadGlobals;
|
||||
class Scene;
|
||||
|
||||
/* Device Types */
|
||||
|
||||
@@ -41,7 +40,6 @@ enum DeviceType {
|
||||
DEVICE_OPTIX,
|
||||
DEVICE_HIP,
|
||||
DEVICE_METAL,
|
||||
DEVICE_ONEAPI,
|
||||
DEVICE_DUMMY,
|
||||
};
|
||||
|
||||
@@ -51,7 +49,6 @@ enum DeviceTypeMask {
|
||||
DEVICE_MASK_OPTIX = (1 << DEVICE_OPTIX),
|
||||
DEVICE_MASK_HIP = (1 << DEVICE_HIP),
|
||||
DEVICE_MASK_METAL = (1 << DEVICE_METAL),
|
||||
DEVICE_MASK_ONEAPI = (1 << DEVICE_ONEAPI),
|
||||
DEVICE_MASK_ALL = ~0
|
||||
};
|
||||
|
||||
@@ -187,11 +184,6 @@ class Device {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Called after kernel texture setup, and prior to integrator state setup. */
|
||||
virtual void optimize_for_scene(Scene * /*scene*/)
|
||||
{
|
||||
}
|
||||
|
||||
virtual bool is_resident(device_ptr /*key*/, Device *sub_device)
|
||||
{
|
||||
/* Memory is always resident if this is not a multi device, regardless of whether the pointer
|
||||
@@ -281,7 +273,6 @@ class Device {
|
||||
static vector<DeviceInfo> cpu_devices;
|
||||
static vector<DeviceInfo> hip_devices;
|
||||
static vector<DeviceInfo> metal_devices;
|
||||
static vector<DeviceInfo> oneapi_devices;
|
||||
static uint devices_initialized_mask;
|
||||
};
|
||||
|
||||
|
@@ -51,7 +51,7 @@ static inline bool hipSupportsDevice(const int hipDevId)
|
||||
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
|
||||
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
|
||||
|
||||
return (major >= 9);
|
||||
return (major > 10) || (major == 10 && minor >= 1);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -34,8 +34,7 @@ void device_metal_info(vector<DeviceInfo> &devices)
|
||||
int device_index = 0;
|
||||
for (id<MTLDevice> &device : usable_devices) {
|
||||
/* Compute unique ID for persistent user preferences. */
|
||||
string device_name = MetalInfo::get_device_name(device);
|
||||
|
||||
string device_name = [device.name UTF8String];
|
||||
string id = string("METAL_") + device_name;
|
||||
|
||||
/* Hardware ID might not be unique, add device number in that case. */
|
||||
@@ -49,6 +48,12 @@ void device_metal_info(vector<DeviceInfo> &devices)
|
||||
info.type = DEVICE_METAL;
|
||||
info.description = string_remove_trademark(string(device_name));
|
||||
|
||||
/* Ensure unique naming on Apple Silicon / SoC devices which return the same string for CPU and
|
||||
* GPU */
|
||||
if (info.description == system_cpu_brand_string()) {
|
||||
info.description += " (GPU)";
|
||||
}
|
||||
|
||||
info.num = device_index;
|
||||
/* We don't know if it's used for display, but assume it is. */
|
||||
info.display_device = true;
|
||||
@@ -64,15 +69,14 @@ string device_metal_capabilities()
|
||||
{
|
||||
string result = "";
|
||||
auto allDevices = MTLCopyAllDevices();
|
||||
uint32_t num_devices = (uint32_t)allDevices.count;
|
||||
uint32_t num_devices = allDevices.count;
|
||||
if (num_devices == 0) {
|
||||
return "No Metal devices found\n";
|
||||
}
|
||||
result += string_printf("Number of devices: %u\n", num_devices);
|
||||
|
||||
for (id<MTLDevice> device in allDevices) {
|
||||
string device_name = MetalInfo::get_device_name(device);
|
||||
result += string_printf("\t\tDevice: %s\n", device_name.c_str());
|
||||
result += string_printf("\t\tDevice: %s\n", [device.name UTF8String]);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
@@ -42,6 +42,7 @@ class MetalDevice : public Device {
|
||||
nil; /* encoder used for fetching device pointers from MTLAccelerationStructure */
|
||||
/*---------------------------------------------------*/
|
||||
|
||||
string device_name;
|
||||
MetalGPUVendor device_vendor;
|
||||
|
||||
uint kernel_features;
|
||||
@@ -75,8 +76,7 @@ class MetalDevice : public Device {
|
||||
std::vector<id<MTLTexture>> texture_slot_map;
|
||||
|
||||
bool use_metalrt = false;
|
||||
MetalPipelineType kernel_specialization_level = PSO_GENERIC;
|
||||
std::atomic_bool async_compile_and_load = false;
|
||||
bool use_function_specialisation = false;
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
|
||||
@@ -92,7 +92,9 @@ class MetalDevice : public Device {
|
||||
|
||||
bool use_adaptive_compilation();
|
||||
|
||||
void make_source(MetalPipelineType pso_type, const uint kernel_features);
|
||||
string get_source(const uint kernel_features);
|
||||
|
||||
string compile_kernel(const uint kernel_features, const char *name);
|
||||
|
||||
virtual bool load_kernels(const uint kernel_features) override;
|
||||
|
||||
@@ -110,9 +112,7 @@ class MetalDevice : public Device {
|
||||
|
||||
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
|
||||
|
||||
virtual void optimize_for_scene(Scene *scene) override;
|
||||
|
||||
bool compile_and_load(MetalPipelineType pso_type);
|
||||
id<MTLLibrary> compile(string const &source);
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* low-level memory management */
|
||||
|
@@ -6,12 +6,9 @@
|
||||
# include "device/metal/device_impl.h"
|
||||
# include "device/metal/device.h"
|
||||
|
||||
# include "scene/scene.h"
|
||||
|
||||
# include "util/debug.h"
|
||||
# include "util/md5.h"
|
||||
# include "util/path.h"
|
||||
# include "util/time.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -46,9 +43,10 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
||||
auto usable_devices = MetalInfo::get_usable_devices();
|
||||
assert(mtlDevId < usable_devices.size());
|
||||
mtlDevice = usable_devices[mtlDevId];
|
||||
device_vendor = MetalInfo::get_device_vendor(mtlDevice);
|
||||
device_name = [mtlDevice.name UTF8String];
|
||||
device_vendor = MetalInfo::get_vendor_from_device_name(device_name);
|
||||
assert(device_vendor != METAL_GPU_UNKNOWN);
|
||||
metal_printf("Creating new Cycles device for Metal: %s\n", info.description.c_str());
|
||||
metal_printf("Creating new Cycles device for Metal: %s\n", device_name.c_str());
|
||||
|
||||
/* determine default storage mode based on whether UMA is supported */
|
||||
|
||||
@@ -80,10 +78,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
||||
case METAL_GPU_APPLE: {
|
||||
max_threads_per_threadgroup = 512;
|
||||
use_metalrt = info.use_metalrt;
|
||||
|
||||
/* Specialize the intersection kernels on Apple GPUs by default as these can be built very
|
||||
* quickly. */
|
||||
kernel_specialization_level = PSO_SPECIALIZED_INTERSECT;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -96,13 +90,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
||||
capture_enabled = true;
|
||||
}
|
||||
|
||||
if (auto envstr = getenv("CYCLES_METAL_SPECIALIZATION_LEVEL")) {
|
||||
kernel_specialization_level = (MetalPipelineType)atoi(envstr);
|
||||
}
|
||||
metal_printf("kernel_specialization_level = %s\n",
|
||||
kernel_type_as_string(
|
||||
(MetalPipelineType)min((int)kernel_specialization_level, (int)PSO_NUM - 1)));
|
||||
|
||||
MTLArgumentDescriptor *arg_desc_params = [[MTLArgumentDescriptor alloc] init];
|
||||
arg_desc_params.dataType = MTLDataTypePointer;
|
||||
arg_desc_params.access = MTLArgumentAccessReadOnly;
|
||||
@@ -222,86 +209,61 @@ bool MetalDevice::use_adaptive_compilation()
|
||||
return DebugFlags().metal.adaptive_compile;
|
||||
}
|
||||
|
||||
void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_features)
|
||||
string MetalDevice::get_source(const uint kernel_features)
|
||||
{
|
||||
string global_defines;
|
||||
string build_options;
|
||||
|
||||
if (use_adaptive_compilation()) {
|
||||
global_defines += "#define __KERNEL_FEATURES__ " + to_string(kernel_features) + "\n";
|
||||
build_options += " -D__KERNEL_FEATURES__=" + to_string(kernel_features);
|
||||
}
|
||||
|
||||
if (use_metalrt) {
|
||||
global_defines += "#define __METALRT__\n";
|
||||
build_options += "-D__METALRT__ ";
|
||||
if (motion_blur) {
|
||||
global_defines += "#define __METALRT_MOTION__\n";
|
||||
build_options += "-D__METALRT_MOTION__ ";
|
||||
}
|
||||
}
|
||||
|
||||
# ifdef WITH_CYCLES_DEBUG
|
||||
global_defines += "#define __KERNEL_DEBUG__\n";
|
||||
build_options += "-D__KERNEL_DEBUG__ ";
|
||||
# endif
|
||||
|
||||
switch (device_vendor) {
|
||||
default:
|
||||
break;
|
||||
case METAL_GPU_INTEL:
|
||||
global_defines += "#define __KERNEL_METAL_INTEL__\n";
|
||||
build_options += "-D__KERNEL_METAL_INTEL__ ";
|
||||
break;
|
||||
case METAL_GPU_AMD:
|
||||
global_defines += "#define __KERNEL_METAL_AMD__\n";
|
||||
build_options += "-D__KERNEL_METAL_AMD__ ";
|
||||
break;
|
||||
case METAL_GPU_APPLE:
|
||||
global_defines += "#define __KERNEL_METAL_APPLE__\n";
|
||||
build_options += "-D__KERNEL_METAL_APPLE__ ";
|
||||
break;
|
||||
}
|
||||
|
||||
string &source = this->source[pso_type];
|
||||
source = "\n#include \"kernel/device/metal/kernel.metal\"\n";
|
||||
source = path_source_replace_includes(source, path_get("source"));
|
||||
/* reformat -D defines list into compilable form */
|
||||
vector<string> components;
|
||||
string_replace(build_options, "-D", "");
|
||||
string_split(components, build_options, " ");
|
||||
|
||||
/* Perform any required specialization on the source.
|
||||
* With Metal function constants we can generate a single variant of the kernel source which can
|
||||
* be repeatedly respecialized.
|
||||
*/
|
||||
string baked_constants;
|
||||
|
||||
/* Replace specific KernelData "dot" dereferences with a Metal function_constant identifier of
|
||||
* the same character length. Build a string of all active constant values which is then hashed
|
||||
* in order to identify the PSO.
|
||||
*/
|
||||
if (pso_type != PSO_GENERIC) {
|
||||
const double starttime = time_dt();
|
||||
|
||||
# define KERNEL_STRUCT_BEGIN(name, parent) \
|
||||
string_replace_same_length(source, "kernel_data." #parent ".", "kernel_data_" #parent "_");
|
||||
|
||||
/* Add constants to md5 so that 'get_best_pipeline' is able to return a suitable match. */
|
||||
# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
|
||||
baked_constants += string(#parent "." #name "=") + \
|
||||
to_string(_type(launch_params.data.parent.name)) + "\n";
|
||||
|
||||
# include "kernel/data_template.h"
|
||||
|
||||
/* Opt in to all of available specializations. This can be made more granular for the
|
||||
* PSO_SPECIALIZED_INTERSECT case in order to minimize the number of specialization requests,
|
||||
* but the overhead should be negligible as these are very quick to (re)build and aren't
|
||||
* serialized to disk via MTLBinaryArchives.
|
||||
*/
|
||||
global_defines += "#define __KERNEL_USE_DATA_CONSTANTS__\n";
|
||||
|
||||
metal_printf("KernelData patching took %.1f ms\n", (time_dt() - starttime) * 1000.0);
|
||||
string globalDefines;
|
||||
for (const string &component : components) {
|
||||
vector<string> assignments;
|
||||
string_split(assignments, component, "=");
|
||||
if (assignments.size() == 2)
|
||||
globalDefines += string_printf(
|
||||
"#define %s %s\n", assignments[0].c_str(), assignments[1].c_str());
|
||||
else
|
||||
globalDefines += string_printf("#define %s\n", assignments[0].c_str());
|
||||
}
|
||||
|
||||
source = global_defines + source;
|
||||
metal_printf("================\n%s================\n\%s================\n",
|
||||
global_defines.c_str(),
|
||||
baked_constants.c_str());
|
||||
string source = globalDefines + "\n#include \"kernel/device/metal/kernel.metal\"\n";
|
||||
source = path_source_replace_includes(source, path_get("source"));
|
||||
|
||||
/* Generate an MD5 from the source and include any baked constants. This is used when caching
|
||||
* PSOs. */
|
||||
MD5Hash md5;
|
||||
md5.append(baked_constants);
|
||||
md5.append(source);
|
||||
source_md5[pso_type] = md5.get_hex();
|
||||
metal_printf("Global defines:\n%s\n", globalDefines.c_str());
|
||||
|
||||
return source;
|
||||
}
|
||||
|
||||
bool MetalDevice::load_kernels(const uint _kernel_features)
|
||||
@@ -317,22 +279,24 @@ bool MetalDevice::load_kernels(const uint _kernel_features)
|
||||
* active, but may still need to be rendered without motion blur if that isn't active as well. */
|
||||
motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
|
||||
|
||||
bool result = compile_and_load(PSO_GENERIC);
|
||||
source[PSO_GENERIC] = get_source(kernel_features);
|
||||
mtlLibrary[PSO_GENERIC] = compile(source[PSO_GENERIC]);
|
||||
|
||||
MD5Hash md5;
|
||||
md5.append(source[PSO_GENERIC]);
|
||||
source_md5[PSO_GENERIC] = md5.get_hex();
|
||||
|
||||
metal_printf("Front-end compilation finished (generic)\n");
|
||||
|
||||
bool result = MetalDeviceKernels::load(this, false);
|
||||
|
||||
reserve_local_memory(kernel_features);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
|
||||
id<MTLLibrary> MetalDevice::compile(string const &source)
|
||||
{
|
||||
make_source(pso_type, kernel_features);
|
||||
|
||||
if (!MetalDeviceKernels::should_load_kernels(this, pso_type)) {
|
||||
/* We already have a full set of matching pipelines which are cached or queued. */
|
||||
metal_printf("%s kernels already requested\n", kernel_type_as_string(pso_type));
|
||||
return true;
|
||||
}
|
||||
|
||||
MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
|
||||
|
||||
options.fastMathEnabled = YES;
|
||||
@@ -340,30 +304,19 @@ bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
|
||||
options.languageVersion = MTLLanguageVersion2_4;
|
||||
}
|
||||
|
||||
if (getenv("CYCLES_METAL_PROFILING") || getenv("CYCLES_METAL_DEBUG")) {
|
||||
path_write_text(path_cache_get(string_printf("%s.metal", kernel_type_as_string(pso_type))),
|
||||
source[pso_type]);
|
||||
}
|
||||
|
||||
const double starttime = time_dt();
|
||||
|
||||
NSError *error = NULL;
|
||||
mtlLibrary[pso_type] = [mtlDevice newLibraryWithSource:@(source[pso_type].c_str())
|
||||
options:options
|
||||
error:&error];
|
||||
id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str())
|
||||
options:options
|
||||
error:&error];
|
||||
|
||||
if (!mtlLibrary[pso_type]) {
|
||||
if (!mtlLibrary) {
|
||||
NSString *err = [error localizedDescription];
|
||||
set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
|
||||
}
|
||||
|
||||
metal_printf("Front-end compilation finished in %.1f seconds (%s)\n",
|
||||
time_dt() - starttime,
|
||||
kernel_type_as_string(pso_type));
|
||||
|
||||
[options release];
|
||||
|
||||
return MetalDeviceKernels::load(this, pso_type);
|
||||
return mtlLibrary;
|
||||
}
|
||||
|
||||
void MetalDevice::reserve_local_memory(const uint kernel_features)
|
||||
@@ -670,63 +623,11 @@ device_ptr MetalDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, siz
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Request scene-specialized kernels up to the configured `kernel_specialization_level`.
 *
 * Intersection kernels are always compiled and loaded synchronously. Shading kernels are only
 * requested for background (non-viewport) renders, and are built on a global dispatch queue
 * unless `CYCLES_METAL_PROFILING` is set, in which case they are built inline. */
void MetalDevice::optimize_for_scene(Scene *scene)
{
  const MetalPipelineType max_level = kernel_specialization_level;

  if (max_level < PSO_SPECIALIZED_INTERSECT) {
    /* No specialization requested at all. */
    return;
  }

  /* Intersection kernels are fast to specialize, so this load is always blocking. */
  compile_and_load(PSO_SPECIALIZED_INTERSECT);

  /* Shade kernels are slower to build: skip them unless they were requested, and never build
   * them during viewport rendering. */
  const bool want_shade_kernels = !(max_level < PSO_SPECIALIZED_SHADE);
  if (!want_shade_kernels || !scene->params.background) {
    return;
  }

  auto build_shade_kernels = ^() {
    compile_and_load(PSO_SPECIALIZED_SHADE);
    async_compile_and_load = false;
  };

  /* Block if per-kernel profiling is enabled (ensure steady rendering rate). */
  const bool profiling_enabled = (getenv("CYCLES_METAL_PROFILING") != nullptr);
  if (profiling_enabled) {
    build_shade_kernels();
    return;
  }

  /* Only one asynchronous shade specialization may be in flight at a time. */
  if (async_compile_and_load) {
    metal_printf(
        "Async PSO_SPECIALIZED_SHADE load request already in progress - dropping request\n");
    return;
  }

  async_compile_and_load = true;
  dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
                 build_shade_kernels);
}
|
||||
|
||||
void MetalDevice::const_copy_to(const char *name, void *host, size_t size)
|
||||
{
|
||||
if (strcmp(name, "data") == 0) {
|
||||
assert(size == sizeof(KernelData));
|
||||
memcpy((uint8_t *)&launch_params.data, host, sizeof(KernelData));
|
||||
memcpy((uint8_t *)&launch_params + offsetof(KernelParamsMetal, data), host, size);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -747,7 +648,7 @@ void MetalDevice::const_copy_to(const char *name, void *host, size_t size)
|
||||
/* Update data storage pointers in launch parameters. */
|
||||
if (strcmp(name, "integrator_state") == 0) {
|
||||
/* IntegratorStateGPU is contiguous pointers */
|
||||
const size_t pointer_block_size = offsetof(IntegratorStateGPU, sort_partition_divisor);
|
||||
const size_t pointer_block_size = sizeof(IntegratorStateGPU);
|
||||
update_launch_pointers(
|
||||
offsetof(KernelParamsMetal, integrator_state), host, size, pointer_block_size);
|
||||
}
|
||||
|
@@ -31,7 +31,7 @@ enum {
|
||||
enum { METALRT_TABLE_DEFAULT, METALRT_TABLE_SHADOW, METALRT_TABLE_LOCAL, METALRT_TABLE_NUM };
|
||||
|
||||
/* Pipeline State Object types */
|
||||
enum MetalPipelineType {
|
||||
enum {
|
||||
/* A kernel that can be used with all scenes, supporting all features.
|
||||
* It is slow to compile, but only needs to be compiled once and is then
|
||||
* cached for future render sessions. This allows a render to get underway
|
||||
@@ -39,33 +39,28 @@ enum MetalPipelineType {
|
||||
*/
|
||||
PSO_GENERIC,
|
||||
|
||||
/* An intersection kernel that is very quick to specialize and results in faster intersection
|
||||
* kernel performance. It uses Metal function constants to replace several KernelData variables
|
||||
* with fixed constants.
|
||||
/* A kernel that is relatively quick to compile, but is specialized for the
|
||||
* scene being rendered. It only contains the functionality and even baked in
|
||||
* constants for values that means it needs to be recompiled whenever a
|
||||
* dependent setting is changed. The render performance of this kernel is
|
||||
* significantly faster though, and justifies the extra compile time.
|
||||
*/
|
||||
PSO_SPECIALIZED_INTERSECT,
|
||||
|
||||
/* A shading kernel that is slow to specialize, but results in faster shading kernel performance
|
||||
* rendered. It uses Metal function constants to replace several KernelData variables with fixed
|
||||
* constants and short-circuit all unused SVM node case handlers.
|
||||
*/
|
||||
PSO_SPECIALIZED_SHADE,
|
||||
/* METAL_WIP: This isn't used and will require more changes to enable. */
|
||||
PSO_SPECIALISED,
|
||||
|
||||
PSO_NUM
|
||||
};
|
||||
|
||||
const char *kernel_type_as_string(MetalPipelineType pso_type);
|
||||
const char *kernel_type_as_string(int kernel_type);
|
||||
|
||||
struct MetalKernelPipeline {
|
||||
|
||||
void compile();
|
||||
|
||||
id<MTLLibrary> mtlLibrary = nil;
|
||||
MetalPipelineType pso_type;
|
||||
bool scene_specialized;
|
||||
string source_md5;
|
||||
size_t usage_count = 0;
|
||||
|
||||
KernelData kernel_data_;
|
||||
bool use_metalrt;
|
||||
bool metalrt_hair;
|
||||
bool metalrt_hair_thick;
|
||||
@@ -80,8 +75,6 @@ struct MetalKernelPipeline {
|
||||
id<MTLComputePipelineState> pipeline = nil;
|
||||
int num_threads_per_block = 0;
|
||||
|
||||
bool should_use_binary_archive() const;
|
||||
|
||||
string error_str;
|
||||
|
||||
API_AVAILABLE(macos(11.0))
|
||||
@@ -92,8 +85,7 @@ struct MetalKernelPipeline {
|
||||
/* Cache of Metal kernels for each DeviceKernel. */
|
||||
namespace MetalDeviceKernels {
|
||||
|
||||
bool should_load_kernels(MetalDevice *device, MetalPipelineType pso_type);
|
||||
bool load(MetalDevice *device, MetalPipelineType pso_type);
|
||||
bool load(MetalDevice *device, bool scene_specialized);
|
||||
const MetalKernelPipeline *get_best_pipeline(const MetalDevice *device, DeviceKernel kernel);
|
||||
|
||||
} /* namespace MetalDeviceKernels */
|
||||
|
@@ -5,7 +5,6 @@
|
||||
|
||||
# include "device/metal/kernel.h"
|
||||
# include "device/metal/device_impl.h"
|
||||
# include "kernel/device/metal/function_constants.h"
|
||||
# include "util/md5.h"
|
||||
# include "util/path.h"
|
||||
# include "util/tbb.h"
|
||||
@@ -17,15 +16,13 @@ CCL_NAMESPACE_BEGIN
|
||||
/* limit to 2 MTLCompiler instances */
|
||||
int max_mtlcompiler_threads = 2;
|
||||
|
||||
const char *kernel_type_as_string(MetalPipelineType pso_type)
|
||||
const char *kernel_type_as_string(int kernel_type)
|
||||
{
|
||||
switch (pso_type) {
|
||||
switch (kernel_type) {
|
||||
case PSO_GENERIC:
|
||||
return "PSO_GENERIC";
|
||||
case PSO_SPECIALIZED_INTERSECT:
|
||||
return "PSO_SPECIALIZED_INTERSECT";
|
||||
case PSO_SPECIALIZED_SHADE:
|
||||
return "PSO_SPECIALIZED_SHADE";
|
||||
case PSO_SPECIALISED:
|
||||
return "PSO_SPECIALISED";
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
@@ -53,11 +50,7 @@ struct ShaderCache {
|
||||
|
||||
/* Non-blocking request for a kernel, optionally specialized to the scene being rendered by
|
||||
* device. */
|
||||
void load_kernel(DeviceKernel kernel, MetalDevice *device, MetalPipelineType pso_type);
|
||||
|
||||
bool should_load_kernel(DeviceKernel device_kernel,
|
||||
MetalDevice *device,
|
||||
MetalPipelineType pso_type);
|
||||
void load_kernel(DeviceKernel kernel, MetalDevice *device, bool scene_specialized);
|
||||
|
||||
void wait_for_all();
|
||||
|
||||
@@ -146,53 +139,9 @@ void ShaderCache::compile_thread_func(int thread_index)
|
||||
}
|
||||
}
|
||||
|
||||
/* Decide whether `device_kernel` still needs a pipeline of type `pso_type` for `device`.
 *
 * Returns false when the kernel is never built (megakernel), is not needed by the scene's
 * kernel features, is not eligible for the requested specialization, or when a pipeline with
 * the same source MD5 has already been requested or cached. Returns true otherwise. */
bool ShaderCache::should_load_kernel(DeviceKernel device_kernel,
                                     MetalDevice *device,
                                     MetalPipelineType pso_type)
{
  /* Skip megakernel. */
  if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
    return false;
  }

  /* Skip shade_surface_raytrace kernel if the scene doesn't require it. */
  const bool is_raytrace_kernel = (device_kernel ==
                                   DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE);
  if (is_raytrace_kernel && (device->kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0) {
    return false;
  }

  if (pso_type != PSO_GENERIC) {
    /* Only specialize kernels where it can make an impact. */
    const bool can_specialize = (device_kernel >= DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST &&
                                 device_kernel <= DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL);
    if (!can_specialize) {
      return false;
    }

    /* Only specialize shading / intersection kernels as requested: shade kernels go with
     * PSO_SPECIALIZED_SHADE, everything else with the other specialized type. */
    const bool kernel_is_shade = (device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
    const bool pso_is_shade = (pso_type == PSO_SPECIALIZED_SHADE);
    if (kernel_is_shade != pso_is_shade) {
      return false;
    }
  }

  /* Check whether the kernel has already been requested / cached. */
  thread_scoped_lock lock(cache_mutex);
  const auto &wanted_md5 = device->source_md5[pso_type];
  for (auto &cached : pipelines[device_kernel]) {
    if (cached->source_md5 == wanted_md5) {
      return false;
    }
  }

  return true;
}
|
||||
|
||||
void ShaderCache::load_kernel(DeviceKernel device_kernel,
|
||||
MetalDevice *device,
|
||||
MetalPipelineType pso_type)
|
||||
bool scene_specialized)
|
||||
{
|
||||
{
|
||||
/* create compiler threads on first run */
|
||||
@@ -205,21 +154,52 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
|
||||
}
|
||||
}
|
||||
|
||||
if (!should_load_kernel(device_kernel, device, pso_type)) {
|
||||
if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
|
||||
/* skip megakernel */
|
||||
return;
|
||||
}
|
||||
|
||||
if (scene_specialized) {
|
||||
/* Only specialize kernels where it can make an impact. */
|
||||
if (device_kernel < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
|
||||
device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
/* check whether the kernel has already been requested / cached */
|
||||
thread_scoped_lock lock(cache_mutex);
|
||||
for (auto &pipeline : pipelines[device_kernel]) {
|
||||
if (scene_specialized) {
|
||||
if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) {
|
||||
/* we already requested a pipeline that is specialized for this kernel data */
|
||||
metal_printf("Specialized kernel already requested (%s)\n",
|
||||
device_kernel_as_string(device_kernel));
|
||||
return;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (pipeline->source_md5 == device->source_md5[PSO_GENERIC]) {
|
||||
/* we already requested a generic pipeline for this kernel */
|
||||
metal_printf("Generic kernel already requested (%s)\n",
|
||||
device_kernel_as_string(device_kernel));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
incomplete_requests++;
|
||||
|
||||
PipelineRequest request;
|
||||
request.pipeline = new MetalKernelPipeline;
|
||||
memcpy(&request.pipeline->kernel_data_,
|
||||
&device->launch_params.data,
|
||||
sizeof(request.pipeline->kernel_data_));
|
||||
request.pipeline->pso_type = pso_type;
|
||||
request.pipeline->scene_specialized = scene_specialized;
|
||||
request.pipeline->mtlDevice = mtlDevice;
|
||||
request.pipeline->source_md5 = device->source_md5[pso_type];
|
||||
request.pipeline->mtlLibrary = device->mtlLibrary[pso_type];
|
||||
request.pipeline->source_md5 =
|
||||
device->source_md5[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC];
|
||||
request.pipeline->mtlLibrary =
|
||||
device->mtlLibrary[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC];
|
||||
request.pipeline->device_kernel = device_kernel;
|
||||
request.pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup;
|
||||
|
||||
@@ -234,24 +214,7 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
|
||||
|
||||
{
|
||||
thread_scoped_lock lock(cache_mutex);
|
||||
auto &collection = pipelines[device_kernel];
|
||||
|
||||
/* Cache up to 3 kernel variants with the same pso_type, purging oldest first. */
|
||||
int max_entries_of_same_pso_type = 3;
|
||||
for (int i = (int)collection.size() - 1; i >= 0; i--) {
|
||||
if (collection[i]->pso_type == pso_type) {
|
||||
max_entries_of_same_pso_type -= 1;
|
||||
if (max_entries_of_same_pso_type == 0) {
|
||||
metal_printf("Purging oldest %s:%s kernel from ShaderCache\n",
|
||||
kernel_type_as_string(pso_type),
|
||||
device_kernel_as_string(device_kernel));
|
||||
collection.erase(collection.begin() + i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
collection.push_back(unique_ptr<MetalKernelPipeline>(request.pipeline));
|
||||
pipelines[device_kernel].push_back(unique_ptr<MetalKernelPipeline>(request.pipeline));
|
||||
request_queue.push_back(request);
|
||||
}
|
||||
cond_var.notify_one();
|
||||
@@ -285,9 +248,8 @@ MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const M
|
||||
continue;
|
||||
}
|
||||
|
||||
if (pipeline->pso_type != PSO_GENERIC) {
|
||||
if (pipeline->source_md5 == device->source_md5[PSO_SPECIALIZED_INTERSECT] ||
|
||||
pipeline->source_md5 == device->source_md5[PSO_SPECIALIZED_SHADE]) {
|
||||
if (pipeline->scene_specialized) {
|
||||
if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) {
|
||||
best_pipeline = pipeline.get();
|
||||
}
|
||||
}
|
||||
@@ -296,65 +258,13 @@ MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const M
|
||||
}
|
||||
}
|
||||
|
||||
if (best_pipeline->usage_count == 0 && best_pipeline->pso_type != PSO_GENERIC) {
|
||||
metal_printf("Swapping in %s version of %s\n",
|
||||
kernel_type_as_string(best_pipeline->pso_type),
|
||||
device_kernel_as_string(kernel));
|
||||
}
|
||||
best_pipeline->usage_count += 1;
|
||||
|
||||
return best_pipeline;
|
||||
}
|
||||
|
||||
bool MetalKernelPipeline::should_use_binary_archive() const
|
||||
{
|
||||
if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
|
||||
if (atoi(str) != 0) {
|
||||
/* Don't archive if we have opted out by env var. */
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (pso_type == PSO_GENERIC) {
|
||||
/* Archive the generic kernels. */
|
||||
return true;
|
||||
}
|
||||
|
||||
if (device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND &&
|
||||
device_kernel <= DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW) {
|
||||
/* Archive all shade kernels - they take a long time to compile. */
|
||||
return true;
|
||||
}
|
||||
|
||||
/* The remaining kernels are all fast to compile. They may get cached by the system shader cache,
|
||||
* but will be quick to regenerate if not. */
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Build the Metal function-constant table for a kernel.
 *
 * One constant is set for every member expanded from "kernel/data_template.h". Values are read
 * from `data`; when `data` is null, a zero-initialized KernelData is used instead. The returned
 * object comes from `+new`, as in the original code. */
static MTLFunctionConstantValues *GetConstantValues(KernelData const *data = nullptr)
{
  MTLFunctionConstantValues *values = [MTLFunctionConstantValues new];

  /* These names are resolved by token-pasting (`MTLDataType_##_type`) in the macro below, so
   * they must keep this exact spelling. */
  MTLDataType MTLDataType_int = MTLDataTypeInt;
  MTLDataType MTLDataType_float = MTLDataTypeFloat;
  MTLDataType MTLDataType_float4 = MTLDataTypeFloat4;

  /* Fall back to all-zero kernel data when the caller did not provide any. */
  KernelData zeroed_data = {0};
  data = (data != nullptr) ? data : &zeroed_data;

#  define KERNEL_STRUCT_MEMBER(parent, _type, name) \
    [values setConstantValue:&data->parent.name \
                        type:MTLDataType_##_type \
                     atIndex:KernelData_##parent##_##name];

#  include "kernel/data_template.h"

  return values;
}
|
||||
|
||||
void MetalKernelPipeline::compile()
|
||||
{
|
||||
int pso_type = scene_specialized ? PSO_SPECIALISED : PSO_GENERIC;
|
||||
|
||||
const std::string function_name = std::string("cycles_metal_") +
|
||||
device_kernel_as_string(device_kernel);
|
||||
|
||||
@@ -371,17 +281,6 @@ void MetalKernelPipeline::compile()
|
||||
if (@available(macOS 11.0, *)) {
|
||||
MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
|
||||
func_desc.name = entryPoint;
|
||||
|
||||
if (pso_type == PSO_SPECIALIZED_SHADE) {
|
||||
func_desc.constantValues = GetConstantValues(&kernel_data_);
|
||||
}
|
||||
else if (pso_type == PSO_SPECIALIZED_INTERSECT) {
|
||||
func_desc.constantValues = GetConstantValues(&kernel_data_);
|
||||
}
|
||||
else {
|
||||
func_desc.constantValues = GetConstantValues();
|
||||
}
|
||||
|
||||
function = [mtlLibrary newFunctionWithDescriptor:func_desc error:&error];
|
||||
}
|
||||
|
||||
@@ -528,7 +427,10 @@ void MetalKernelPipeline::compile()
|
||||
|
||||
MTLPipelineOption pipelineOptions = MTLPipelineOptionNone;
|
||||
|
||||
bool use_binary_archive = should_use_binary_archive();
|
||||
bool use_binary_archive = true;
|
||||
if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
|
||||
use_binary_archive = (atoi(str) == 0);
|
||||
}
|
||||
|
||||
id<MTLBinaryArchive> archive = nil;
|
||||
string metalbin_path;
|
||||
@@ -706,32 +608,19 @@ void MetalKernelPipeline::compile()
|
||||
}
|
||||
}
|
||||
|
||||
bool MetalDeviceKernels::load(MetalDevice *device, MetalPipelineType pso_type)
|
||||
bool MetalDeviceKernels::load(MetalDevice *device, bool scene_specialized)
|
||||
{
|
||||
const double starttime = time_dt();
|
||||
auto shader_cache = get_shader_cache(device->mtlDevice);
|
||||
for (int i = 0; i < DEVICE_KERNEL_NUM; i++) {
|
||||
shader_cache->load_kernel((DeviceKernel)i, device, pso_type);
|
||||
shader_cache->load_kernel((DeviceKernel)i, device, scene_specialized);
|
||||
}
|
||||
|
||||
shader_cache->wait_for_all();
|
||||
metal_printf("Back-end compilation finished in %.1f seconds (%s)\n",
|
||||
time_dt() - starttime,
|
||||
kernel_type_as_string(pso_type));
|
||||
if (!scene_specialized || getenv("CYCLES_METAL_PROFILING")) {
|
||||
shader_cache->wait_for_all();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return true if at least one device kernel still needs a pipeline of type `pso_type`
 * for `device`, as reported by the shader cache's per-kernel check. */
bool MetalDeviceKernels::should_load_kernels(MetalDevice *device, MetalPipelineType pso_type)
{
  auto cache = get_shader_cache(device->mtlDevice);

  /* Stop scanning as soon as one kernel reports that it needs loading. */
  bool any_pending = false;
  for (int kernel_index = 0; kernel_index < DEVICE_KERNEL_NUM && !any_pending; ++kernel_index) {
    any_pending = cache->should_load_kernel((DeviceKernel)kernel_index, device, pso_type);
  }
  return any_pending;
}
|
||||
|
||||
const MetalKernelPipeline *MetalDeviceKernels::get_best_pipeline(const MetalDevice *device,
|
||||
DeviceKernel kernel)
|
||||
{
|
||||
|
@@ -24,7 +24,6 @@ class MetalDeviceQueue : public DeviceQueue {
|
||||
|
||||
virtual int num_concurrent_states(const size_t) const override;
|
||||
virtual int num_concurrent_busy_states() const override;
|
||||
virtual int num_sort_partition_elements() const override;
|
||||
|
||||
virtual void init_execution() override;
|
||||
|
||||
|
@@ -293,11 +293,6 @@ int MetalDeviceQueue::num_concurrent_busy_states() const
|
||||
return result;
|
||||
}
|
||||
|
||||
int MetalDeviceQueue::num_sort_partition_elements() const
|
||||
{
|
||||
return MetalInfo::optimal_sort_partition_elements(metal_device_->mtlDevice);
|
||||
}
|
||||
|
||||
void MetalDeviceQueue::init_execution()
|
||||
{
|
||||
/* Synchronize all textures and memory copies before executing task. */
|
||||
@@ -364,7 +359,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
/* Prepare any non-pointer (i.e. plain-old-data) KernelParamsMetal data */
|
||||
/* The plain-old-data is contiguous, continuing to the end of KernelParamsMetal */
|
||||
size_t plain_old_launch_data_offset = offsetof(KernelParamsMetal, integrator_state) +
|
||||
offsetof(IntegratorStateGPU, sort_partition_divisor);
|
||||
sizeof(IntegratorStateGPU);
|
||||
size_t plain_old_launch_data_size = sizeof(KernelParamsMetal) - plain_old_launch_data_offset;
|
||||
memcpy(init_arg_buffer + globals_offsets + plain_old_launch_data_offset,
|
||||
(uint8_t *)&metal_device_->launch_params + plain_old_launch_data_offset,
|
||||
@@ -421,7 +416,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
|
||||
/* this relies on IntegratorStateGPU layout being contiguous device_ptrs */
|
||||
const size_t pointer_block_end = offsetof(KernelParamsMetal, integrator_state) +
|
||||
offsetof(IntegratorStateGPU, sort_partition_divisor);
|
||||
sizeof(IntegratorStateGPU);
|
||||
for (size_t offset = 0; offset < pointer_block_end; offset += sizeof(device_ptr)) {
|
||||
int pointer_index = int(offset / sizeof(device_ptr));
|
||||
MetalDevice::MetalMem *mmem = *(
|
||||
@@ -555,7 +550,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
/* Enhanced command buffer errors are only available in 11.0+ */
|
||||
if (@available(macos 11.0, *)) {
|
||||
if (command_buffer.status == MTLCommandBufferStatusError && command_buffer.error != nil) {
|
||||
metal_device_->set_error(string("CommandBuffer Failed: ") + [kernel_name UTF8String]);
|
||||
printf("CommandBuffer Failed: %s\n", [kernel_name UTF8String]);
|
||||
NSArray<id<MTLCommandBufferEncoderInfo>> *encoderInfos = [command_buffer.error.userInfo
|
||||
valueForKey:MTLCommandBufferEncoderInfoErrorKey];
|
||||
if (encoderInfos != nil) {
|
||||
@@ -569,7 +564,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
}
|
||||
}
|
||||
else if (command_buffer.error) {
|
||||
metal_device_->set_error(string("CommandBuffer Failed: ") + [kernel_name UTF8String]);
|
||||
printf("CommandBuffer Failed: %s\n", [kernel_name UTF8String]);
|
||||
}
|
||||
}
|
||||
}];
|
||||
|
@@ -25,20 +25,10 @@ enum MetalGPUVendor {
|
||||
METAL_GPU_INTEL = 3,
|
||||
};
|
||||
|
||||
/* Apple GPU generations that the Metal backend distinguishes between. */
enum AppleGPUArchitecture {
  /* Not recognized as one of the generations below. */
  APPLE_UNKNOWN,
  /* M1 generation. */
  APPLE_M1,
  /* M2 generation. */
  APPLE_M2,
};
|
||||
|
||||
/* Contains static Metal helper functions. */
|
||||
struct MetalInfo {
|
||||
static vector<id<MTLDevice>> const &get_usable_devices();
|
||||
static int get_apple_gpu_core_count(id<MTLDevice> device);
|
||||
static MetalGPUVendor get_device_vendor(id<MTLDevice> device);
|
||||
static AppleGPUArchitecture get_apple_gpu_architecture(id<MTLDevice> device);
|
||||
static int optimal_sort_partition_elements(id<MTLDevice> device);
|
||||
static string get_device_name(id<MTLDevice> device);
|
||||
static MetalGPUVendor get_vendor_from_device_name(string const &device_name);
|
||||
};
|
||||
|
||||
/* Pool of MTLBuffers whose lifetime is linked to a single MTLCommandBuffer */
|
||||
|
@@ -10,83 +10,26 @@
|
||||
# include "util/string.h"
|
||||
# include "util/time.h"
|
||||
|
||||
# include <IOKit/IOKitLib.h>
|
||||
# include <pwd.h>
|
||||
# include <sys/shm.h>
|
||||
# include <time.h>
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
string MetalInfo::get_device_name(id<MTLDevice> device)
|
||||
MetalGPUVendor MetalInfo::get_vendor_from_device_name(string const &device_name)
|
||||
{
|
||||
string device_name = [device.name UTF8String];
|
||||
if (get_device_vendor(device) == METAL_GPU_APPLE) {
|
||||
/* Append the GPU core count so we can distinguish between GPU variants in benchmarks. */
|
||||
int gpu_core_count = get_apple_gpu_core_count(device);
|
||||
device_name += string_printf(gpu_core_count ? " (GPU - %d cores)" : " (GPU)", gpu_core_count);
|
||||
}
|
||||
return device_name;
|
||||
}
|
||||
|
||||
/* Query the IORegistry for the "gpu-core-count" property of `device`.
 *
 * Returns the core count, or 0 when it cannot be determined (macOS older than 12.0, the
 * registry entry is not found, or the property is missing or not a number). */
int MetalInfo::get_apple_gpu_core_count(id<MTLDevice> device)
{
  int core_count = 0;
  if (@available(macos 12.0, *)) {
    io_service_t gpu_service = IOServiceGetMatchingService(
        kIOMainPortDefault, IORegistryEntryIDMatching(device.registryID));
    if (gpu_service == IO_OBJECT_NULL) {
      /* No matching registry entry: nothing to query and nothing to release. */
      return core_count;
    }

    if (CFNumberRef numberRef = (CFNumberRef)IORegistryEntryCreateCFProperty(
            gpu_service, CFSTR("gpu-core-count"), 0, 0)) {
      /* The property is only trusted if it really is a CFNumber. */
      if (CFGetTypeID(numberRef) == CFNumberGetTypeID()) {
        CFNumberGetValue(numberRef, kCFNumberSInt32Type, &core_count);
      }
      CFRelease(numberRef);
    }

    /* The service object is returned retained and must be released by the caller. */
    IOObjectRelease(gpu_service);
  }
  return core_count;
}
|
||||
|
||||
/* Classify `device` by searching its name for a known Apple GPU generation.
 *
 * "M1" is checked before "M2"; names containing neither yield APPLE_UNKNOWN. */
AppleGPUArchitecture MetalInfo::get_apple_gpu_architecture(id<MTLDevice> device)
{
  const char *name = [device.name UTF8String];

  if (strstr(name, "M1") != nullptr) {
    return APPLE_M1;
  }
  if (strstr(name, "M2") != nullptr) {
    return APPLE_M2;
  }
  return APPLE_UNKNOWN;
}
|
||||
|
||||
MetalGPUVendor MetalInfo::get_device_vendor(id<MTLDevice> device)
|
||||
{
|
||||
const char *device_name = [device.name UTF8String];
|
||||
if (strstr(device_name, "Intel")) {
|
||||
if (device_name.find("Intel") != string::npos) {
|
||||
return METAL_GPU_INTEL;
|
||||
}
|
||||
else if (strstr(device_name, "AMD")) {
|
||||
else if (device_name.find("AMD") != string::npos) {
|
||||
return METAL_GPU_AMD;
|
||||
}
|
||||
else if (strstr(device_name, "Apple")) {
|
||||
else if (device_name.find("Apple") != string::npos) {
|
||||
return METAL_GPU_APPLE;
|
||||
}
|
||||
return METAL_GPU_UNKNOWN;
|
||||
}
|
||||
|
||||
/* Choose the sort partition size (in elements) for `device`.
 *
 * The `CYCLES_METAL_SORT_PARTITION_ELEMENTS` environment variable, when set, overrides the
 * choice with its integer value. Otherwise Apple GPUs use 65536 and all other vendors use 0. */
int MetalInfo::optimal_sort_partition_elements(id<MTLDevice> device)
{
  const char *override_str = getenv("CYCLES_METAL_SORT_PARTITION_ELEMENTS");
  if (override_str != nullptr) {
    return atoi(override_str);
  }

  /* On M1 and M2 GPUs, we see better cache utilization if we partition the active indices before
   * sorting each partition by material. Partitioning into chunks of 65536 elements results in an
   * overall render time speedup of up to 15%. */
  const bool is_apple_gpu = (get_device_vendor(device) == METAL_GPU_APPLE);
  return is_apple_gpu ? 65536 : 0;
}
|
||||
|
||||
vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
|
||||
{
|
||||
static vector<id<MTLDevice>> usable_devices;
|
||||
@@ -98,8 +41,9 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
|
||||
|
||||
metal_printf("Usable Metal devices:\n");
|
||||
for (id<MTLDevice> device in MTLCopyAllDevices()) {
|
||||
string device_name = get_device_name(device);
|
||||
MetalGPUVendor vendor = get_device_vendor(device);
|
||||
const char *device_name = [device.name UTF8String];
|
||||
|
||||
MetalGPUVendor vendor = get_vendor_from_device_name(device_name);
|
||||
bool usable = false;
|
||||
|
||||
if (@available(macos 12.2, *)) {
|
||||
@@ -111,12 +55,12 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
|
||||
}
|
||||
|
||||
if (usable) {
|
||||
metal_printf("- %s\n", device_name.c_str());
|
||||
metal_printf("- %s\n", device_name);
|
||||
[device retain];
|
||||
usable_devices.push_back(device);
|
||||
}
|
||||
else {
|
||||
metal_printf(" (skipping \"%s\")\n", device_name.c_str());
|
||||
metal_printf(" (skipping \"%s\")\n", device_name);
|
||||
}
|
||||
}
|
||||
if (usable_devices.empty()) {
|
||||
|
@@ -1,185 +0,0 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2021-2022 Intel Corporation */
|
||||
|
||||
#include "device/oneapi/device.h"
|
||||
|
||||
#include "util/log.h"
|
||||
|
||||
#ifdef WITH_ONEAPI
|
||||
# include "device/device.h"
|
||||
# include "device/oneapi/device_impl.h"
|
||||
|
||||
# include "util/path.h"
|
||||
# include "util/string.h"
|
||||
|
||||
# ifdef __linux__
|
||||
# include <dlfcn.h>
|
||||
# endif
|
||||
#endif /* WITH_ONEAPI */
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef WITH_ONEAPI
|
||||
static OneAPIDLLInterface oneapi_dll;
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
# define LOAD_ONEAPI_SHARED_LIBRARY(path) (void *)(LoadLibrary(path))
|
||||
# define FREE_SHARED_LIBRARY(handle) FreeLibrary((HMODULE)handle)
|
||||
# define GET_SHARED_LIBRARY_SYMBOL(handle, name) GetProcAddress((HMODULE)handle, name)
|
||||
#elif __linux__
|
||||
# define LOAD_ONEAPI_SHARED_LIBRARY(path) dlopen(path, RTLD_NOW)
|
||||
# define FREE_SHARED_LIBRARY(handle) dlclose(handle)
|
||||
# define GET_SHARED_LIBRARY_SYMBOL(handle, name) dlsym(handle, name)
|
||||
#endif
|
||||
|
||||
/* Load the oneAPI kernel shared library and resolve every entry point listed in
 * "kernel/device/oneapi/dll_interface_template.h" into `oneapi_dll`.
 *
 * Returns false when oneAPI is not compiled in, when the library cannot be loaded, or when any
 * entry point is missing (in which case the library is unloaded again). On success, the library
 * stays loaded and default values are installed for several SYCL environment variables that the
 * user has not set. */
bool device_oneapi_init()
{
#if !defined(WITH_ONEAPI)
  return false;
#else

  string lib_path = path_get("lib");
#  ifdef _WIN32
  lib_path = path_join(lib_path, "cycles_kernel_oneapi.dll");
#  else
  lib_path = path_join(lib_path, "cycles_kernel_oneapi.so");
#  endif
  void *lib_handle = LOAD_ONEAPI_SHARED_LIBRARY(lib_path.c_str());

  /* This shouldn't happen, but it still makes sense to have a branch for this. */
  if (lib_handle == NULL) {
    LOG(ERROR) << "oneAPI kernel shared library cannot be loaded for some reason. This should not "
                  "happen, however, it occurs hence oneAPI rendering will be disabled";
    return false;
  }

  /* Expanded once per entry point by the template header: look the symbol up and bail out
   * (unloading the library) as soon as one is missing. */
#  define DLL_INTERFACE_CALL(function, return_type, ...) \
    (oneapi_dll.function) = reinterpret_cast<decltype(oneapi_dll.function)>( \
        GET_SHARED_LIBRARY_SYMBOL(lib_handle, #function)); \
    if (oneapi_dll.function == NULL) { \
      LOG(ERROR) << "oneAPI shared library function \"" << #function \
                 << "\" has not been loaded from kernel shared - disable oneAPI " \
                    "library disable oneAPI implementation due to this"; \
      FREE_SHARED_LIBRARY(lib_handle); \
      return false; \
    }
#  include "kernel/device/oneapi/dll_interface_template.h"
#  undef DLL_INTERFACE_CALL

  VLOG_INFO << "oneAPI kernel shared library has been loaded successfully";

  /* The oneAPI kernel shared library is needed for the whole lifetime of Blender,
   * so it is intentionally not unloaded here:
   * FREE_SHARED_LIBRARY(lib_handle); */

  /* NOTE(@nsirgien): we need to enable JIT cache from here and
   * right now this cache policy is controlled by env. variables. */
  /* NOTE(hallade) we also disable use of copy engine as it
   * improves stability as of intel/LLVM SYCL-nightly/20220529.
   * All these env variable can be set beforehand by end-users and
   * will in that case -not- be overwritten. */
#  ifdef _WIN32
  if (getenv("SYCL_CACHE_PERSISTENT") == nullptr) {
    _putenv_s("SYCL_CACHE_PERSISTENT", "1");
  }
  /* The variable checked must be the same one that is set, otherwise a user-provided
   * threshold would always be overwritten. */
  if (getenv("SYCL_CACHE_THRESHOLD") == nullptr) {
    _putenv_s("SYCL_CACHE_THRESHOLD", "0");
  }
  if (getenv("SYCL_DEVICE_FILTER") == nullptr) {
    _putenv_s("SYCL_DEVICE_FILTER", "host,level_zero");
  }
  if (getenv("SYCL_ENABLE_PCI") == nullptr) {
    _putenv_s("SYCL_ENABLE_PCI", "1");
  }
  if (getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE") == nullptr) {
    _putenv_s("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0");
  }
#  elif __linux__
  /* The final `false` argument means existing values are not overwritten. */
  setenv("SYCL_CACHE_PERSISTENT", "1", false);
  setenv("SYCL_CACHE_THRESHOLD", "0", false);
  setenv("SYCL_DEVICE_FILTER", "host,level_zero", false);
  setenv("SYCL_ENABLE_PCI", "1", false);
  setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false);
#  endif

  return true;
#endif
}
|
||||
|
||||
#if defined(_WIN32) || defined(__linux__)
|
||||
# undef LOAD_SYCL_SHARED_LIBRARY
|
||||
# undef LOAD_ONEAPI_SHARED_LIBRARY
|
||||
# undef FREE_SHARED_LIBRARY
|
||||
# undef GET_SHARED_LIBRARY_SYMBOL
|
||||
#endif
|
||||
|
||||
/* Factory for the oneAPI device.
 *
 * With WITH_ONEAPI defined this returns a newly allocated OneapiDevice bound to the
 * file-level `oneapi_dll` interface. Without it the request is logged as a fatal error
 * and nullptr is returned. */
Device *device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
{
#ifndef WITH_ONEAPI
  /* Arguments are only consumed by the oneAPI-enabled branch. */
  (void)info;
  (void)stats;
  (void)profiler;

  LOG(FATAL) << "Requested to create oneAPI device while not enabled for this build.";

  return nullptr;
#else
  return new OneapiDevice(info, oneapi_dll, stats, profiler);
#endif
}
|
||||
|
||||
#ifdef WITH_ONEAPI
|
||||
static void device_iterator_cb(const char *id, const char *name, int num, void *user_ptr)
|
||||
{
|
||||
vector<DeviceInfo> *devices = (vector<DeviceInfo> *)user_ptr;
|
||||
|
||||
DeviceInfo info;
|
||||
|
||||
info.type = DEVICE_ONEAPI;
|
||||
info.description = name;
|
||||
info.num = num;
|
||||
|
||||
/* NOTE(@nsirgien): Should be unique at least on proper oneapi installation. */
|
||||
info.id = id;
|
||||
|
||||
info.has_nanovdb = true;
|
||||
info.denoisers = 0;
|
||||
|
||||
info.has_gpu_queue = true;
|
||||
|
||||
/* NOTE(@nsirgien): oneAPI right now is focused on one device usage. In future it maybe will
|
||||
* change, but right now peer access from one device to another device is not supported. */
|
||||
info.has_peer_memory = false;
|
||||
|
||||
/* NOTE(@nsirgien): Seems not possible to know from SYCL/oneAPI or Level0. */
|
||||
info.display_device = false;
|
||||
|
||||
devices->push_back(info);
|
||||
VLOG_INFO << "Added device \"" << name << "\" with id \"" << info.id << "\".";
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Append every device reported by the oneAPI kernel library to `devices`.
 * Does nothing when the build has no oneAPI support. */
void device_oneapi_info(vector<DeviceInfo> &devices)
{
#ifndef WITH_ONEAPI
  (void)devices;
#else  /* WITH_ONEAPI */
  /* The callback receives `&devices` back as its user pointer. */
  (oneapi_dll.oneapi_iterate_devices)(device_iterator_cb, &devices);
#endif /* WITH_ONEAPI */
}
|
||||
|
||||
/* Return the capabilities text reported by the oneAPI kernel library.
 * The result is empty when the library returns no string or oneAPI is not built in. */
string device_oneapi_capabilities()
{
  string result;
#ifdef WITH_ONEAPI
  /* The C string is handed back to the library via `oneapi_free` once it has been copied. */
  if (char *raw_text = (oneapi_dll.oneapi_device_capabilities)()) {
    result = raw_text;
    (oneapi_dll.oneapi_free)(raw_text);
  }
#endif
  return result;
}
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -1,24 +0,0 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "util/string.h"
|
||||
#include "util/vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class Device;
|
||||
class DeviceInfo;
|
||||
class Profiler;
|
||||
class Stats;
|
||||
|
||||
bool device_oneapi_init();
|
||||
|
||||
Device *device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
|
||||
|
||||
void device_oneapi_info(vector<DeviceInfo> &devices);
|
||||
|
||||
string device_oneapi_capabilities();
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -1,426 +0,0 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2021-2022 Intel Corporation */
|
||||
|
||||
#ifdef WITH_ONEAPI
|
||||
|
||||
# include "device/oneapi/device_impl.h"
|
||||
|
||||
# include "util/debug.h"
|
||||
# include "util/log.h"
|
||||
|
||||
# include "kernel/device/oneapi/kernel.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Error callback registered with the oneAPI library: stores `message` in the std::string
 * that `user_ptr` points to. A null `user_ptr` is ignored. */
static void queue_error_cb(const char *message, void *user_ptr)
{
  if (user_ptr == nullptr) {
    return;
  }

  std::string *error_sink = reinterpret_cast<std::string *>(user_ptr);
  *error_sink = message;
}
|
||||
|
||||
/* Create the SYCL queue for device number `info.num` and allocate the kernel globals
 * segment, once in host USM memory and once in device USM memory.
 *
 * Any failure is reported through set_error(). Construction then stops early, so no
 * further library call is made with a null queue or an undefined segment size. */
OneapiDevice::OneapiDevice(const DeviceInfo &info,
                           OneAPIDLLInterface &oneapi_dll_object,
                           Stats &stats,
                           Profiler &profiler)
    : Device(info, stats, profiler),
      device_queue_(nullptr),
      texture_info_(this, "texture_info", MEM_GLOBAL),
      kg_memory_(nullptr),
      kg_memory_device_(nullptr),
      kg_memory_size_(0),
      oneapi_dll_(oneapi_dll_object)
{
  need_texture_info_ = false;

  /* Runtime exceptions raised inside the library are delivered as text into
   * `oneapi_error_string_`. */
  oneapi_dll_.oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);

  /* OneAPI calls should be initialized on this moment. */
  assert(oneapi_dll_.oneapi_create_queue != nullptr);

  bool is_finished_ok = oneapi_dll_.oneapi_create_queue(device_queue_, info.num);
  if (is_finished_ok == false) {
    set_error("oneAPI queue initialization error: got runtime exception \"" +
              oneapi_error_string_ + "\"");
    /* Every call below needs a valid queue. */
    return;
  }

  VLOG_DEBUG << "oneAPI queue has been successfully created for the device \""
             << info.description << "\"";
  assert(device_queue_);

  size_t globals_segment_size = 0;
  is_finished_ok = oneapi_dll_.oneapi_kernel_globals_size(device_queue_, globals_segment_size);
  if (is_finished_ok == false) {
    set_error("oneAPI constant memory initialization got runtime exception \"" +
              oneapi_error_string_ + "\"");
    /* The segment size is unknown, so nothing can be allocated. */
    return;
  }

  VLOG_DEBUG << "Successfully created global/constant memory segment (kernel globals object)";

  kg_memory_ = oneapi_dll_.oneapi_usm_aligned_alloc_host(device_queue_, globals_segment_size, 16);
  oneapi_dll_.oneapi_usm_memset(device_queue_, kg_memory_, 0, globals_segment_size);

  kg_memory_device_ = oneapi_dll_.oneapi_usm_alloc_device(device_queue_, globals_segment_size);

  kg_memory_size_ = globals_segment_size;
}

/* Release texture info, the kernel globals segments, all constant memory buffers and
 * finally the queue. Library calls that take the queue are only made when it exists. */
OneapiDevice::~OneapiDevice()
{
  texture_info_.free();

  if (device_queue_) {
    if (kg_memory_) {
      oneapi_dll_.oneapi_usm_free(device_queue_, kg_memory_);
    }
    if (kg_memory_device_) {
      oneapi_dll_.oneapi_usm_free(device_queue_, kg_memory_device_);
    }
  }

  for (ConstMemMap::iterator mt = const_mem_map_.begin(); mt != const_mem_map_.end(); mt++) {
    delete mt->second;
  }

  /* The queue goes last: the frees above are issued against it. */
  if (device_queue_) {
    oneapi_dll_.oneapi_free_queue(device_queue_);
  }
}
|
||||
|
||||
/* Peer access query: this device always answers "no", whatever the peer is. */
bool OneapiDevice::check_peer_access(Device * /*peer_device*/)
{
  const bool peer_access_available = false;
  return peer_access_available;
}
|
||||
|
||||
/* Supported BVH layouts: only BVH2 is reported. */
BVHLayoutMask OneapiDevice::get_bvh_layout_mask() const
{
  const BVHLayoutMask supported_layouts = BVH_LAYOUT_BVH2;
  return supported_layouts;
}
|
||||
|
||||
bool OneapiDevice::load_kernels(const uint requested_features)
|
||||
{
|
||||
assert(device_queue_);
|
||||
/* NOTE(@nsirgien): oneAPI can support compilation of kernel code with certain feature set
|
||||
* with specialization constants, but it hasn't been implemented yet. */
|
||||
(void)requested_features;
|
||||
|
||||
bool is_finished_ok = oneapi_dll_.oneapi_run_test_kernel(device_queue_);
|
||||
if (is_finished_ok == false) {
|
||||
set_error("oneAPI kernel load: got runtime exception \"" + oneapi_error_string_ + "\"");
|
||||
}
|
||||
else {
|
||||
VLOG_INFO << "Runtime compilation done for \"" << info.description << "\"";
|
||||
assert(device_queue_);
|
||||
}
|
||||
return is_finished_ok;
|
||||
}
|
||||
|
||||
void OneapiDevice::load_texture_info()
|
||||
{
|
||||
if (need_texture_info_) {
|
||||
need_texture_info_ = false;
|
||||
texture_info_.copy_to_device();
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate USM device memory for `mem` and record pointer, size and statistics.
 * When the allocation fails, an error describing the requested size and current usage is
 * set and `mem` is left untouched. */
void OneapiDevice::generic_alloc(device_memory &mem)
{
  const size_t requested_bytes = mem.memory_size();

  /* TODO(@nsirgien): In future, if scene doesn't fit into device memory, then
   * we can use USM host memory.
   * Because of the expected performance impact, implementation of this has had a low priority
   * and is not implemented yet. */

  assert(device_queue_);

  /* NOTE(@nsirgien): There are three types of Unified Shared Memory (USM) in oneAPI: host, device
   * and shared. For new project it maybe more beneficial to use USM shared memory, because it
   * provides automatic migration mechanism in order to allow to use the same pointer on host and
   * on device, without need to worry about explicit memory transfer operations. But for
   * Blender/Cycles this type of memory is not very suitable in current application architecture,
   * because Cycles already uses two different pointer for host activity and device activity, and
   * also has to perform all needed memory transfer operations. So, USM device memory
   * type has been used for oneAPI device in order to better fit in Cycles architecture. */
  void *allocation = oneapi_dll_.oneapi_usm_alloc_device(device_queue_, requested_bytes);

  if (allocation == nullptr) {
    const size_t device_capacity = oneapi_dll_.oneapi_get_memcapacity(device_queue_);
    set_error("oneAPI kernel - device memory allocation error for " +
              string_human_readable_size(mem.memory_size()) +
              ", possibly caused by lack of available memory space on the device: " +
              string_human_readable_size(stats.mem_used) + " of " +
              string_human_readable_size(device_capacity) + " is already allocated");
    return;
  }
  assert(allocation);

  mem.device_size = requested_bytes;
  mem.device_pointer = reinterpret_cast<ccl::device_ptr>(allocation);

  stats.mem_alloc(requested_bytes);
}
|
||||
|
||||
/* Copy `mem.memory_size()` bytes from `mem.host_pointer` to `mem.device_pointer`.
 *
 * A failed copy is reported through set_error(), with the same message used by
 * mem_copy_from() and mem_zero(), instead of being silently dropped. */
void OneapiDevice::generic_copy_to(device_memory &mem)
{
  size_t memory_size = mem.memory_size();

  /* Copy operation from host shouldn't be requested if there is no memory allocated on host. */
  assert(mem.host_pointer);
  assert(device_queue_);

  const bool is_finished_ok = oneapi_dll_.oneapi_usm_memcpy(
      device_queue_, (void *)mem.device_pointer, (void *)mem.host_pointer, memory_size);
  if (is_finished_ok == false) {
    set_error("oneAPI memory operation error: got runtime exception \"" + oneapi_error_string_ +
              "\"");
  }
}
|
||||
|
||||
/* TODO: Make sycl::queue part of OneapiQueue and avoid using pointers to sycl::queue. */
|
||||
/* Accessor for the queue pointer owned by this device (may be null if creation failed). */
SyclQueue *OneapiDevice::sycl_queue()
{
  SyclQueue *const queue = device_queue_;
  return queue;
}
|
||||
|
||||
/* Return a copy of the last error text stored by the library's error callback. */
string OneapiDevice::oneapi_error_message()
{
  string message_copy(oneapi_error_string_);
  return message_copy;
}
|
||||
|
||||
/* Return a by-value copy of the DLL interface (function pointer table) used by this device. */
OneAPIDLLInterface OneapiDevice::oneapi_dll_object()
{
  OneAPIDLLInterface interface_copy = oneapi_dll_;
  return interface_copy;
}
|
||||
|
||||
void *OneapiDevice::kernel_globals_device_pointer()
|
||||
{
|
||||
return kg_memory_device_;
|
||||
}
|
||||
|
||||
/* Free the USM allocation behind `mem`, update memory statistics and reset the
 * device pointer and size stored in `mem`. */
void OneapiDevice::generic_free(device_memory &mem)
{
  assert(mem.device_pointer);
  assert(device_queue_);

  /* Statistics and the actual free are independent of each other. */
  oneapi_dll_.oneapi_usm_free(device_queue_, (void *)mem.device_pointer);
  stats.mem_free(mem.device_size);

  mem.device_pointer = 0;
  mem.device_size = 0;
}
|
||||
|
||||
/* Allocate device memory for `mem`. Texture and global memory are rejected with an
 * assertion; all other types are logged (when named) and go through generic_alloc(). */
void OneapiDevice::mem_alloc(device_memory &mem)
{
  if (mem.type == MEM_TEXTURE) {
    assert(!"mem_alloc not supported for textures.");
    return;
  }

  if (mem.type == MEM_GLOBAL) {
    assert(!"mem_alloc not supported for global memory.");
    return;
  }

  if (mem.name) {
    VLOG_DEBUG << "OneapiDevice::mem_alloc: \"" << mem.name << "\", "
               << string_human_readable_number(mem.memory_size()) << " bytes. ("
               << string_human_readable_size(mem.memory_size()) << ")";
  }

  generic_alloc(mem);
}
|
||||
|
||||
/* Copy `mem` from host to device.
 * Global and texture memory are freed and allocated again (their allocation paths include
 * the copy); other memory is allocated on demand and then copied. */
void OneapiDevice::mem_copy_to(device_memory &mem)
{
  if (mem.name) {
    VLOG_DEBUG << "OneapiDevice::mem_copy_to: \"" << mem.name << "\", "
               << string_human_readable_number(mem.memory_size()) << " bytes. ("
               << string_human_readable_size(mem.memory_size()) << ")";
  }

  if (mem.type == MEM_GLOBAL) {
    global_free(mem);
    global_alloc(mem);
    return;
  }

  if (mem.type == MEM_TEXTURE) {
    device_texture &texture = (device_texture &)mem;
    tex_free(texture);
    tex_alloc(texture);
    return;
  }

  if (!mem.device_pointer) {
    mem_alloc(mem);
  }

  generic_copy_to(mem);
}
|
||||
|
||||
/* Copy data from device back to the host buffer of `mem`.
 *
 * When `w`, `h` and `elem` are all zero the whole buffer is copied; otherwise
 * `elem * w * h` bytes are copied. In both cases the copy starts at byte offset
 * `elem * y * w`. Texture and global memory are rejected with an assertion, and nothing
 * happens when `mem` has no host pointer. */
void OneapiDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem)
{
  if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) {
    assert(!"mem_copy_from not supported for textures.");
    return;
  }

  if (!mem.host_pointer) {
    return;
  }

  const bool copy_everything = (w == 0 && h == 0 && elem == 0);
  const size_t size = copy_everything ? mem.memory_size() : (elem * w * h);
  const size_t offset = elem * y * w;

  if (mem.name) {
    VLOG_DEBUG << "OneapiDevice::mem_copy_from: \"" << mem.name << "\" object of "
               << string_human_readable_number(mem.memory_size()) << " bytes. ("
               << string_human_readable_size(mem.memory_size()) << ") from offset " << offset
               << " data " << size << " bytes";
  }

  assert(device_queue_);
  assert(size != 0);
  assert(mem.device_pointer);

  /* Source and destination are shifted by the same byte offset. */
  char *host_dst = reinterpret_cast<char *>(mem.host_pointer) + offset;
  char *device_src = reinterpret_cast<char *>(mem.device_pointer) + offset;

  const bool copied = oneapi_dll_.oneapi_usm_memcpy(device_queue_, host_dst, device_src, size);
  if (!copied) {
    set_error("oneAPI memory operation error: got runtime exception \"" + oneapi_error_string_ +
              "\"");
  }
}
|
||||
|
||||
/* Fill the device memory of `mem` with zero bytes, allocating it first if needed.
 * Returns silently when the allocation did not produce a device pointer. */
void OneapiDevice::mem_zero(device_memory &mem)
{
  if (mem.name) {
    VLOG_DEBUG << "OneapiDevice::mem_zero: \"" << mem.name << "\", "
               << string_human_readable_number(mem.memory_size()) << " bytes. ("
               << string_human_readable_size(mem.memory_size()) << ")\n";
  }

  if (!mem.device_pointer) {
    mem_alloc(mem);
    if (!mem.device_pointer) {
      /* Allocation failed or is not supported for this memory type. */
      return;
    }
  }

  assert(device_queue_);

  const bool zeroed = oneapi_dll_.oneapi_usm_memset(
      device_queue_, (void *)mem.device_pointer, 0, mem.memory_size());
  if (!zeroed) {
    set_error("oneAPI memory operation error: got runtime exception \"" + oneapi_error_string_ +
              "\"");
  }
}
|
||||
|
||||
/* Free device memory of `mem`, dispatching on its memory type. */
void OneapiDevice::mem_free(device_memory &mem)
{
  if (mem.name) {
    VLOG_DEBUG << "OneapiDevice::mem_free: \"" << mem.name << "\", "
               << string_human_readable_number(mem.device_size) << " bytes. ("
               << string_human_readable_size(mem.device_size) << ")\n";
  }

  if (mem.type == MEM_GLOBAL) {
    global_free(mem);
    return;
  }

  if (mem.type == MEM_TEXTURE) {
    tex_free((device_texture &)mem);
    return;
  }

  generic_free(mem);
}
|
||||
|
||||
/* Return a device pointer into the existing allocation of `mem`, advanced by the byte
 * size of `offset` elements. No new memory is allocated. */
device_ptr OneapiDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
{
  char *const base = reinterpret_cast<char *>(mem.device_pointer);
  const auto byte_offset = mem.memory_elements_size(offset);

  return reinterpret_cast<device_ptr>(base + byte_offset);
}
|
||||
|
||||
void OneapiDevice::const_copy_to(const char *name, void *host, size_t size)
|
||||
{
|
||||
assert(name);
|
||||
|
||||
VLOG_DEBUG << "OneapiDevice::const_copy_to \"" << name << "\" object "
|
||||
<< string_human_readable_number(size) << " bytes. ("
|
||||
<< string_human_readable_size(size) << ")";
|
||||
|
||||
ConstMemMap::iterator i = const_mem_map_.find(name);
|
||||
device_vector<uchar> *data;
|
||||
|
||||
if (i == const_mem_map_.end()) {
|
||||
data = new device_vector<uchar>(this, name, MEM_READ_ONLY);
|
||||
data->alloc(size);
|
||||
const_mem_map_.insert(ConstMemMap::value_type(name, data));
|
||||
}
|
||||
else {
|
||||
data = i->second;
|
||||
}
|
||||
|
||||
assert(data->memory_size() <= size);
|
||||
memcpy(data->data(), host, size);
|
||||
data->copy_to_device();
|
||||
|
||||
oneapi_dll_.oneapi_set_global_memory(
|
||||
device_queue_, kg_memory_, name, (void *)data->device_pointer);
|
||||
|
||||
oneapi_dll_.oneapi_usm_memcpy(device_queue_, kg_memory_device_, kg_memory_, kg_memory_size_);
|
||||
}
|
||||
|
||||
/* Allocate and upload global memory `mem`, register its device pointer under `mem.name`
 * in the kernel globals segment and upload that segment again. */
void OneapiDevice::global_alloc(device_memory &mem)
{
  assert(mem.name);

  const size_t byte_count = mem.memory_size();
  VLOG_DEBUG << "OneapiDevice::global_alloc \"" << mem.name << "\" object "
             << string_human_readable_number(byte_count) << " bytes. ("
             << string_human_readable_size(byte_count) << ")";

  generic_alloc(mem);
  generic_copy_to(mem);

  void *const device_data = (void *)mem.device_pointer;
  oneapi_dll_.oneapi_set_global_memory(device_queue_, kg_memory_, mem.name, device_data);

  oneapi_dll_.oneapi_usm_memcpy(device_queue_, kg_memory_device_, kg_memory_, kg_memory_size_);
}
|
||||
|
||||
/* Free global memory `mem`; does nothing if it has no device allocation. */
void OneapiDevice::global_free(device_memory &mem)
{
  if (!mem.device_pointer) {
    return;
  }

  generic_free(mem);
}
|
||||
|
||||
/* Allocate and upload texture `mem`, then store its info (with the device address as
 * `data`) in the texture info table at `mem.slot` and mark the table for upload. */
void OneapiDevice::tex_alloc(device_texture &mem)
{
  generic_alloc(mem);
  generic_copy_to(mem);

  /* Resize if needed. Also, in case of resize - allocate in advance for future allocs. */
  const uint slot = mem.slot;
  if (slot >= texture_info_.size()) {
    texture_info_.resize(slot + 128);
  }

  /* Fill the entry locally, then publish it in one assignment. */
  TextureInfo slot_info = mem.info;
  slot_info.data = (uint64_t)mem.device_pointer;
  texture_info_[slot] = slot_info;

  need_texture_info_ = true;
}
|
||||
|
||||
/* Free texture `mem`; does nothing if it has no device allocation. */
void OneapiDevice::tex_free(device_texture &mem)
{
  /* There is no texture memory in SYCL. */
  if (!mem.device_pointer) {
    return;
  }

  generic_free(mem);
}
|
||||
|
||||
/* Create a new OneapiDeviceQueue bound to this device; ownership goes to the caller. */
unique_ptr<DeviceQueue> OneapiDevice::gpu_queue_create()
{
  OneapiDevice *const owner = this;
  return make_unique<OneapiDeviceQueue>(owner);
}
|
||||
|
||||
/* Graphics interop query: always disabled for this device. */
bool OneapiDevice::should_use_graphics_interop()
{
  /* NOTE(@nsirgien): oneAPI doesn't yet support direct writing into graphics API objects, so
   * return false. */
  const bool use_interop = false;
  return use_interop;
}
|
||||
|
||||
void *OneapiDevice::usm_aligned_alloc_host(size_t memory_size, size_t alignment)
|
||||
{
|
||||
assert(device_queue_);
|
||||
return oneapi_dll_.oneapi_usm_aligned_alloc_host(device_queue_, memory_size, alignment);
|
||||
}
|
||||
|
||||
void OneapiDevice::usm_free(void *usm_ptr)
|
||||
{
|
||||
assert(device_queue_);
|
||||
return oneapi_dll_.oneapi_usm_free(device_queue_, usm_ptr);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif
|
@@ -1,100 +0,0 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2021-2022 Intel Corporation */
|
||||
|
||||
#ifdef WITH_ONEAPI
|
||||
|
||||
# include "device/device.h"
|
||||
# include "device/oneapi/device.h"
|
||||
# include "device/oneapi/queue.h"
|
||||
|
||||
# include "util/map.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class DeviceQueue;
|
||||
|
||||
class OneapiDevice : public Device {
|
||||
private:
|
||||
SyclQueue *device_queue_;
|
||||
|
||||
using ConstMemMap = map<string, device_vector<uchar> *>;
|
||||
ConstMemMap const_mem_map_;
|
||||
device_vector<TextureInfo> texture_info_;
|
||||
bool need_texture_info_;
|
||||
void *kg_memory_;
|
||||
void *kg_memory_device_;
|
||||
size_t kg_memory_size_ = (size_t)0;
|
||||
OneAPIDLLInterface oneapi_dll_;
|
||||
std::string oneapi_error_string_;
|
||||
|
||||
public:
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
|
||||
OneapiDevice(const DeviceInfo &info,
|
||||
OneAPIDLLInterface &oneapi_dll_object,
|
||||
Stats &stats,
|
||||
Profiler &profiler);
|
||||
|
||||
virtual ~OneapiDevice();
|
||||
|
||||
bool check_peer_access(Device *peer_device) override;
|
||||
|
||||
bool load_kernels(const uint requested_features) override;
|
||||
|
||||
void load_texture_info();
|
||||
|
||||
void generic_alloc(device_memory &mem);
|
||||
|
||||
void generic_copy_to(device_memory &mem);
|
||||
|
||||
void generic_free(device_memory &mem);
|
||||
|
||||
SyclQueue *sycl_queue();
|
||||
|
||||
string oneapi_error_message();
|
||||
|
||||
OneAPIDLLInterface oneapi_dll_object();
|
||||
|
||||
void *kernel_globals_device_pointer();
|
||||
|
||||
void mem_alloc(device_memory &mem) override;
|
||||
|
||||
void mem_copy_to(device_memory &mem) override;
|
||||
|
||||
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
|
||||
|
||||
void mem_copy_from(device_memory &mem)
|
||||
{
|
||||
mem_copy_from(mem, 0, 0, 0, 0);
|
||||
}
|
||||
|
||||
void mem_zero(device_memory &mem) override;
|
||||
|
||||
void mem_free(device_memory &mem) override;
|
||||
|
||||
device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) override;
|
||||
|
||||
virtual void const_copy_to(const char *name, void *host, size_t size) override;
|
||||
|
||||
void global_alloc(device_memory &mem);
|
||||
|
||||
void global_free(device_memory &mem);
|
||||
|
||||
void tex_alloc(device_texture &mem);
|
||||
|
||||
void tex_free(device_texture &mem);
|
||||
|
||||
/* Graphics resources interoperability. */
|
||||
virtual bool should_use_graphics_interop() override;
|
||||
|
||||
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
|
||||
|
||||
/* NOTE(@nsirgien): Create this methods to avoid some compilation problems on Windows with host
|
||||
* side compilation (MSVC). */
|
||||
void *usm_aligned_alloc_host(size_t memory_size, size_t alignment);
|
||||
void usm_free(void *usm_ptr);
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif
|
@@ -1,17 +0,0 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#pragma once
|
||||
|
||||
/* Include kernel header to get access to SYCL-specific types, like SyclQueue and
|
||||
* OneAPIDeviceIteratorCallback. */
|
||||
#include "kernel/device/oneapi/kernel.h"
|
||||
|
||||
#ifdef WITH_ONEAPI
|
||||
/* Table of function pointers into the oneAPI kernel library.
 * One null-initialized pointer member is generated for every DLL_INTERFACE_CALL entry
 * of the interface template header. */
struct OneAPIDLLInterface {
#  define DLL_INTERFACE_CALL(fn_name, result_type, ...) result_type (*fn_name)(__VA_ARGS__) = nullptr;
#  include "kernel/device/oneapi/dll_interface_template.h"
#  undef DLL_INTERFACE_CALL
};
|
||||
#endif
|
@@ -1,165 +0,0 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2021-2022 Intel Corporation */
|
||||
|
||||
#ifdef WITH_ONEAPI
|
||||
|
||||
# include "device/oneapi/queue.h"
|
||||
# include "device/oneapi/device_impl.h"
|
||||
# include "util/log.h"
|
||||
# include "util/time.h"
|
||||
# include <iomanip>
|
||||
# include <vector>
|
||||
|
||||
# include "kernel/device/oneapi/kernel.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Per-kernel execution bookkeeping: accumulated elapsed value and number of enqueues.
 * Both start at zero. */
struct KernelExecutionInfo {
  double elapsed_summary{0.0};
  int enqueue_count{0};
};
|
||||
|
||||
/* OneapiDeviceQueue */
|
||||
|
||||
/* Bind the queue to `device` and take a copy of its DLL interface.
 * The kernel context is created later, in init_execution(). */
OneapiDeviceQueue::OneapiDeviceQueue(OneapiDevice *device)
    : DeviceQueue(device),
      oneapi_device_(device),
      oneapi_dll_(device->oneapi_dll_object()),
      kernel_context_(nullptr),
      /* Previously left uninitialized; give it a defined value. */
      with_kernel_statistics_(false)
{
}
|
||||
|
||||
/* Release the kernel context created by init_execution(), if any. */
OneapiDeviceQueue::~OneapiDeviceQueue()
{
  /* Deleting a null pointer is a no-op, so no check is needed. */
  delete kernel_context_;
}
|
||||
|
||||
int OneapiDeviceQueue::num_concurrent_states(const size_t state_size) const
|
||||
{
|
||||
int num_states;
|
||||
|
||||
/* TODO: implement and use get_num_multiprocessors and get_max_num_threads_per_multiprocessor. */
|
||||
const size_t compute_units = oneapi_dll_.oneapi_get_compute_units_amount(
|
||||
oneapi_device_->sycl_queue());
|
||||
if (compute_units >= 128) {
|
||||
/* dGPU path, make sense to allocate more states, because it will be dedicated GPU memory. */
|
||||
int base = 1024 * 1024;
|
||||
/* linear dependency (with coefficient less that 1) from amount of compute units. */
|
||||
num_states = (base * (compute_units / 128)) * 3 / 4;
|
||||
|
||||
/* Limit amount of integrator states by one quarter of device memory, because
|
||||
* other allocations will need some space as well
|
||||
* TODO: base this calculation on the how many states what the GPU is actually capable of
|
||||
* running, with some headroom to improve occupancy. If the texture don't fit, offload into
|
||||
* unified memory. */
|
||||
size_t states_memory_size = num_states * state_size;
|
||||
size_t device_memory_amount =
|
||||
(oneapi_dll_.oneapi_get_memcapacity)(oneapi_device_->sycl_queue());
|
||||
if (states_memory_size >= device_memory_amount / 4) {
|
||||
num_states = device_memory_amount / 4 / state_size;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* iGPU path - no real need to allocate a lot of integrator states because it is shared GPU
|
||||
* memory. */
|
||||
num_states = 1024 * 512;
|
||||
}
|
||||
|
||||
VLOG_DEVICE_STATS << "GPU queue concurrent states: " << num_states << ", using up to "
|
||||
<< string_human_readable_size(num_states * state_size);
|
||||
|
||||
return num_states;
|
||||
}
|
||||
|
||||
int OneapiDeviceQueue::num_concurrent_busy_states() const
|
||||
{
|
||||
const size_t compute_units = oneapi_dll_.oneapi_get_compute_units_amount(
|
||||
oneapi_device_->sycl_queue());
|
||||
if (compute_units >= 128) {
|
||||
return 1024 * 1024;
|
||||
}
|
||||
else {
|
||||
return 1024 * 512;
|
||||
}
|
||||
}
|
||||
|
||||
void OneapiDeviceQueue::init_execution()
|
||||
{
|
||||
oneapi_device_->load_texture_info();
|
||||
|
||||
SyclQueue *device_queue = oneapi_device_->sycl_queue();
|
||||
void *kg_dptr = (void *)oneapi_device_->kernel_globals_device_pointer();
|
||||
assert(device_queue);
|
||||
assert(kg_dptr);
|
||||
kernel_context_ = new KernelContext{device_queue, kg_dptr};
|
||||
|
||||
debug_init_execution();
|
||||
}
|
||||
|
||||
/* Launch `kernel` over `signed_kernel_work_size` work items with arguments `_args`.
 *
 * The work size is rounded up to a multiple of the preferred local size reported by the
 * library. Returns false when the device already has an error or when the launch fails;
 * a failed launch also sets the device error. */
bool OneapiDeviceQueue::enqueue(DeviceKernel kernel,
                                const int signed_kernel_work_size,
                                DeviceKernelArguments const &_args)
{
  if (oneapi_device_->have_error()) {
    return false;
  }

  /* The context is dereferenced below, so check it first. init_execution() creates it. */
  assert(kernel_context_);

  void **args = const_cast<void **>(_args.values);

  debug_enqueue(kernel, signed_kernel_work_size);
  assert(signed_kernel_work_size >= 0);
  size_t kernel_work_size = (size_t)signed_kernel_work_size;

  size_t kernel_local_size = oneapi_dll_.oneapi_kernel_preferred_local_size(
      kernel_context_->queue, (::DeviceKernel)kernel, kernel_work_size);
  size_t uniformed_kernel_work_size = round_up(kernel_work_size, kernel_local_size);

  /* Call the oneAPI kernel DLL to launch the requested kernel. */
  bool is_finished_ok = oneapi_dll_.oneapi_enqueue_kernel(
      kernel_context_, kernel, uniformed_kernel_work_size, args);

  if (is_finished_ok == false) {
    oneapi_device_->set_error("oneAPI kernel \"" + std::string(device_kernel_as_string(kernel)) +
                              "\" execution error: got runtime exception \"" +
                              oneapi_device_->oneapi_error_message() + "\"");
  }

  return is_finished_ok;
}
|
||||
|
||||
bool OneapiDeviceQueue::synchronize()
|
||||
{
|
||||
if (oneapi_device_->have_error()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool is_finished_ok = oneapi_dll_.oneapi_queue_synchronize(oneapi_device_->sycl_queue());
|
||||
if (is_finished_ok == false)
|
||||
oneapi_device_->set_error("oneAPI unknown kernel execution error: got runtime exception \"" +
|
||||
oneapi_device_->oneapi_error_message() + "\"");
|
||||
|
||||
debug_synchronize();
|
||||
|
||||
return !(oneapi_device_->have_error());
|
||||
}
|
||||
|
||||
/* Zero `mem` on the device by delegating to OneapiDevice::mem_zero(). */
void OneapiDeviceQueue::zero_to_device(device_memory &mem)
{
  OneapiDevice *const device = oneapi_device_;
  device->mem_zero(mem);
}
|
||||
|
||||
/* Upload `mem` by delegating to OneapiDevice::mem_copy_to(). */
void OneapiDeviceQueue::copy_to_device(device_memory &mem)
{
  OneapiDevice *const device = oneapi_device_;
  device->mem_copy_to(mem);
}
|
||||
|
||||
/* Download `mem` by delegating to the single-argument OneapiDevice::mem_copy_from(). */
void OneapiDeviceQueue::copy_from_device(device_memory &mem)
{
  OneapiDevice *const device = oneapi_device_;
  device->mem_copy_from(mem);
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_ONEAPI */
|
@@ -1,51 +0,0 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2021-2022 Intel Corporation */
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef WITH_ONEAPI
|
||||
|
||||
# include "device/kernel.h"
|
||||
# include "device/memory.h"
|
||||
# include "device/queue.h"
|
||||
|
||||
# include "device/oneapi/device.h"
|
||||
# include "device/oneapi/dll_interface.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class OneapiDevice;
|
||||
class device_memory;
|
||||
|
||||
/* Base class for OneAPI queues. */
|
||||
class OneapiDeviceQueue : public DeviceQueue {
|
||||
public:
|
||||
explicit OneapiDeviceQueue(OneapiDevice *device);
|
||||
~OneapiDeviceQueue();
|
||||
|
||||
virtual int num_concurrent_states(const size_t state_size) const override;
|
||||
|
||||
virtual int num_concurrent_busy_states() const override;
|
||||
|
||||
virtual void init_execution() override;
|
||||
|
||||
virtual bool enqueue(DeviceKernel kernel,
|
||||
const int kernel_work_size,
|
||||
DeviceKernelArguments const &args) override;
|
||||
|
||||
virtual bool synchronize() override;
|
||||
|
||||
virtual void zero_to_device(device_memory &mem) override;
|
||||
virtual void copy_to_device(device_memory &mem) override;
|
||||
virtual void copy_from_device(device_memory &mem) override;
|
||||
|
||||
protected:
|
||||
OneapiDevice *oneapi_device_;
|
||||
OneAPIDLLInterface oneapi_dll_;
|
||||
KernelContext *kernel_context_;
|
||||
bool with_kernel_statistics_;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_ONEAPI */
|
@@ -2047,7 +2047,7 @@ void OptiXDevice::const_copy_to(const char *name, void *host, size_t size)
|
||||
|
||||
/* Update traversable handle (since it is different for each device on multi devices). */
|
||||
KernelData *const data = (KernelData *)host;
|
||||
*(OptixTraversableHandle *)&data->device_bvh = tlas_handle;
|
||||
*(OptixTraversableHandle *)&data->bvh.scene = tlas_handle;
|
||||
|
||||
update_launch_params(offsetof(KernelParamsOptiX, data), host, size);
|
||||
return;
|
||||
|
@@ -105,13 +105,6 @@ class DeviceQueue {
|
||||
* value. */
|
||||
virtual int num_concurrent_busy_states() const = 0;
|
||||
|
||||
/* Number of elements in a partition of sorted shaders, that improves memory locality of
|
||||
* integrator state fetch at the cost of decreased coherence for shader kernel execution. */
|
||||
virtual int num_sort_partition_elements() const
|
||||
{
|
||||
return 65536;
|
||||
}
|
||||
|
||||
/* Initialize execution of kernels on this queue.
|
||||
*
|
||||
* Will, for example, load all data required by the kernels from Device to global or path state.
|
||||
|
@@ -373,7 +373,7 @@ void PathTrace::path_trace(RenderWork &render_work)
|
||||
work_balance_infos_[i].time_spent += work_time;
|
||||
work_balance_infos_[i].occupancy = statistics.occupancy;
|
||||
|
||||
VLOG_INFO << "Rendered " << num_samples << " samples in " << work_time << " seconds ("
|
||||
VLOG_WORK << "Rendered " << num_samples << " samples in " << work_time << " seconds ("
|
||||
<< work_time / num_samples
|
||||
<< " seconds per sample), occupancy: " << statistics.occupancy;
|
||||
});
|
||||
@@ -1103,8 +1103,6 @@ static const char *device_type_for_description(const DeviceType type)
|
||||
return "OptiX";
|
||||
case DEVICE_HIP:
|
||||
return "HIP";
|
||||
case DEVICE_ONEAPI:
|
||||
return "oneAPI";
|
||||
case DEVICE_DUMMY:
|
||||
return "Dummy";
|
||||
case DEVICE_MULTI:
|
||||
|
@@ -181,45 +181,27 @@ void PathTraceWorkGPU::alloc_integrator_queue()
|
||||
|
||||
void PathTraceWorkGPU::alloc_integrator_sorting()
|
||||
{
|
||||
/* Compute sort partitions, to balance between memory locality and coherence.
|
||||
* Sort partitioning becomes less effective when more shaders are in the wavefront. In lieu of a
|
||||
* more sophisticated heuristic we simply disable sort partitioning if the shader count is high.
|
||||
*/
|
||||
num_sort_partitions_ = 1;
|
||||
if (device_scene_->data.max_shaders < 300) {
|
||||
const int num_elements = queue_->num_sort_partition_elements();
|
||||
if (num_elements) {
|
||||
num_sort_partitions_ = max(max_num_paths_ / num_elements, 1);
|
||||
}
|
||||
}
|
||||
|
||||
integrator_state_gpu_.sort_partition_divisor = (int)divide_up(max_num_paths_,
|
||||
num_sort_partitions_);
|
||||
|
||||
/* Allocate arrays for shader sorting. */
|
||||
const int sort_buckets = device_scene_->data.max_shaders * num_sort_partitions_;
|
||||
if (integrator_shader_sort_counter_.size() < sort_buckets) {
|
||||
integrator_shader_sort_counter_.alloc(sort_buckets);
|
||||
const int max_shaders = device_scene_->data.max_shaders;
|
||||
if (integrator_shader_sort_counter_.size() < max_shaders) {
|
||||
integrator_shader_sort_counter_.alloc(max_shaders);
|
||||
integrator_shader_sort_counter_.zero_to_device();
|
||||
|
||||
integrator_shader_raytrace_sort_counter_.alloc(max_shaders);
|
||||
integrator_shader_raytrace_sort_counter_.zero_to_device();
|
||||
|
||||
integrator_shader_mnee_sort_counter_.alloc(max_shaders);
|
||||
integrator_shader_mnee_sort_counter_.zero_to_device();
|
||||
|
||||
integrator_shader_sort_prefix_sum_.alloc(max_shaders);
|
||||
integrator_shader_sort_prefix_sum_.zero_to_device();
|
||||
|
||||
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] =
|
||||
(int *)integrator_shader_sort_counter_.device_pointer;
|
||||
|
||||
if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
|
||||
integrator_shader_raytrace_sort_counter_.alloc(sort_buckets);
|
||||
integrator_shader_raytrace_sort_counter_.zero_to_device();
|
||||
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
|
||||
(int *)integrator_shader_raytrace_sort_counter_.device_pointer;
|
||||
}
|
||||
|
||||
if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
|
||||
integrator_shader_mnee_sort_counter_.alloc(sort_buckets);
|
||||
integrator_shader_mnee_sort_counter_.zero_to_device();
|
||||
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
|
||||
(int *)integrator_shader_mnee_sort_counter_.device_pointer;
|
||||
}
|
||||
|
||||
integrator_shader_sort_prefix_sum_.alloc(sort_buckets);
|
||||
integrator_shader_sort_prefix_sum_.zero_to_device();
|
||||
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
|
||||
(int *)integrator_shader_raytrace_sort_counter_.device_pointer;
|
||||
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
|
||||
(int *)integrator_shader_mnee_sort_counter_.device_pointer;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -351,12 +333,8 @@ void PathTraceWorkGPU::enqueue_reset()
|
||||
queue_->enqueue(DEVICE_KERNEL_INTEGRATOR_RESET, max_num_paths_, args);
|
||||
queue_->zero_to_device(integrator_queue_counter_);
|
||||
queue_->zero_to_device(integrator_shader_sort_counter_);
|
||||
if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
|
||||
queue_->zero_to_device(integrator_shader_raytrace_sort_counter_);
|
||||
}
|
||||
if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
|
||||
queue_->zero_to_device(integrator_shader_mnee_sort_counter_);
|
||||
}
|
||||
queue_->zero_to_device(integrator_shader_raytrace_sort_counter_);
|
||||
queue_->zero_to_device(integrator_shader_mnee_sort_counter_);
|
||||
|
||||
/* Tiles enqueue needs to know the number of active paths, which is based on this counter. Zero the
|
||||
* counter on the host side because `zero_to_device()` is not doing it. */
|
||||
@@ -508,9 +486,9 @@ void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel kernel,
|
||||
/* Compute prefix sum of number of active paths with each shader. */
|
||||
{
|
||||
const int work_size = 1;
|
||||
int sort_buckets = device_scene_->data.max_shaders * num_sort_partitions_;
|
||||
int max_shaders = device_scene_->data.max_shaders;
|
||||
|
||||
DeviceKernelArguments args(&d_counter, &d_prefix_sum, &sort_buckets);
|
||||
DeviceKernelArguments args(&d_counter, &d_prefix_sum, &max_shaders);
|
||||
|
||||
queue_->enqueue(DEVICE_KERNEL_PREFIX_SUM, work_size, args);
|
||||
}
|
||||
|
@@ -156,9 +156,6 @@ class PathTraceWorkGPU : public PathTraceWork {
|
||||
bool interop_use_checked_ = false;
|
||||
bool interop_use_ = false;
|
||||
|
||||
/* Number of partitions to sort state indices into prior to material sort. */
|
||||
int num_sort_partitions_;
|
||||
|
||||
/* Maximum number of concurrent integrator states. */
|
||||
int max_num_paths_;
|
||||
|
||||
|
@@ -37,10 +37,6 @@ set(SRC_KERNEL_DEVICE_OPTIX
|
||||
device/optix/kernel_shader_raytrace.cu
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_DEVICE_ONEAPI
|
||||
device/oneapi/kernel.cpp
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_DEVICE_CPU_HEADERS
|
||||
device/cpu/compat.h
|
||||
device/cpu/image.h
|
||||
@@ -79,20 +75,9 @@ set(SRC_KERNEL_DEVICE_METAL_HEADERS
|
||||
device/metal/compat.h
|
||||
device/metal/context_begin.h
|
||||
device/metal/context_end.h
|
||||
device/metal/function_constants.h
|
||||
device/metal/globals.h
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_DEVICE_ONEAPI_HEADERS
|
||||
device/oneapi/compat.h
|
||||
device/oneapi/context_begin.h
|
||||
device/oneapi/context_end.h
|
||||
device/oneapi/globals.h
|
||||
device/oneapi/image.h
|
||||
device/oneapi/kernel.h
|
||||
device/oneapi/kernel_templates.h
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_CLOSURE_HEADERS
|
||||
closure/alloc.h
|
||||
closure/bsdf.h
|
||||
@@ -155,7 +140,6 @@ set(SRC_KERNEL_SVM_HEADERS
|
||||
svm/math_util.h
|
||||
svm/mix.h
|
||||
svm/musgrave.h
|
||||
svm/node_types_template.h
|
||||
svm/noise.h
|
||||
svm/noisetex.h
|
||||
svm/normal.h
|
||||
@@ -284,7 +268,6 @@ set(SRC_KERNEL_UTIL_HEADERS
|
||||
|
||||
set(SRC_KERNEL_TYPES_HEADERS
|
||||
data_arrays.h
|
||||
data_template.h
|
||||
tables.h
|
||||
types.h
|
||||
)
|
||||
@@ -704,209 +687,6 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
|
||||
cycles_set_solution_folder(cycles_kernel_optix)
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_DEVICE_ONEAPI)
|
||||
if(WIN32)
|
||||
set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi.dll)
|
||||
else()
|
||||
set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi.so)
|
||||
endif()
|
||||
|
||||
set(cycles_oneapi_kernel_sources
|
||||
${SRC_KERNEL_DEVICE_ONEAPI}
|
||||
${SRC_KERNEL_HEADERS}
|
||||
${SRC_KERNEL_DEVICE_GPU_HEADERS}
|
||||
${SRC_KERNEL_DEVICE_ONEAPI_HEADERS}
|
||||
${SRC_UTIL_HEADERS}
|
||||
)
|
||||
|
||||
# SYCL_CPP_FLAGS is a variable that the user can set to pass extra compiler options
|
||||
set(sycl_compiler_flags
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/${SRC_KERNEL_DEVICE_ONEAPI}
|
||||
-fsycl
|
||||
-fsycl-unnamed-lambda
|
||||
-fdelayed-template-parsing
|
||||
-mllvm -inlinedefault-threshold=300
|
||||
-mllvm -inlinehint-threshold=400
|
||||
-shared
|
||||
-DWITH_ONEAPI
|
||||
-ffast-math
|
||||
-DNDEBUG
|
||||
-O2
|
||||
-o ${cycles_kernel_oneapi_lib}
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/..
|
||||
${SYCL_CPP_FLAGS}
|
||||
)
|
||||
|
||||
|
||||
if (WITH_CYCLES_ONEAPI_SYCL_HOST_ENABLED)
|
||||
list(APPEND sycl_compiler_flags -DWITH_ONEAPI_SYCL_HOST_ENABLED)
|
||||
endif()
|
||||
|
||||
# Set defaults for spir64 and spir64_gen options
|
||||
if (NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64)
|
||||
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-large-register-file -ze-opt-regular-grf-kernel integrator_intersect'")
|
||||
endif()
|
||||
if (NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen)
|
||||
SET (CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}" CACHE STRING "Extra build options for spir64_gen target")
|
||||
endif()
|
||||
# Enable zebin (graphics binary format with improved compatibility) on Windows only, while support on Linux isn't available yet.
|
||||
if(WIN32)
|
||||
string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "--format zebin ")
|
||||
endif()
|
||||
string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "-device ${CYCLES_ONEAPI_SPIR64_GEN_DEVICES} ")
|
||||
|
||||
if (WITH_CYCLES_ONEAPI_BINARIES)
|
||||
# Iterate over all targets and their options
|
||||
list (JOIN CYCLES_ONEAPI_SYCL_TARGETS "," targets_string)
|
||||
list (APPEND sycl_compiler_flags -fsycl-targets=${targets_string})
|
||||
foreach(target ${CYCLES_ONEAPI_SYCL_TARGETS})
|
||||
if(DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_${target})
|
||||
list (APPEND sycl_compiler_flags -Xsycl-target-backend=${target} "${CYCLES_ONEAPI_SYCL_OPTIONS_${target}}")
|
||||
endif()
|
||||
endforeach()
|
||||
else()
|
||||
# If AOT is disabled, build for spir64
|
||||
list(APPEND sycl_compiler_flags
|
||||
-fsycl-targets=spir64
|
||||
-Xsycl-target-backend=spir64 "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}")
|
||||
endif()
|
||||
|
||||
if(WITH_NANOVDB)
|
||||
list(APPEND sycl_compiler_flags
|
||||
-DWITH_NANOVDB
|
||||
-I"${NANOVDB_INCLUDE_DIR}")
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_DEBUG)
|
||||
list(APPEND sycl_compiler_flags -DWITH_CYCLES_DEBUG)
|
||||
endif()
|
||||
|
||||
get_filename_component(sycl_compiler_root ${SYCL_COMPILER} DIRECTORY)
|
||||
get_filename_component(sycl_compiler_compiler_name ${SYCL_COMPILER} NAME_WE)
|
||||
|
||||
if(NOT OCLOC_INSTALL_DIR)
|
||||
get_filename_component(OCLOC_INSTALL_DIR "${sycl_compiler_root}/../lib/ocloc" ABSOLUTE)
|
||||
endif()
|
||||
if(WITH_CYCLES_ONEAPI_BINARIES AND NOT EXISTS ${OCLOC_INSTALL_DIR})
|
||||
message(FATAL_ERROR "WITH_CYCLES_ONEAPI_BINARIES requires ocloc but ${OCLOC_INSTALL_DIR} directory doesn't exist."
|
||||
" A different ocloc directory can be set using OCLOC_INSTALL_DIR cmake variable.")
|
||||
endif()
|
||||
|
||||
if(UNIX AND NOT APPLE)
|
||||
if(NOT WITH_CXX11_ABI)
|
||||
check_library_exists(sycl
|
||||
_ZN2cl4sycl7handler22verifyUsedKernelBundleERKSs ${sycl_compiler_root}/../lib SYCL_NO_CXX11_ABI)
|
||||
if(SYCL_NO_CXX11_ABI)
|
||||
list(APPEND sycl_compiler_flags -D_GLIBCXX_USE_CXX11_ABI=0)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
list(APPEND sycl_compiler_flags
|
||||
-fms-extensions
|
||||
-fms-compatibility
|
||||
-D_WINDLL
|
||||
-D_MBCS
|
||||
-DWIN32
|
||||
-D_WINDOWS
|
||||
-D_CRT_NONSTDC_NO_DEPRECATE
|
||||
-D_CRT_SECURE_NO_DEPRECATE
|
||||
-DONEAPI_EXPORT)
|
||||
|
||||
if(sycl_compiler_compiler_name MATCHES "dpcpp")
|
||||
# The oneAPI distribution calls the compiler "dpcpp" and comes with a script that sets environment variables.
|
||||
add_custom_command(
|
||||
OUTPUT ${cycles_kernel_oneapi_lib}
|
||||
COMMAND "${sycl_compiler_root}/../../env/vars.bat"
|
||||
COMMAND ${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags}
|
||||
DEPENDS ${cycles_oneapi_kernel_sources})
|
||||
else()
|
||||
# The open source SYCL compiler just goes by clang++ and does not have such a script.
|
||||
# Set the variables manually.
|
||||
string(REPLACE /Redist/ /Tools/ MSVC_TOOLS_DIR ${MSVC_REDIST_DIR})
|
||||
if(NOT CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION) # case for Ninja on Windows
|
||||
get_filename_component(cmake_mt_dir ${CMAKE_MT} DIRECTORY)
|
||||
string(REPLACE /bin/ /Lib/ WINDOWS_KIT_DIR ${cmake_mt_dir})
|
||||
get_filename_component(WINDOWS_KIT_DIR "${WINDOWS_KIT_DIR}/../" ABSOLUTE)
|
||||
else()
|
||||
set(WINDOWS_KIT_DIR ${WINDOWS_KITS_DIR}/Lib/${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION})
|
||||
endif()
|
||||
list(APPEND sycl_compiler_flags
|
||||
-L "${MSVC_TOOLS_DIR}/lib/x64"
|
||||
-L "${WINDOWS_KIT_DIR}/um/x64"
|
||||
-L "${WINDOWS_KIT_DIR}/ucrt/x64")
|
||||
add_custom_command(
|
||||
OUTPUT ${cycles_kernel_oneapi_lib}
|
||||
COMMAND ${CMAKE_COMMAND} -E env
|
||||
"LIB=${sycl_compiler_root}/../lib" # for compiler to find sycl.lib
|
||||
"PATH=${OCLOC_INSTALL_DIR};${sycl_compiler_root}"
|
||||
${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags}
|
||||
DEPENDS ${cycles_oneapi_kernel_sources})
|
||||
endif()
|
||||
else()
|
||||
list(APPEND sycl_compiler_flags -fPIC)
|
||||
|
||||
# We avoid getting __FAST_MATH__ to be defined when building on CentOS 7 until the compilation crash
|
||||
# it triggers at either AoT or JIT stages gets fixed.
|
||||
list(APPEND sycl_compiler_flags -fhonor-nans)
|
||||
|
||||
# add $ORIGIN to cycles_kernel_oneapi.so rpath so libsycl.so and
|
||||
# libpi_level_zero.so can be placed next to it and get found.
|
||||
list(APPEND sycl_compiler_flags -Wl,-rpath,'$$ORIGIN')
|
||||
|
||||
# The oneAPI distribution calls the compiler "dpcpp" and comes with a script that sets environment variables.
|
||||
if(sycl_compiler_compiler_name MATCHES "dpcpp")
|
||||
add_custom_command(
|
||||
OUTPUT ${cycles_kernel_oneapi_lib}
|
||||
COMMAND bash -c \"source ${sycl_compiler_root}/../../env/vars.sh&&${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags}\"
|
||||
DEPENDS ${cycles_oneapi_kernel_sources})
|
||||
else()
|
||||
# The open source SYCL compiler just goes by clang++ and does not have such a script.
|
||||
# Set the variables manually.
|
||||
if(NOT IGC_INSTALL_DIR)
|
||||
get_filename_component(IGC_INSTALL_DIR "${sycl_compiler_root}/../lib/igc" ABSOLUTE)
|
||||
endif()
|
||||
add_custom_command(
|
||||
OUTPUT ${cycles_kernel_oneapi_lib}
|
||||
COMMAND ${CMAKE_COMMAND} -E env
|
||||
"LD_LIBRARY_PATH=${sycl_compiler_root}/../lib:${OCLOC_INSTALL_DIR}/lib:${IGC_INSTALL_DIR}/lib"
|
||||
"PATH=${OCLOC_INSTALL_DIR}/bin:${sycl_compiler_root}:$ENV{PATH}" # env PATH is for compiler to find ld
|
||||
${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags}
|
||||
DEPENDS ${cycles_oneapi_kernel_sources})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# install dynamic libraries required at runtime
|
||||
if(WIN32)
|
||||
set(SYCL_RUNTIME_DEPENDENCIES
|
||||
sycl.dll
|
||||
pi_level_zero.dll
|
||||
)
|
||||
if(NOT WITH_BLENDER)
|
||||
# For the Cycles standalone put libraries next to the Cycles application.
|
||||
delayed_install("${sycl_compiler_root}" "${SYCL_RUNTIME_DEPENDENCIES}" ${CYCLES_INSTALL_PATH})
|
||||
else()
|
||||
# For Blender put the libraries next to the Blender executable.
|
||||
#
|
||||
# Note that the installation path in the delayed_install is relative to the versioned folder,
|
||||
# which means we need to go one level up.
|
||||
delayed_install("${sycl_compiler_root}" "${SYCL_RUNTIME_DEPENDENCIES}" "../")
|
||||
endif()
|
||||
elseif(UNIX AND NOT APPLE)
|
||||
file(GLOB SYCL_RUNTIME_DEPENDENCIES
|
||||
${sycl_compiler_root}/../lib/libsycl.so
|
||||
${sycl_compiler_root}/../lib/libsycl.so.[0-9]
|
||||
${sycl_compiler_root}/../lib/libsycl.so.[0-9].[0-9].[0-9]-[0-9]
|
||||
)
|
||||
list(APPEND SYCL_RUNTIME_DEPENDENCIES ${sycl_compiler_root}/../lib/libpi_level_zero.so)
|
||||
delayed_install("" "${SYCL_RUNTIME_DEPENDENCIES}" ${CYCLES_INSTALL_PATH}/lib)
|
||||
endif()
|
||||
|
||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cycles_kernel_oneapi_lib}" ${CYCLES_INSTALL_PATH}/lib)
|
||||
add_custom_target(cycles_kernel_oneapi ALL DEPENDS ${cycles_kernel_oneapi_lib})
|
||||
endif()
|
||||
|
||||
# OSL module
|
||||
|
||||
if(WITH_CYCLES_OSL)
|
||||
@@ -972,7 +752,6 @@ cycles_add_library(cycles_kernel "${LIB}"
|
||||
${SRC_KERNEL_DEVICE_HIP_HEADERS}
|
||||
${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
|
||||
${SRC_KERNEL_DEVICE_METAL_HEADERS}
|
||||
${SRC_KERNEL_DEVICE_ONEAPI_HEADERS}
|
||||
)
|
||||
|
||||
source_group("bake" FILES ${SRC_KERNEL_BAKE_HEADERS})
|
||||
@@ -985,7 +764,6 @@ source_group("device\\gpu" FILES ${SRC_KERNEL_DEVICE_GPU_HEADERS})
|
||||
source_group("device\\hip" FILES ${SRC_KERNEL_DEVICE_HIP} ${SRC_KERNEL_DEVICE_HIP_HEADERS})
|
||||
source_group("device\\optix" FILES ${SRC_KERNEL_DEVICE_OPTIX} ${SRC_KERNEL_DEVICE_OPTIX_HEADERS})
|
||||
source_group("device\\metal" FILES ${SRC_KERNEL_DEVICE_METAL} ${SRC_KERNEL_DEVICE_METAL_HEADERS})
|
||||
source_group("device\\oneapi" FILES ${SRC_KERNEL_DEVICE_ONEAPI} ${SRC_KERNEL_DEVICE_ONEAPI_HEADERS})
|
||||
source_group("film" FILES ${SRC_KERNEL_FILM_HEADERS})
|
||||
source_group("geom" FILES ${SRC_KERNEL_GEOM_HEADERS})
|
||||
source_group("integrator" FILES ${SRC_KERNEL_INTEGRATOR_HEADERS})
|
||||
@@ -1004,9 +782,6 @@ endif()
|
||||
if(WITH_CYCLES_HIP)
|
||||
add_dependencies(cycles_kernel cycles_kernel_hip)
|
||||
endif()
|
||||
if(WITH_CYCLES_DEVICE_ONEAPI)
|
||||
add_dependencies(cycles_kernel cycles_kernel_oneapi)
|
||||
endif()
|
||||
|
||||
# Install kernel source for runtime compilation
|
||||
|
||||
|
@@ -29,14 +29,14 @@ ccl_device void kernel_displace_evaluate(KernelGlobals kg,
|
||||
object_inverse_dir_transform(kg, &sd, &D);
|
||||
|
||||
#ifdef __KERNEL_DEBUG_NAN__
|
||||
if (!isfinite_safe(D)) {
|
||||
if (!isfinite3_safe(D)) {
|
||||
kernel_assert(!"Cycles displacement with non-finite value detected");
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Ensure finite displacement, preventing BVH from becoming degenerate and avoiding possible
|
||||
* traversal issues caused by non-finite math. */
|
||||
D = ensure_finite(D);
|
||||
D = ensure_finite3(D);
|
||||
|
||||
/* Write output. */
|
||||
output[offset * 3 + 0] += D.x;
|
||||
@@ -68,13 +68,13 @@ ccl_device void kernel_background_evaluate(KernelGlobals kg,
|
||||
float3 color = shader_background_eval(&sd);
|
||||
|
||||
#ifdef __KERNEL_DEBUG_NAN__
|
||||
if (!isfinite_safe(color)) {
|
||||
if (!isfinite3_safe(color)) {
|
||||
kernel_assert(!"Cycles background with non-finite value detected");
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Ensure finite color, avoiding possible numerical instabilities in the path tracing kernels. */
|
||||
color = ensure_finite(color);
|
||||
color = ensure_finite3(color);
|
||||
|
||||
/* Write output. */
|
||||
output[offset * 3 + 0] += color.x;
|
||||
|
@@ -172,11 +172,11 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
|
||||
ray_flags |= OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT;
|
||||
}
|
||||
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
|
||||
ray->P,
|
||||
ray->D,
|
||||
ray->tmin,
|
||||
ray->tmax,
|
||||
0.0f,
|
||||
ray->t,
|
||||
ray->time,
|
||||
ray_mask,
|
||||
ray_flags,
|
||||
@@ -203,28 +203,28 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
|
||||
#elif defined(__METALRT__)
|
||||
|
||||
if (!scene_intersect_valid(ray)) {
|
||||
isect->t = ray->tmax;
|
||||
isect->t = ray->t;
|
||||
isect->type = PRIMITIVE_NONE;
|
||||
return false;
|
||||
}
|
||||
|
||||
# if defined(__KERNEL_DEBUG__)
|
||||
if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
|
||||
isect->t = ray->tmax;
|
||||
isect->t = ray->t;
|
||||
isect->type = PRIMITIVE_NONE;
|
||||
kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
|
||||
isect->t = ray->tmax;
|
||||
isect->t = ray->t;
|
||||
isect->type = PRIMITIVE_NONE;
|
||||
kernel_assert(!"Invalid ift_default");
|
||||
return false;
|
||||
}
|
||||
# endif
|
||||
|
||||
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
|
||||
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
|
||||
metalrt_intersector_type metalrt_intersect;
|
||||
|
||||
if (!kernel_data.bvh.have_curves) {
|
||||
@@ -263,7 +263,7 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
|
||||
# endif
|
||||
|
||||
if (intersection.type == intersection_type::none) {
|
||||
isect->t = ray->tmax;
|
||||
isect->t = ray->t;
|
||||
isect->type = PRIMITIVE_NONE;
|
||||
|
||||
return false;
|
||||
@@ -295,14 +295,14 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
isect->t = ray->tmax;
|
||||
if (kernel_data.bvh.scene) {
|
||||
isect->t = ray->t;
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
|
||||
IntersectContext rtc_ctx(&ctx);
|
||||
RTCRayHit ray_hit;
|
||||
ctx.ray = ray;
|
||||
kernel_embree_setup_rayhit(*ray, ray_hit, visibility);
|
||||
rtcIntersect1(kernel_data.device_bvh, &rtc_ctx.context, &ray_hit);
|
||||
rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit);
|
||||
if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID &&
|
||||
ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
|
||||
kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
|
||||
@@ -357,11 +357,11 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
if (local_isect) {
|
||||
local_isect->num_hits = 0; /* Initialize hit count to zero. */
|
||||
}
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
|
||||
ray->P,
|
||||
ray->D,
|
||||
ray->tmin,
|
||||
ray->tmax,
|
||||
0.0f,
|
||||
ray->t,
|
||||
ray->time,
|
||||
0xFF,
|
||||
/* Need to always call into __anyhit__kernel_optix_local_hit. */
|
||||
@@ -405,7 +405,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
}
|
||||
# endif
|
||||
|
||||
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
|
||||
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
|
||||
metalrt_intersector_type metalrt_intersect;
|
||||
|
||||
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
||||
@@ -451,7 +451,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
if (kernel_data.bvh.scene) {
|
||||
const bool has_bvh = !(kernel_data_fetch(object_flag, local_object) &
|
||||
SD_OBJECT_TRANSFORM_APPLIED);
|
||||
CCLIntersectContext ctx(
|
||||
@@ -470,13 +470,13 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
|
||||
/* If this object has its own BVH, use it. */
|
||||
if (has_bvh) {
|
||||
RTCGeometry geom = rtcGetGeometry(kernel_data.device_bvh, local_object * 2);
|
||||
RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2);
|
||||
if (geom) {
|
||||
float3 P = ray->P;
|
||||
float3 dir = ray->D;
|
||||
float3 idir = ray->D;
|
||||
Transform ob_itfm;
|
||||
rtc_ray.tfar = ray->tmax *
|
||||
rtc_ray.tfar = ray->t *
|
||||
bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
/* bvh_instance_motion_push() returns the inverse transform but
|
||||
* it's not needed here. */
|
||||
@@ -496,7 +496,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
}
|
||||
}
|
||||
else {
|
||||
rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
|
||||
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
|
||||
}
|
||||
|
||||
/* rtcOccluded1 sets tfar to -inf if a hit was found. */
|
||||
@@ -539,11 +539,11 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
||||
ray_mask = 0xFF;
|
||||
}
|
||||
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
|
||||
ray->P,
|
||||
ray->D,
|
||||
ray->tmin,
|
||||
ray->tmax,
|
||||
0.0f,
|
||||
ray->t,
|
||||
ray->time,
|
||||
ray_mask,
|
||||
/* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */
|
||||
@@ -582,7 +582,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
||||
}
|
||||
# endif
|
||||
|
||||
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
|
||||
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
|
||||
metalrt_intersector_type metalrt_intersect;
|
||||
|
||||
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
||||
@@ -633,7 +633,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
if (kernel_data.bvh.scene) {
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
|
||||
Intersection *isect_array = (Intersection *)state->shadow_isect;
|
||||
ctx.isect_s = isect_array;
|
||||
@@ -642,7 +642,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
||||
IntersectContext rtc_ctx(&ctx);
|
||||
RTCRay rtc_ray;
|
||||
kernel_embree_setup_ray(*ray, rtc_ray, visibility);
|
||||
rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
|
||||
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
|
||||
|
||||
*num_recorded_hits = ctx.num_recorded_hits;
|
||||
*throughput = ctx.throughput;
|
||||
@@ -698,11 +698,11 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
|
||||
ray_mask = 0xFF;
|
||||
}
|
||||
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
|
||||
ray->P,
|
||||
ray->D,
|
||||
ray->tmin,
|
||||
ray->tmax,
|
||||
0.0f,
|
||||
ray->t,
|
||||
ray->time,
|
||||
ray_mask,
|
||||
/* Need to always call into __anyhit__kernel_optix_volume_test. */
|
||||
@@ -744,7 +744,7 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
|
||||
}
|
||||
# endif
|
||||
|
||||
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
|
||||
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
|
||||
metalrt_intersector_type metalrt_intersect;
|
||||
|
||||
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
||||
@@ -825,7 +825,7 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals kg,
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
if (kernel_data.bvh.scene) {
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
|
||||
ctx.isect_s = isect;
|
||||
ctx.max_hits = max_hits;
|
||||
@@ -834,7 +834,7 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals kg,
|
||||
IntersectContext rtc_ctx(&ctx);
|
||||
RTCRay rtc_ray;
|
||||
kernel_embree_setup_ray(*ray, rtc_ray, visibility);
|
||||
rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
|
||||
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
|
||||
return ctx.num_hits;
|
||||
}
|
||||
# endif /* __EMBREE__ */
|
||||
|
@@ -83,8 +83,8 @@ ccl_device_inline void kernel_embree_setup_ray(const Ray &ray,
|
||||
rtc_ray.dir_x = ray.D.x;
|
||||
rtc_ray.dir_y = ray.D.y;
|
||||
rtc_ray.dir_z = ray.D.z;
|
||||
rtc_ray.tnear = ray.tmin;
|
||||
rtc_ray.tfar = ray.tmax;
|
||||
rtc_ray.tnear = 0.0f;
|
||||
rtc_ray.tfar = ray.t;
|
||||
rtc_ray.time = ray.time;
|
||||
rtc_ray.mask = visibility;
|
||||
}
|
||||
@@ -107,7 +107,7 @@ ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg
|
||||
const int oID = hit->instID[0] / 2;
|
||||
if ((ray->self.object == oID) || (ray->self.light_object == oID)) {
|
||||
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
|
||||
rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
|
||||
rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
|
||||
const int pID = hit->primID +
|
||||
(intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
|
||||
status = intersection_skip_self_shadow(ray->self, oID, pID);
|
||||
@@ -117,7 +117,7 @@ ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg
|
||||
const int oID = hit->geomID / 2;
|
||||
if ((ray->self.object == oID) || (ray->self.light_object == oID)) {
|
||||
const int pID = hit->primID + (intptr_t)rtcGetGeometryUserData(
|
||||
rtcGetGeometry(kernel_data.device_bvh, hit->geomID));
|
||||
rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
|
||||
status = intersection_skip_self_shadow(ray->self, oID, pID);
|
||||
}
|
||||
}
|
||||
@@ -133,14 +133,14 @@ ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
|
||||
isect->t = ray->tfar;
|
||||
if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
|
||||
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
|
||||
rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
|
||||
rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
|
||||
isect->prim = hit->primID +
|
||||
(intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
|
||||
isect->object = hit->instID[0] / 2;
|
||||
}
|
||||
else {
|
||||
isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(
|
||||
rtcGetGeometry(kernel_data.device_bvh, hit->geomID));
|
||||
rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
|
||||
isect->object = hit->geomID / 2;
|
||||
}
|
||||
|
||||
@@ -166,7 +166,7 @@ ccl_device_inline void kernel_embree_convert_sss_hit(
|
||||
isect->v = hit->u;
|
||||
isect->t = ray->tfar;
|
||||
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
|
||||
rtcGetGeometry(kernel_data.device_bvh, object * 2));
|
||||
rtcGetGeometry(kernel_data.bvh.scene, object * 2));
|
||||
isect->prim = hit->primID +
|
||||
(intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
|
||||
isect->object = object;
|
||||
|
@@ -47,9 +47,8 @@ ccl_device_inline
|
||||
float3 P = ray->P;
|
||||
float3 dir = bvh_clamp_direction(ray->D);
|
||||
float3 idir = bvh_inverse_direction(dir);
|
||||
float tmin = ray->tmin;
|
||||
int object = OBJECT_NONE;
|
||||
float isect_t = ray->tmax;
|
||||
float isect_t = ray->t;
|
||||
|
||||
if (local_isect != NULL) {
|
||||
local_isect->num_hits = 0;
|
||||
@@ -60,13 +59,10 @@ ccl_device_inline
|
||||
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
Transform ob_itfm;
|
||||
const float t_world_to_instance = bvh_instance_motion_push(
|
||||
kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
isect_t *= bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
#else
|
||||
const float t_world_to_instance = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir);
|
||||
isect_t *= bvh_instance_push(kg, local_object, ray, &P, &dir, &idir);
|
||||
#endif
|
||||
isect_t *= t_world_to_instance;
|
||||
tmin *= t_world_to_instance;
|
||||
object = local_object;
|
||||
}
|
||||
|
||||
@@ -85,7 +81,6 @@ ccl_device_inline
|
||||
dir,
|
||||
#endif
|
||||
idir,
|
||||
tmin,
|
||||
isect_t,
|
||||
node_addr,
|
||||
PATH_RAY_ALL_VISIBILITY,
|
||||
@@ -160,7 +155,6 @@ ccl_device_inline
|
||||
local_object,
|
||||
prim,
|
||||
prim_addr,
|
||||
tmin,
|
||||
isect_t,
|
||||
lcg_state,
|
||||
max_hits)) {
|
||||
@@ -197,7 +191,6 @@ ccl_device_inline
|
||||
local_object,
|
||||
prim,
|
||||
prim_addr,
|
||||
tmin,
|
||||
isect_t,
|
||||
lcg_state,
|
||||
max_hits)) {
|
||||
|
@@ -18,8 +18,7 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals kg
|
||||
ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
|
||||
const float3 P,
|
||||
const float3 idir,
|
||||
const float tmin,
|
||||
const float tmax,
|
||||
const float t,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
@@ -40,8 +39,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
|
||||
float c0hiy = (node1.z - P.y) * idir.y;
|
||||
float c0loz = (node2.x - P.z) * idir.z;
|
||||
float c0hiz = (node2.z - P.z) * idir.z;
|
||||
float c0min = max4(tmin, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
|
||||
float c0max = min4(tmax, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
|
||||
float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
|
||||
float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
|
||||
|
||||
float c1lox = (node0.y - P.x) * idir.x;
|
||||
float c1hix = (node0.w - P.x) * idir.x;
|
||||
@@ -49,8 +48,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
|
||||
float c1hiy = (node1.w - P.y) * idir.y;
|
||||
float c1loz = (node2.y - P.z) * idir.z;
|
||||
float c1hiz = (node2.w - P.z) * idir.z;
|
||||
float c1min = max4(tmin, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
|
||||
float c1max = min4(tmax, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
|
||||
float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
|
||||
float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
|
||||
|
||||
dist[0] = c0min;
|
||||
dist[1] = c1min;
|
||||
@@ -67,8 +66,7 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
|
||||
ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float tmin,
|
||||
const float tmax,
|
||||
const float t,
|
||||
int node_addr,
|
||||
int child,
|
||||
float dist[2])
|
||||
@@ -85,8 +83,8 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg,
|
||||
const float far_x = max(lower_xyz.x, upper_xyz.x);
|
||||
const float far_y = max(lower_xyz.y, upper_xyz.y);
|
||||
const float far_z = max(lower_xyz.z, upper_xyz.z);
|
||||
const float tnear = max4(tmin, near_x, near_y, near_z);
|
||||
const float tfar = min4(tmax, far_x, far_y, far_z);
|
||||
const float tnear = max4(0.0f, near_x, near_y, near_z);
|
||||
const float tfar = min4(t, far_x, far_y, far_z);
|
||||
*dist = tnear;
|
||||
return tnear <= tfar;
|
||||
}
|
||||
@@ -95,8 +93,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float3 idir,
|
||||
const float tmin,
|
||||
const float tmax,
|
||||
const float t,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
@@ -105,7 +102,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg,
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0);
|
||||
#endif
|
||||
if (bvh_unaligned_node_intersect_child(kg, P, dir, tmin, tmax, node_addr, 0, &dist[0])) {
|
||||
if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
if ((__float_as_uint(cnodes.x) & visibility))
|
||||
#endif
|
||||
@@ -113,7 +110,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg,
|
||||
mask |= 1;
|
||||
}
|
||||
}
|
||||
if (bvh_unaligned_node_intersect_child(kg, P, dir, tmin, tmax, node_addr, 1, &dist[1])) {
|
||||
if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
if ((__float_as_uint(cnodes.y) & visibility))
|
||||
#endif
|
||||
@@ -128,17 +125,16 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float3 idir,
|
||||
const float tmin,
|
||||
const float tmax,
|
||||
const float t,
|
||||
const int node_addr,
|
||||
const uint visibility,
|
||||
float dist[2])
|
||||
{
|
||||
float4 node = kernel_data_fetch(bvh_nodes, node_addr);
|
||||
if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
|
||||
return bvh_unaligned_node_intersect(kg, P, dir, idir, tmin, tmax, node_addr, visibility, dist);
|
||||
return bvh_unaligned_node_intersect(kg, P, dir, idir, t, node_addr, visibility, dist);
|
||||
}
|
||||
else {
|
||||
return bvh_aligned_node_intersect(kg, P, idir, tmin, tmax, node_addr, visibility, dist);
|
||||
return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist);
|
||||
}
|
||||
}
|
||||
|
@@ -49,7 +49,6 @@ ccl_device_inline
|
||||
float3 P = ray->P;
|
||||
float3 dir = bvh_clamp_direction(ray->D);
|
||||
float3 idir = bvh_inverse_direction(dir);
|
||||
float tmin = ray->tmin;
|
||||
int object = OBJECT_NONE;
|
||||
uint num_hits = 0;
|
||||
|
||||
@@ -60,12 +59,12 @@ ccl_device_inline
|
||||
/* Max distance in world space. May be dynamically reduced when max number of
|
||||
* recorded hits is exceeded and we no longer need to find hits beyond the max
|
||||
* distance found. */
|
||||
float t_max_world = ray->tmax;
|
||||
float t_max_world = ray->t;
|
||||
|
||||
/* Current maximum distance to the intersection.
|
||||
* Is calculated as a ray length, transformed to an object space when entering
|
||||
* instance node. */
|
||||
float t_max_current = ray->tmax;
|
||||
float t_max_current = ray->t;
|
||||
|
||||
/* Conversion from world to local space for the current instance if any, 1.0
|
||||
* otherwise. */
|
||||
@@ -89,7 +88,6 @@ ccl_device_inline
|
||||
dir,
|
||||
#endif
|
||||
idir,
|
||||
tmin,
|
||||
t_max_current,
|
||||
node_addr,
|
||||
visibility,
|
||||
@@ -158,16 +156,8 @@ ccl_device_inline
|
||||
|
||||
switch (type & PRIMITIVE_ALL) {
|
||||
case PRIMITIVE_TRIANGLE: {
|
||||
hit = triangle_intersect(kg,
|
||||
&isect,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
t_max_current,
|
||||
visibility,
|
||||
prim_object,
|
||||
prim,
|
||||
prim_addr);
|
||||
hit = triangle_intersect(
|
||||
kg, &isect, P, dir, t_max_current, visibility, prim_object, prim, prim_addr);
|
||||
break;
|
||||
}
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
@@ -176,7 +166,6 @@ ccl_device_inline
|
||||
&isect,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
t_max_current,
|
||||
ray->time,
|
||||
visibility,
|
||||
@@ -200,16 +189,8 @@ ccl_device_inline
|
||||
}
|
||||
|
||||
const int curve_type = kernel_data_fetch(prim_type, prim_addr);
|
||||
hit = curve_intersect(kg,
|
||||
&isect,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
t_max_current,
|
||||
prim_object,
|
||||
prim,
|
||||
ray->time,
|
||||
curve_type);
|
||||
hit = curve_intersect(
|
||||
kg, &isect, P, dir, t_max_current, prim_object, prim, ray->time, curve_type);
|
||||
|
||||
break;
|
||||
}
|
||||
@@ -226,16 +207,8 @@ ccl_device_inline
|
||||
}
|
||||
|
||||
const int point_type = kernel_data_fetch(prim_type, prim_addr);
|
||||
hit = point_intersect(kg,
|
||||
&isect,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
t_max_current,
|
||||
prim_object,
|
||||
prim,
|
||||
ray->time,
|
||||
point_type);
|
||||
hit = point_intersect(
|
||||
kg, &isect, P, dir, t_max_current, prim_object, prim, ray->time, point_type);
|
||||
break;
|
||||
}
|
||||
#endif /* BVH_FEATURE(BVH_POINTCLOUD) */
|
||||
@@ -329,7 +302,6 @@ ccl_device_inline
|
||||
|
||||
/* Convert intersection to object space. */
|
||||
t_max_current *= t_world_to_instance;
|
||||
tmin *= t_world_to_instance;
|
||||
|
||||
++stack_ptr;
|
||||
kernel_assert(stack_ptr < BVH_STACK_SIZE);
|
||||
@@ -351,8 +323,7 @@ ccl_device_inline
|
||||
#endif
|
||||
|
||||
/* Restore world space ray length. */
|
||||
tmin = ray->tmin;
|
||||
t_max_current = ray->tmax;
|
||||
t_max_current = ray->t;
|
||||
|
||||
object = OBJECT_NONE;
|
||||
t_world_to_instance = 1.0f;
|
||||
|
@@ -43,14 +43,13 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
float3 P = ray->P;
|
||||
float3 dir = bvh_clamp_direction(ray->D);
|
||||
float3 idir = bvh_inverse_direction(dir);
|
||||
float tmin = ray->tmin;
|
||||
int object = OBJECT_NONE;
|
||||
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
Transform ob_itfm;
|
||||
#endif
|
||||
|
||||
isect->t = ray->tmax;
|
||||
isect->t = ray->t;
|
||||
isect->u = 0.0f;
|
||||
isect->v = 0.0f;
|
||||
isect->prim = PRIM_NONE;
|
||||
@@ -72,7 +71,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
dir,
|
||||
#endif
|
||||
idir,
|
||||
tmin,
|
||||
isect->t,
|
||||
node_addr,
|
||||
visibility,
|
||||
@@ -135,16 +133,8 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
|
||||
switch (type & PRIMITIVE_ALL) {
|
||||
case PRIMITIVE_TRIANGLE: {
|
||||
if (triangle_intersect(kg,
|
||||
isect,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
isect->t,
|
||||
visibility,
|
||||
prim_object,
|
||||
prim,
|
||||
prim_addr)) {
|
||||
if (triangle_intersect(
|
||||
kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr)) {
|
||||
/* shadow ray early termination */
|
||||
if (visibility & PATH_RAY_SHADOW_OPAQUE)
|
||||
return true;
|
||||
@@ -157,7 +147,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
isect,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
isect->t,
|
||||
ray->time,
|
||||
visibility,
|
||||
@@ -185,7 +174,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
|
||||
const int curve_type = kernel_data_fetch(prim_type, prim_addr);
|
||||
const bool hit = curve_intersect(
|
||||
kg, isect, P, dir, tmin, isect->t, prim_object, prim, ray->time, curve_type);
|
||||
kg, isect, P, dir, isect->t, prim_object, prim, ray->time, curve_type);
|
||||
if (hit) {
|
||||
/* shadow ray early termination */
|
||||
if (visibility & PATH_RAY_SHADOW_OPAQUE)
|
||||
@@ -206,7 +195,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
|
||||
const int point_type = kernel_data_fetch(prim_type, prim_addr);
|
||||
const bool hit = point_intersect(
|
||||
kg, isect, P, dir, tmin, isect->t, prim_object, prim, ray->time, point_type);
|
||||
kg, isect, P, dir, isect->t, prim_object, prim, ray->time, point_type);
|
||||
if (hit) {
|
||||
/* shadow ray early termination */
|
||||
if (visibility & PATH_RAY_SHADOW_OPAQUE)
|
||||
@@ -223,15 +212,11 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
object = kernel_data_fetch(prim_object, -prim_addr - 1);
|
||||
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
const float t_world_to_instance = bvh_instance_motion_push(
|
||||
kg, object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
#else
|
||||
const float t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
#endif
|
||||
|
||||
isect->t *= t_world_to_instance;
|
||||
tmin *= t_world_to_instance;
|
||||
|
||||
++stack_ptr;
|
||||
kernel_assert(stack_ptr < BVH_STACK_SIZE);
|
||||
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
|
||||
@@ -250,7 +235,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
|
||||
#else
|
||||
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
#endif
|
||||
tmin = ray->tmin;
|
||||
|
||||
object = OBJECT_NONE;
|
||||
node_addr = traversal_stack[stack_ptr];
|
||||
|
@@ -5,19 +5,6 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Offset intersection distance by the smallest possible amount, to skip
|
||||
* intersections at this distance. This works in cases where the ray start
|
||||
* position is unchanged and only tmin is updated, since for self
|
||||
* intersection we'll be comparing against the exact same distances. */
|
||||
ccl_device_forceinline float intersection_t_offset(const float t)
|
||||
{
|
||||
/* This is a simplified version of nextafterf(t, FLT_MAX), only dealing with
|
||||
* non-negative and finite t. */
|
||||
kernel_assert(t >= 0.0f && isfinite_safe(t));
|
||||
const uint32_t bits = (t == 0.0f) ? 1 : __float_as_uint(t) + 1;
|
||||
return __uint_as_float(bits);
|
||||
}
|
||||
|
||||
#if defined(__KERNEL_CPU__)
|
||||
ccl_device int intersections_compare(const void *a, const void *b)
|
||||
{
|
||||
|
@@ -46,14 +46,13 @@ ccl_device_inline
|
||||
float3 P = ray->P;
|
||||
float3 dir = bvh_clamp_direction(ray->D);
|
||||
float3 idir = bvh_inverse_direction(dir);
|
||||
float tmin = ray->tmin;
|
||||
int object = OBJECT_NONE;
|
||||
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
Transform ob_itfm;
|
||||
#endif
|
||||
|
||||
isect->t = ray->tmax;
|
||||
isect->t = ray->t;
|
||||
isect->u = 0.0f;
|
||||
isect->v = 0.0f;
|
||||
isect->prim = PRIM_NONE;
|
||||
@@ -74,7 +73,6 @@ ccl_device_inline
|
||||
dir,
|
||||
#endif
|
||||
idir,
|
||||
tmin,
|
||||
isect->t,
|
||||
node_addr,
|
||||
visibility,
|
||||
@@ -142,7 +140,7 @@ ccl_device_inline
|
||||
continue;
|
||||
}
|
||||
triangle_intersect(
|
||||
kg, isect, P, dir, tmin, isect->t, visibility, prim_object, prim, prim_addr);
|
||||
kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -167,7 +165,6 @@ ccl_device_inline
|
||||
isect,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
isect->t,
|
||||
ray->time,
|
||||
visibility,
|
||||
@@ -189,15 +186,11 @@ ccl_device_inline
|
||||
int object_flag = kernel_data_fetch(object_flag, object);
|
||||
if (object_flag & SD_OBJECT_HAS_VOLUME) {
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
const float t_world_to_instance = bvh_instance_motion_push(
|
||||
kg, object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
#else
|
||||
const float t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
#endif
|
||||
|
||||
isect->t *= t_world_to_instance;
|
||||
tmin *= t_world_to_instance;
|
||||
|
||||
++stack_ptr;
|
||||
kernel_assert(stack_ptr < BVH_STACK_SIZE);
|
||||
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
|
||||
@@ -224,8 +217,6 @@ ccl_device_inline
|
||||
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
#endif
|
||||
|
||||
tmin = ray->tmin;
|
||||
|
||||
object = OBJECT_NONE;
|
||||
node_addr = traversal_stack[stack_ptr];
|
||||
--stack_ptr;
|
||||
|
@@ -44,12 +44,12 @@ ccl_device_inline
|
||||
int node_addr = kernel_data.bvh.root;
|
||||
|
||||
/* ray parameters in registers */
|
||||
const float tmax = ray->t;
|
||||
float3 P = ray->P;
|
||||
float3 dir = bvh_clamp_direction(ray->D);
|
||||
float3 idir = bvh_inverse_direction(dir);
|
||||
float tmin = ray->tmin;
|
||||
int object = OBJECT_NONE;
|
||||
float isect_t = ray->tmax;
|
||||
float isect_t = tmax;
|
||||
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
Transform ob_itfm;
|
||||
@@ -58,7 +58,7 @@ ccl_device_inline
|
||||
int num_hits_in_instance = 0;
|
||||
|
||||
uint num_hits = 0;
|
||||
isect_array->t = ray->tmax;
|
||||
isect_array->t = tmax;
|
||||
|
||||
/* traversal loop */
|
||||
do {
|
||||
@@ -75,7 +75,6 @@ ccl_device_inline
|
||||
dir,
|
||||
#endif
|
||||
idir,
|
||||
tmin,
|
||||
isect_t,
|
||||
node_addr,
|
||||
visibility,
|
||||
@@ -142,16 +141,8 @@ ccl_device_inline
|
||||
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
|
||||
continue;
|
||||
}
|
||||
hit = triangle_intersect(kg,
|
||||
isect_array,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
isect_t,
|
||||
visibility,
|
||||
prim_object,
|
||||
prim,
|
||||
prim_addr);
|
||||
hit = triangle_intersect(
|
||||
kg, isect_array, P, dir, isect_t, visibility, prim_object, prim, prim_addr);
|
||||
if (hit) {
|
||||
/* Move on to next entry in intersections array. */
|
||||
isect_array++;
|
||||
@@ -198,7 +189,6 @@ ccl_device_inline
|
||||
isect_array,
|
||||
P,
|
||||
dir,
|
||||
tmin,
|
||||
isect_t,
|
||||
ray->time,
|
||||
visibility,
|
||||
@@ -242,15 +232,11 @@ ccl_device_inline
|
||||
int object_flag = kernel_data_fetch(object_flag, object);
|
||||
if (object_flag & SD_OBJECT_HAS_VOLUME) {
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
const float t_world_to_instance = bvh_instance_motion_push(
|
||||
kg, object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
isect_t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm);
|
||||
#else
|
||||
const float t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
isect_t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir);
|
||||
#endif
|
||||
|
||||
isect_t *= t_world_to_instance;
|
||||
tmin *= t_world_to_instance;
|
||||
|
||||
num_hits_in_instance = 0;
|
||||
isect_array->t = isect_t;
|
||||
|
||||
@@ -294,8 +280,7 @@ ccl_device_inline
|
||||
#endif
|
||||
}
|
||||
|
||||
tmin = ray->tmin;
|
||||
isect_t = ray->tmax;
|
||||
isect_t = tmax;
|
||||
isect_array->t = isect_t;
|
||||
|
||||
object = OBJECT_NONE;
|
||||
|
@@ -165,11 +165,9 @@ ccl_device void camera_sample_perspective(KernelGlobals kg,
|
||||
float nearclip = kernel_data.cam.nearclip * z_inv;
|
||||
ray->P += nearclip * ray->D;
|
||||
ray->dP += nearclip * ray->dD;
|
||||
ray->tmin = 0.0f;
|
||||
ray->tmax = kernel_data.cam.cliplength * z_inv;
|
||||
ray->t = kernel_data.cam.cliplength * z_inv;
|
||||
#else
|
||||
ray->tmin = 0.0f;
|
||||
ray->tmax = FLT_MAX;
|
||||
ray->t = FLT_MAX;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -233,11 +231,9 @@ ccl_device void camera_sample_orthographic(KernelGlobals kg,
|
||||
|
||||
#ifdef __CAMERA_CLIPPING__
|
||||
/* clipping */
|
||||
ray->tmin = 0.0f;
|
||||
ray->tmax = kernel_data.cam.cliplength;
|
||||
ray->t = kernel_data.cam.cliplength;
|
||||
#else
|
||||
ray->tmin = 0.0f;
|
||||
ray->tmax = FLT_MAX;
|
||||
ray->t = FLT_MAX;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -262,7 +258,7 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
|
||||
|
||||
/* indicates ray should not receive any light, outside of the lens */
|
||||
if (is_zero(D)) {
|
||||
ray->tmax = 0.0f;
|
||||
ray->t = 0.0f;
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -353,11 +349,9 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
|
||||
float nearclip = cam->nearclip;
|
||||
ray->P += nearclip * ray->D;
|
||||
ray->dP += nearclip * ray->dD;
|
||||
ray->tmin = 0.0f;
|
||||
ray->tmax = cam->cliplength;
|
||||
ray->t = cam->cliplength;
|
||||
#else
|
||||
ray->tmin = 0.0f;
|
||||
ray->tmax = FLT_MAX;
|
||||
ray->t = FLT_MAX;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -374,7 +368,7 @@ ccl_device_inline void camera_sample(KernelGlobals kg,
|
||||
ccl_private Ray *ray)
|
||||
{
|
||||
/* pixel filter */
|
||||
int filter_table_offset = kernel_data.tables.filter_table_offset;
|
||||
int filter_table_offset = kernel_data.film.filter_table_offset;
|
||||
float raster_x = x + lookup_table_read(kg, filter_u, filter_table_offset, FILTER_TABLE_SIZE);
|
||||
float raster_y = y + lookup_table_read(kg, filter_v, filter_table_offset, FILTER_TABLE_SIZE);
|
||||
|
||||
|
@@ -51,7 +51,7 @@ ccl_device_inline ccl_private ShaderClosure *bsdf_alloc(ccl_private ShaderData *
|
||||
int size,
|
||||
float3 weight)
|
||||
{
|
||||
kernel_assert(isfinite_safe(weight));
|
||||
kernel_assert(isfinite3_safe(weight));
|
||||
|
||||
const float sample_weight = fabsf(average(weight));
|
||||
|
||||
@@ -77,7 +77,7 @@ ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd,
|
||||
float3 weight,
|
||||
void *data)
|
||||
{
|
||||
kernel_assert(isfinite_safe(weight));
|
||||
kernel_assert(isfinite3_safe(weight));
|
||||
|
||||
const float sample_weight = fabsf(average(weight));
|
||||
|
||||
|
@@ -439,7 +439,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
|
||||
*eval *= shift_cos_in(dot(*omega_in, sc->N), frequency_multiplier);
|
||||
}
|
||||
if (label & LABEL_DIFFUSE) {
|
||||
if (!isequal(sc->N, sd->N)) {
|
||||
if (!isequal_float3(sc->N, sd->N)) {
|
||||
*eval *= bump_shadowing_term((label & LABEL_TRANSMIT) ? -sd->N : sd->N, sc->N, *omega_in);
|
||||
}
|
||||
}
|
||||
@@ -550,7 +550,7 @@ ccl_device_inline
|
||||
break;
|
||||
}
|
||||
if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
|
||||
if (!isequal(sc->N, sd->N)) {
|
||||
if (!isequal_float3(sc->N, sd->N)) {
|
||||
eval *= bump_shadowing_term(sd->N, sc->N, omega_in);
|
||||
}
|
||||
}
|
||||
@@ -635,7 +635,7 @@ ccl_device_inline
|
||||
break;
|
||||
}
|
||||
if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
|
||||
if (!isequal(sc->N, sd->N)) {
|
||||
if (!isequal_float3(sc->N, sd->N)) {
|
||||
eval *= bump_shadowing_term(-sd->N, sc->N, omega_in);
|
||||
}
|
||||
}
|
||||
|
@@ -203,7 +203,7 @@ ccl_device int bsdf_principled_hair_setup(ccl_private ShaderData *sd,
|
||||
float h = (sd->type & PRIMITIVE_CURVE_RIBBON) ? -sd->v : dot(cross(sd->Ng, X), Z);
|
||||
|
||||
kernel_assert(fabsf(h) < 1.0f + 1e-4f);
|
||||
kernel_assert(isfinite_safe(Y));
|
||||
kernel_assert(isfinite3_safe(Y));
|
||||
kernel_assert(isfinite_safe(h));
|
||||
|
||||
bsdf->extra->geom = make_float4(Y.x, Y.y, Y.z, h);
|
||||
@@ -272,7 +272,7 @@ ccl_device float3 bsdf_principled_hair_eval(KernelGlobals kg,
|
||||
const float3 omega_in,
|
||||
ccl_private float *pdf)
|
||||
{
|
||||
kernel_assert(isfinite_safe(sd->P) && isfinite_safe(sd->ray_length));
|
||||
kernel_assert(isfinite3_safe(sd->P) && isfinite_safe(sd->ray_length));
|
||||
|
||||
ccl_private const PrincipledHairBSDF *bsdf = (ccl_private const PrincipledHairBSDF *)sc;
|
||||
float3 Y = float4_to_float3(bsdf->extra->geom);
|
||||
@@ -299,7 +299,7 @@ ccl_device float3 bsdf_principled_hair_eval(KernelGlobals kg,
|
||||
float cos_gamma_t = cos_from_sin(sin_gamma_t);
|
||||
float gamma_t = safe_asinf(sin_gamma_t);
|
||||
|
||||
float3 T = exp(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
|
||||
float3 T = exp3(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
|
||||
float4 Ap[4];
|
||||
hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap);
|
||||
|
||||
@@ -319,25 +319,25 @@ ccl_device float3 bsdf_principled_hair_eval(KernelGlobals kg,
|
||||
Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness);
|
||||
Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t);
|
||||
F = Ap[0] * Mp * Np;
|
||||
kernel_assert(isfinite_safe(float4_to_float3(F)));
|
||||
kernel_assert(isfinite3_safe(float4_to_float3(F)));
|
||||
|
||||
/* Transmission (TT). */
|
||||
Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f * bsdf->v);
|
||||
Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t);
|
||||
F += Ap[1] * Mp * Np;
|
||||
kernel_assert(isfinite_safe(float4_to_float3(F)));
|
||||
kernel_assert(isfinite3_safe(float4_to_float3(F)));
|
||||
|
||||
/* Secondary specular (TRT). */
|
||||
Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
|
||||
Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t);
|
||||
F += Ap[2] * Mp * Np;
|
||||
kernel_assert(isfinite_safe(float4_to_float3(F)));
|
||||
kernel_assert(isfinite3_safe(float4_to_float3(F)));
|
||||
|
||||
/* Residual component (TRRT+). */
|
||||
Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
|
||||
Np = M_1_2PI_F;
|
||||
F += Ap[3] * Mp * Np;
|
||||
kernel_assert(isfinite_safe(float4_to_float3(F)));
|
||||
kernel_assert(isfinite3_safe(float4_to_float3(F)));
|
||||
|
||||
*pdf = F.w;
|
||||
return float4_to_float3(F);
|
||||
@@ -385,7 +385,7 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg,
|
||||
float cos_gamma_t = cos_from_sin(sin_gamma_t);
|
||||
float gamma_t = safe_asinf(sin_gamma_t);
|
||||
|
||||
float3 T = exp(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
|
||||
float3 T = exp3(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
|
||||
float4 Ap[4];
|
||||
hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap);
|
||||
|
||||
@@ -436,25 +436,25 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg,
|
||||
Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness);
|
||||
Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t);
|
||||
F = Ap[0] * Mp * Np;
|
||||
kernel_assert(isfinite_safe(float4_to_float3(F)));
|
||||
kernel_assert(isfinite3_safe(float4_to_float3(F)));
|
||||
|
||||
/* Transmission (TT). */
|
||||
Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f * bsdf->v);
|
||||
Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t);
|
||||
F += Ap[1] * Mp * Np;
|
||||
kernel_assert(isfinite_safe(float4_to_float3(F)));
|
||||
kernel_assert(isfinite3_safe(float4_to_float3(F)));
|
||||
|
||||
/* Secondary specular (TRT). */
|
||||
Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
|
||||
Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t);
|
||||
F += Ap[2] * Mp * Np;
|
||||
kernel_assert(isfinite_safe(float4_to_float3(F)));
|
||||
kernel_assert(isfinite3_safe(float4_to_float3(F)));
|
||||
|
||||
/* Residual component (TRRT+). */
|
||||
Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
|
||||
Np = M_1_2PI_F;
|
||||
F += Ap[3] * Mp * Np;
|
||||
kernel_assert(isfinite_safe(float4_to_float3(F)));
|
||||
kernel_assert(isfinite3_safe(float4_to_float3(F)));
|
||||
|
||||
*eval = float4_to_float3(F);
|
||||
*pdf = F.w;
|
||||
@@ -492,13 +492,13 @@ ccl_device_inline float bsdf_principled_hair_albedo_roughness_scale(
|
||||
ccl_device float3 bsdf_principled_hair_albedo(ccl_private const ShaderClosure *sc)
|
||||
{
|
||||
ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)sc;
|
||||
return exp(-sqrt(bsdf->sigma) * bsdf_principled_hair_albedo_roughness_scale(bsdf->v));
|
||||
return exp3(-sqrt(bsdf->sigma) * bsdf_principled_hair_albedo_roughness_scale(bsdf->v));
|
||||
}
|
||||
|
||||
ccl_device_inline float3
|
||||
bsdf_principled_hair_sigma_from_reflectance(const float3 color, const float azimuthal_roughness)
|
||||
{
|
||||
const float3 sigma = log(color) /
|
||||
const float3 sigma = log3(color) /
|
||||
bsdf_principled_hair_albedo_roughness_scale(azimuthal_roughness);
|
||||
return sigma * sigma;
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user