Compare commits


3 Commits

Author SHA1 Message Date
422c330390 UI: Select regions of text and toggle its style
1. Text can be selected by dragging the cursor with the mouse pointer.
2. After making a selection, the style of the selected region can be toggled.
3. A word can be selected with a double click.
2022-09-01 17:31:37 +05:30
8cd76bff47 UI: Select regions of text and toggle its style
1) Text can be selected by dragging the cursor with the mouse pointer.
2) After making a selection, the style of the selected region can be toggled.
2022-08-18 12:02:18 +05:30
9d1cfd9dde Initial Commit 2022-08-17 12:02:10 +05:30
1562 changed files with 33294 additions and 66164 deletions

View File

@@ -265,7 +265,6 @@ ForEachMacros:
- SET_SLOT_PROBING_BEGIN
- MAP_SLOT_PROBING_BEGIN
- VECTOR_SET_SLOT_PROBING_BEGIN
- WL_ARRAY_FOR_EACH
StatementMacros:
- PyObject_HEAD

View File

@@ -222,17 +222,6 @@ if(UNIX AND NOT (APPLE OR HAIKU))
option(WITH_GHOST_WAYLAND "Enable building Blender against Wayland for windowing (under development)" OFF)
mark_as_advanced(WITH_GHOST_WAYLAND)
if (WITH_GHOST_WAYLAND)
option(WITH_GHOST_WAYLAND_LIBDECOR "Optionally build with LibDecor window decorations" OFF)
mark_as_advanced(WITH_GHOST_WAYLAND_LIBDECOR)
option(WITH_GHOST_WAYLAND_DBUS "Optionally build with DBUS support (used for Cursor themes). May hang on startup systems where DBUS is not used." OFF)
mark_as_advanced(WITH_GHOST_WAYLAND_DBUS)
option(WITH_GHOST_WAYLAND_DYNLOAD "Enable runtime dynamic WAYLAND libraries loading" OFF)
mark_as_advanced(WITH_GHOST_WAYLAND_DYNLOAD)
endif()
endif()
if(WITH_GHOST_X11)
@@ -266,11 +255,19 @@ if(WITH_GHOST_X11)
endif()
if(UNIX AND NOT APPLE)
option(WITH_SYSTEM_GLEW "Use GLEW OpenGL wrapper library provided by the operating system" OFF)
option(WITH_SYSTEM_GLES "Use OpenGL ES library provided by the operating system" ON)
option(WITH_SYSTEM_FREETYPE "Use the freetype library provided by the operating system" OFF)
option(WITH_SYSTEM_EIGEN3 "Use the systems Eigen3 library" OFF)
else()
# not an option for other OS's
set(WITH_SYSTEM_GLEW OFF)
set(WITH_SYSTEM_GLES OFF)
set(WITH_SYSTEM_FREETYPE OFF)
set(WITH_SYSTEM_EIGEN3 OFF)
endif()
if(UNIX AND NOT APPLE)
option(WITH_SYSTEM_EIGEN3 "Use the systems Eigen3 library" OFF)
endif()
@@ -447,7 +444,7 @@ endif()
if(NOT APPLE)
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 CACHE STRING "AMD HIP architectures to build binaries for")
set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 CACHE STRING "AMD HIP architectures to build binaries for")
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
endif()
@@ -457,21 +454,6 @@ if(APPLE)
option(WITH_CYCLES_DEVICE_METAL "Enable Cycles Apple Metal compute support" ON)
endif()
# oneAPI
if(NOT APPLE)
option(WITH_CYCLES_DEVICE_ONEAPI "Enable Cycles oneAPI compute support" OFF)
option(WITH_CYCLES_ONEAPI_BINARIES "Enable Ahead-Of-Time compilation for Cycles oneAPI device" OFF)
option(WITH_CYCLES_ONEAPI_SYCL_HOST_ENABLED "Enable use of SYCL host (CPU) device execution by oneAPI implementation. This option is for debugging purposes and impacts GPU execution." OFF)
# https://www.intel.com/content/www/us/en/develop/documentation/oneapi-dpcpp-cpp-compiler-dev-guide-and-reference/top/compilation/ahead-of-time-compilation.html
SET (CYCLES_ONEAPI_SPIR64_GEN_DEVICES "dg2" CACHE STRING "oneAPI Intel GPU architectures to build binaries for")
SET (CYCLES_ONEAPI_SYCL_TARGETS spir64 spir64_gen CACHE STRING "oneAPI targets to build AOT binaries for")
mark_as_advanced(WITH_CYCLES_ONEAPI_SYCL_HOST_ENABLED)
mark_as_advanced(CYCLES_ONEAPI_SPIR64_GEN_DEVICES)
mark_as_advanced(CYCLES_ONEAPI_SYCL_TARGETS)
endif()
# Draw Manager
option(WITH_DRAW_DEBUG "Add extra debug capabilities to Draw Manager" OFF)
mark_as_advanced(WITH_DRAW_DEBUG)
@@ -536,48 +518,20 @@ endif()
# OpenGL
# Experimental EGL option.
option(WITH_GL_EGL "Use the EGL OpenGL system library instead of the platform specific OpenGL system library (CGL, GLX or WGL)" OFF)
mark_as_advanced(WITH_GL_EGL)
if(WITH_GHOST_WAYLAND)
# Wayland can only use EGL to create OpenGL contexts, not GLX.
set(WITH_GL_EGL ON)
endif()
if(UNIX AND NOT APPLE)
if(WITH_GL_EGL)
# GLEW can only be built with either GLX or EGL support. Most binary distributions are
# built with GLX support and we have no automated way to detect this. So always build
# GLEW from source to be sure it has EGL support.
set(WITH_SYSTEM_GLEW OFF)
else()
option(WITH_SYSTEM_GLEW "Use GLEW OpenGL wrapper library provided by the operating system" OFF)
endif()
option(WITH_SYSTEM_GLES "Use OpenGL ES library provided by the operating system" ON)
else()
# System GLEW and GLES not an option on other platforms.
set(WITH_SYSTEM_GLEW OFF)
set(WITH_SYSTEM_GLES OFF)
endif()
option(WITH_OPENGL "When off limits visibility of the opengl headers to just bf_gpu and gawain (temporary option for development purposes)" ON)
option(WITH_GLEW_ES "Switches to experimental copy of GLEW that has support for OpenGL ES. (temporary option for development purposes)" OFF)
option(WITH_GL_EGL "Use the EGL OpenGL system library instead of the platform specific OpenGL system library (CGL, glX, or WGL)" OFF)
option(WITH_GL_PROFILE_ES20 "Support using OpenGL ES 2.0. (through either EGL or the AGL/WGL/XGL 'es20' profile)" OFF)
option(WITH_GPU_BUILDTIME_SHADER_BUILDER "Shader builder is a developer option enabling linting on GLSL during compilation" OFF)
option(WITH_GPU_SHADER_BUILDER "Shader builder is a developer option enabling linting on GLSL during compilation" OFF)
mark_as_advanced(
WITH_OPENGL
WITH_GLEW_ES
WITH_GL_EGL
WITH_GL_PROFILE_ES20
WITH_GPU_BUILDTIME_SHADER_BUILDER
WITH_GPU_SHADER_BUILDER
)
if(WITH_HEADLESS)
set(WITH_OPENGL OFF)
endif()
# Metal
if (APPLE)

View File

@@ -29,12 +29,10 @@ cmake_minimum_required(VERSION 3.5)
include(ExternalProject)
include(cmake/check_software.cmake)
include(cmake/options.cmake)
# versions.cmake needs to be included after options.cmake due to the BLENDER_PLATFORM_ARM variable being needed.
include(cmake/versions.cmake)
include(cmake/options.cmake)
include(cmake/boost_build_options.cmake)
include(cmake/download.cmake)
include(cmake/macros.cmake)
if(ENABLE_MINGW64)
include(cmake/setup_mingw64.cmake)
@@ -98,15 +96,6 @@ include(cmake/fmt.cmake)
include(cmake/robinmap.cmake)
if(NOT APPLE)
include(cmake/xr_openxr.cmake)
if(NOT WIN32 OR BUILD_MODE STREQUAL Release)
include(cmake/dpcpp.cmake)
include(cmake/dpcpp_deps.cmake)
endif()
if(NOT WIN32)
include(cmake/igc.cmake)
include(cmake/gmmlib.cmake)
include(cmake/ocloc.cmake)
endif()
endif()
# OpenColorIO and dependencies.

View File

@@ -56,7 +56,10 @@ if(UNIX)
"On Debian and Ubuntu:\n"
" apt install autoconf automake libtool yasm tcl ninja-build meson python3-mako\n"
"\n"
"On macOS (with homebrew):\n"
"On macOS Intel (with homebrew):\n"
" brew install autoconf automake bison libtool pkg-config yasm\n"
"\n"
"On macOS ARM (with homebrew):\n"
" brew install autoconf automake bison flex libtool pkg-config yasm\n"
"\n"
"Other platforms:\n"

View File

@@ -101,18 +101,3 @@ download_source(ROBINMAP)
download_source(IMATH)
download_source(PYSTRING)
download_source(LEVEL_ZERO)
download_source(DPCPP)
download_source(VCINTRINSICS)
download_source(OPENCLHEADERS)
download_source(ICDLOADER)
download_source(MP11)
download_source(SPIRV_HEADERS)
download_source(IGC)
download_source(IGC_LLVM)
download_source(IGC_OPENCL_CLANG)
download_source(IGC_VCINTRINSICS)
download_source(IGC_SPIRV_HEADERS)
download_source(IGC_SPIRV_TOOLS)
download_source(IGC_SPIRV_TRANSLATOR)
download_source(GMMLIB)
download_source(OCLOC)

View File

@@ -1,109 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-or-later
if(WIN32)
set(LLVM_GENERATOR "Ninja")
else()
set(LLVM_GENERATOR "Unix Makefiles")
endif()
set(DPCPP_CONFIGURE_ARGS
# When external deps that dpcpp needs are not found, it will automatically
# download them during the configure stage using FetchContent. Given that
# we need to keep an archive of all source used during the build for compliance
# reasons, it CANNOT download anything we do not know about. By setting
# this property to ON, all downloads are disabled and we will have to
# provide the missing deps some other way; a build error beats a compliance
# violation.
--cmake-opt FETCHCONTENT_FULLY_DISCONNECTED=ON
)
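A minimal standalone sketch of the disconnected-FetchContent mechanism relied on in the comment above (not part of Blender's build; the project name, dependency name, source path and URL are all hypothetical):
cmake_minimum_required(VERSION 3.16)
project(disconnected_demo LANGUAGES NONE)
include(FetchContent)
# Provide the dependency source tree ourselves (path is hypothetical).
set(FETCHCONTENT_SOURCE_DIR_MYDEP "/opt/src/mydep-1.0")
# Disable all FetchContent downloads: anything not provided locally becomes
# a hard configure error instead of a silent download.
set(FETCHCONTENT_FULLY_DISCONNECTED ON)
FetchContent_Declare(mydep URL "https://example.invalid/mydep-1.0.tar.gz")
FetchContent_MakeAvailable(mydep)  # uses the local tree, performs no download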
set(DPCPP_SOURCE_ROOT ${BUILD_DIR}/dpcpp/src/external_dpcpp/)
set(DPCPP_EXTRA_ARGS
# When external deps that dpcpp needs are not found, it will automatically
# download them during the configure stage using FetchContent. Given that
# we need to keep an archive of all source used during the build for compliance
# reasons, it CANNOT download anything we do not know about. By setting
# this property to ON, all downloads are disabled and we will have to
# provide the missing deps some other way; a build or configure error
# beats a compliance violation.
-DFETCHCONTENT_FULLY_DISCONNECTED=ON
-DLLVMGenXIntrinsics_SOURCE_DIR=${BUILD_DIR}/vcintrinsics/src/external_vcintrinsics/
-DOpenCL_HEADERS=file://${PACKAGE_DIR}/${OPENCLHEADERS_FILE}
-DOpenCL_LIBRARY_SRC=file://${PACKAGE_DIR}/${ICDLOADER_FILE}
-DBOOST_MP11_SOURCE_DIR=${BUILD_DIR}/mp11/src/external_mp11/
-DLEVEL_ZERO_LIBRARY=${LIBDIR}/level-zero/lib/${LIBPREFIX}ze_loader${SHAREDLIBEXT}
-DLEVEL_ZERO_INCLUDE_DIR=${LIBDIR}/level-zero/include
-DLLVM_EXTERNAL_SPIRV_HEADERS_SOURCE_DIR=${BUILD_DIR}/spirvheaders/src/external_spirvheaders/
# Below here is copied from an invocation of buildbot/config.py
-DLLVM_ENABLE_ASSERTIONS=ON
-DLLVM_TARGETS_TO_BUILD=X86
-DLLVM_EXTERNAL_PROJECTS=sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
-DLLVM_EXTERNAL_SYCL_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/sycl
-DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/llvm-spirv
-DLLVM_EXTERNAL_XPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
-DXPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
-DLLVM_EXTERNAL_XPTIFW_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xptifw
-DLLVM_EXTERNAL_LIBDEVICE_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/libdevice
-DLLVM_ENABLE_PROJECTS=clang^^sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
-DLIBCLC_TARGETS_TO_BUILD=
-DLIBCLC_GENERATE_REMANGLED_VARIANTS=OFF
-DSYCL_BUILD_PI_HIP_PLATFORM=AMD
-DLLVM_BUILD_TOOLS=ON
-DSYCL_ENABLE_WERROR=OFF
-DSYCL_INCLUDE_TESTS=ON
-DLLVM_ENABLE_DOXYGEN=OFF
-DLLVM_ENABLE_SPHINX=OFF
-DBUILD_SHARED_LIBS=OFF
-DSYCL_ENABLE_XPTI_TRACING=ON
-DLLVM_ENABLE_LLD=OFF
-DXPTI_ENABLE_WERROR=OFF
-DSYCL_CLANG_EXTRA_FLAGS=
-DSYCL_ENABLE_PLUGINS=level_zero
-DCMAKE_INSTALL_RPATH=\$ORIGIN
-DPython3_ROOT_DIR=${LIBDIR}/python/
-DPython3_EXECUTABLE=${PYTHON_BINARY}
-DPYTHON_EXECUTABLE=${PYTHON_BINARY}
-DLLDB_ENABLE_CURSES=OFF
-DLLVM_ENABLE_TERMINFO=OFF
)
if(WIN32)
list(APPEND DPCPP_EXTRA_ARGS -DPython3_FIND_REGISTRY=NEVER)
endif()
ExternalProject_Add(external_dpcpp
URL file://${PACKAGE_DIR}/${DPCPP_FILE}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
URL_HASH ${DPCPP_HASH_TYPE}=${DPCPP_HASH}
PREFIX ${BUILD_DIR}/dpcpp
CMAKE_GENERATOR ${LLVM_GENERATOR}
SOURCE_SUBDIR llvm
LIST_SEPARATOR ^^
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/dpcpp ${DEFAULT_CMAKE_FLAGS} ${DPCPP_EXTRA_ARGS}
#CONFIGURE_COMMAND ${PYTHON_BINARY} ${BUILD_DIR}/dpcpp/src/external_dpcpp/buildbot/configure.py ${DPCPP_CONFIGURE_ARGS}
#BUILD_COMMAND echo "." #${PYTHON_BINARY} ${BUILD_DIR}/dpcpp/src/external_dpcpp/buildbot/compile.py
INSTALL_COMMAND ${CMAKE_COMMAND} --build . -- deploy-sycl-toolchain
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/dpcpp/src/external_dpcpp < ${PATCH_DIR}/dpcpp.diff
INSTALL_DIR ${LIBDIR}/dpcpp
)
add_dependencies(
external_dpcpp
external_python
external_python_site_packages
external_vcintrinsics
external_openclheaders
external_icdloader
external_mp11
external_level-zero
external_spirvheaders
)
if(BUILD_MODE STREQUAL Release AND WIN32)
ExternalProject_Add_Step(external_dpcpp after_install
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cl.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cpp.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang.exe
COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/dpcpp ${HARVEST_TARGET}/dpcpp
)
endif()

View File

@@ -1,61 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-or-later
# These are build-time requirements for dpcpp.
# We only have to unpack these; dpcpp will build
# them.
ExternalProject_Add(external_vcintrinsics
URL file://${PACKAGE_DIR}/${VCINTRINSICS_FILE}
URL_HASH ${VCINTRINSICS_HASH_TYPE}=${VCINTRINSICS_HASH}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
PREFIX ${BUILD_DIR}/vcintrinsics
CONFIGURE_COMMAND echo .
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
)
# The OpenCL headers do not have to be unpacked, dpcpp will do it,
# but it wouldn't hurt to do it anyway as an opportunity to validate
# that the hash is correct.
ExternalProject_Add(external_openclheaders
URL file://${PACKAGE_DIR}/${OPENCLHEADERS_FILE}
URL_HASH ${OPENCLHEADERS_HASH_TYPE}=${OPENCLHEADERS_HASH}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
PREFIX ${BUILD_DIR}/openclheaders
CONFIGURE_COMMAND echo .
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
)
# The icdloader does not have to be unpacked, dpcpp will do it,
# but it wouldn't hurt to do it anyway as an opportunity to validate
# that the hash is correct.
ExternalProject_Add(external_icdloader
URL file://${PACKAGE_DIR}/${ICDLOADER_FILE}
URL_HASH ${ICDLOADER_HASH_TYPE}=${ICDLOADER_HASH}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
PREFIX ${BUILD_DIR}/icdloader
CONFIGURE_COMMAND echo .
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
)
ExternalProject_Add(external_mp11
URL file://${PACKAGE_DIR}/${MP11_FILE}
URL_HASH ${MP11_HASH_TYPE}=${MP11_HASH}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
PREFIX ${BUILD_DIR}/mp11
CONFIGURE_COMMAND echo .
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
)
ExternalProject_Add(external_spirvheaders
URL file://${PACKAGE_DIR}/${SPIRV_HEADERS_FILE}
URL_HASH ${SPIRV_HEADERS_HASH_TYPE}=${SPIRV_HEADERS_HASH}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
PREFIX ${BUILD_DIR}/spirvheaders
CONFIGURE_COMMAND echo .
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
)

View File

@@ -10,12 +10,18 @@ set(EMBREE_EXTRA_ARGS
-DEMBREE_RAY_MASK=ON
-DEMBREE_FILTER_FUNCTION=ON
-DEMBREE_BACKFACE_CULLING=OFF
-DEMBREE_MAX_ISA=AVX2
-DEMBREE_TASKING_SYSTEM=TBB
-DEMBREE_TBB_ROOT=${LIBDIR}/tbb
-DTBB_ROOT=${LIBDIR}/tbb
-DTBB_STATIC_LIB=${TBB_STATIC_LIBRARY}
)
if (NOT BLENDER_PLATFORM_ARM)
if(BLENDER_PLATFORM_ARM)
set(EMBREE_EXTRA_ARGS
${EMBREE_EXTRA_ARGS}
-DEMBREE_MAX_ISA=NEON)
else()
set(EMBREE_EXTRA_ARGS
${EMBREE_EXTRA_ARGS}
-DEMBREE_MAX_ISA=AVX2)
@@ -24,10 +30,23 @@ endif()
if(TBB_STATIC_LIBRARY)
set(EMBREE_EXTRA_ARGS
${EMBREE_EXTRA_ARGS}
-DEMBREE_TBB_COMPONENT=tbb_static
-DEMBREE_TBB_LIBRARY_NAME=tbb_static
-DEMBREE_TBBMALLOC_LIBRARY_NAME=tbbmalloc_static
)
endif()
if(WIN32)
set(EMBREE_BUILD_DIR ${BUILD_MODE}/)
if(BUILD_MODE STREQUAL Debug)
list(APPEND EMBREE_EXTRA_ARGS
-DEMBREE_TBBMALLOC_LIBRARY_NAME=tbbmalloc_debug
-DEMBREE_TBB_LIBRARY_NAME=tbb_debug
)
endif()
else()
set(EMBREE_BUILD_DIR)
endif()
ExternalProject_Add(external_embree
URL file://${PACKAGE_DIR}/${EMBREE_FILE}
DOWNLOAD_DIR ${DOWNLOAD_DIR}

View File

@@ -5,8 +5,6 @@ ExternalProject_Add(external_flex
URL_HASH ${FLEX_HASH_TYPE}=${FLEX_HASH}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
PREFIX ${BUILD_DIR}/flex
# This patch fixes build with some versions of glibc (https://github.com/westes/flex/commit/24fd0551333e7eded87b64dd36062da3df2f6380)
PATCH_COMMAND ${PATCH_CMD} -d ${BUILD_DIR}/flex/src/external_flex < ${PATCH_DIR}/flex.diff
CONFIGURE_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/flex/src/external_flex/ && ${CONFIGURE_COMMAND} --prefix=${LIBDIR}/flex
BUILD_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/flex/src/external_flex/ && make -j${MAKE_THREADS}
INSTALL_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/flex/src/external_flex/ && make install

View File

@@ -1,13 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-or-later
set(GMMLIB_EXTRA_ARGS
)
ExternalProject_Add(external_gmmlib
URL file://${PACKAGE_DIR}/${GMMLIB_FILE}
URL_HASH ${GMMLIB_HASH_TYPE}=${GMMLIB_HASH}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
PREFIX ${BUILD_DIR}/gmmlib
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/gmmlib ${DEFAULT_CMAKE_FLAGS} ${GMMLIB_EXTRA_ARGS}
INSTALL_DIR ${LIBDIR}/gmmlib
)

View File

@@ -192,10 +192,6 @@ harvest(zstd/lib zstd/lib "*.a")
if(UNIX AND NOT APPLE)
harvest(libglu/lib mesa/lib "*.so*")
harvest(mesa/lib64 mesa/lib "*.so*")
harvest(dpcpp dpcpp "*")
harvest(igc dpcpp/lib/igc "*")
harvest(ocloc dpcpp/lib/ocloc "*")
endif()
endif()
endif()

View File

@@ -1,126 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-or-later
unpack_only(igc_vcintrinsics)
unpack_only(igc_spirv_headers)
unpack_only(igc_spirv_tools)
#
# igc_opencl_clang contains patches that need to be applied
# to external_igc_llvm and igc_spirv_translator. We unpack
# igc_opencl_clang first, then have the patch stages of
# external_igc_llvm and igc_spirv_translator apply them.
#
ExternalProject_Add(external_igc_opencl_clang
URL file://${PACKAGE_DIR}/${IGC_OPENCL_CLANG_FILE}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
URL_HASH ${IGC_OPENCL_CLANG_HASH_TYPE}=${IGC_OPENCL_CLANG_HASH}
PREFIX ${BUILD_DIR}/igc_opencl_clang
CONFIGURE_COMMAND echo .
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/igc_opencl_clang/src/external_igc_opencl_clang/ < ${PATCH_DIR}/igc_opencl_clang.diff
)
set(IGC_OPENCL_CLANG_PATCH_DIR ${BUILD_DIR}/igc_opencl_clang/src/external_igc_opencl_clang/patches)
set(IGC_LLVM_SOURCE_DIR ${BUILD_DIR}/igc_llvm/src/external_igc_llvm)
set(IGC_SPIRV_TRANSLATOR_SOURCE_DIR ${BUILD_DIR}/igc_spirv_translator/src/external_igc_spirv_translator)
ExternalProject_Add(external_igc_llvm
URL file://${PACKAGE_DIR}/${IGC_LLVM_FILE}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
URL_HASH ${IGC_LLVM_HASH_TYPE}=${IGC_LLVM_HASH}
PREFIX ${BUILD_DIR}/igc_llvm
CONFIGURE_COMMAND echo .
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0001-OpenCL-3.0-support.patch &&
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0002-Remove-__IMAGE_SUPPORT__-macro-for-SPIR.patch &&
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0003-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch &&
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0004-OpenCL-support-cl_ext_float_atomics.patch &&
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0005-OpenCL-Add-cl_khr_integer_dot_product.patch &&
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch &&
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0002-Remove-repo-name-in-LLVM-IR.patch
)
add_dependencies(
external_igc_llvm
external_igc_opencl_clang
)
ExternalProject_Add(external_igc_spirv_translator
URL file://${PACKAGE_DIR}/${IGC_SPIRV_TRANSLATOR_FILE}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
URL_HASH ${IGC_SPIRV_TRANSLATOR_HASH_TYPE}=${IGC_SPIRV_TRANSLATOR_HASH}
PREFIX ${BUILD_DIR}/igc_spirv_translator
CONFIGURE_COMMAND echo .
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0001-update-SPIR-V-headers-for-SPV_INTEL_split_barrier.patch &&
${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0002-Add-support-for-split-barriers-extension-SPV_INTEL_s.patch &&
${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0003-Support-cl_bf16_conversions.patch
)
add_dependencies(
external_igc_spirv_translator
external_igc_opencl_clang
)
if(WIN32)
set(IGC_GENERATOR "Ninja")
set(IGC_TARGET Windows64)
else()
set(IGC_GENERATOR "Unix Makefiles")
set(IGC_TARGET Linux64)
endif()
set(IGC_EXTRA_ARGS
-DIGC_OPTION__ARCHITECTURE_TARGET=${IGC_TARGET}
-DIGC_OPTION__ARCHITECTURE_HOST=${IGC_TARGET}
)
if(UNIX AND NOT APPLE)
list(APPEND IGC_EXTRA_ARGS
-DFLEX_EXECUTABLE=${LIBDIR}/flex/bin/flex
-DFLEX_INCLUDE_DIR=${LIBDIR}/flex/include
)
endif()
ExternalProject_Add(external_igc
URL file://${PACKAGE_DIR}/${IGC_FILE}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
URL_HASH ${IGC_HASH_TYPE}=${IGC_HASH}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/igc ${DEFAULT_CMAKE_FLAGS} ${IGC_EXTRA_ARGS}
# IGC is pretty set in its ways about where sub-projects ought to live. For some it offers
# hooks to supply alternative folders, others are just hardcoded with no way to configure them.
# We symlink everything here, since it's less work than trying to convince the cmake
# scripts to accept alternative locations.
#
PATCH_COMMAND ${CMAKE_COMMAND} -E create_symlink ${BUILD_DIR}/igc_llvm/src/external_igc_llvm/ ${BUILD_DIR}/igc/src/llvm-project &&
${CMAKE_COMMAND} -E create_symlink ${BUILD_DIR}/igc_opencl_clang/src/external_igc_opencl_clang/ ${BUILD_DIR}/igc/src/llvm-project/llvm/projects/opencl-clang &&
${CMAKE_COMMAND} -E create_symlink ${BUILD_DIR}/igc_spirv_translator/src/external_igc_spirv_translator/ ${BUILD_DIR}/igc/src/llvm-project/llvm/projects/llvm-spirv &&
${CMAKE_COMMAND} -E create_symlink ${BUILD_DIR}/igc_spirv_tools/src/external_igc_spirv_tools/ ${BUILD_DIR}/igc/src/SPIRV-Tools &&
${CMAKE_COMMAND} -E create_symlink ${BUILD_DIR}/igc_spirv_headers/src/external_igc_spirv_headers/ ${BUILD_DIR}/igc/src/SPIRV-Headers &&
${CMAKE_COMMAND} -E create_symlink ${BUILD_DIR}/igc_vcintrinsics/src/external_igc_vcintrinsics/ ${BUILD_DIR}/igc/src/vc-intrinsics
PREFIX ${BUILD_DIR}/igc
INSTALL_DIR ${LIBDIR}/igc
INSTALL_COMMAND ${CMAKE_COMMAND} --install . --strip
CMAKE_GENERATOR ${IGC_GENERATOR}
)
add_dependencies(
external_igc
external_igc_vcintrinsics
external_igc_llvm
external_igc_opencl_clang
external_igc_vcintrinsics
external_igc_spirv_headers
external_igc_spirv_tools
external_igc_spirv_translator
)
if(UNIX AND NOT APPLE)
add_dependencies(
external_igc
external_flex
)
endif()

View File

@@ -6,7 +6,6 @@ if(WIN32)
-DBISON_EXECUTABLE=${LIBDIR}/flexbison/win_bison.exe
-DM4_EXECUTABLE=${DOWNLOAD_DIR}/mingw/mingw64/msys/1.0/bin/m4.exe
-DARM_ENABLED=Off
-DPython3_FIND_REGISTRY=NEVER
)
elseif(APPLE)
# Use bison and flex installed via Homebrew.
@@ -28,7 +27,7 @@ elseif(UNIX)
set(ISPC_EXTRA_ARGS_UNIX
-DCMAKE_C_COMPILER=${LIBDIR}/llvm/bin/clang
-DCMAKE_CXX_COMPILER=${LIBDIR}/llvm/bin/clang++
-DARM_ENABLED=${BLENDER_PLATFORM_ARM}
-DARM_ENABLED=Off
-DFLEX_EXECUTABLE=${LIBDIR}/flex/bin/flex
)
endif()
@@ -44,8 +43,6 @@ set(ISPC_EXTRA_ARGS
-DISPC_INCLUDE_TESTS=Off
-DCLANG_LIBRARY_DIR=${LIBDIR}/llvm/lib
-DCLANG_INCLUDE_DIRS=${LIBDIR}/llvm/include
-DPython3_ROOT_DIR=${LIBDIR}/python/
-DPython3_EXECUTABLE=${PYTHON_BINARY}
${ISPC_EXTRA_ARGS_WIN}
${ISPC_EXTRA_ARGS_APPLE}
${ISPC_EXTRA_ARGS_UNIX}
@@ -64,7 +61,6 @@ ExternalProject_Add(external_ispc
add_dependencies(
external_ispc
ll
external_python
)
if(WIN32)

View File

@@ -82,3 +82,4 @@ add_dependencies(
ll
external_python
)

View File

@@ -1,18 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-or-later
# shorthand to only unpack a certain dependency
macro(unpack_only name)
string(TOUPPER ${name} UPPER_NAME)
set(TARGET_FILE ${${UPPER_NAME}_FILE})
set(TARGET_HASH_TYPE ${${UPPER_NAME}_HASH_TYPE})
set(TARGET_HASH ${${UPPER_NAME}_HASH})
ExternalProject_Add(external_${name}
URL file://${PACKAGE_DIR}/${TARGET_FILE}
URL_HASH ${TARGET_HASH_TYPE}=${TARGET_HASH}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
PREFIX ${BUILD_DIR}/${name}
CONFIGURE_COMMAND echo .
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
)
endmacro()
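A short usage sketch for the unpack_only() macro above (the dependency name is hypothetical; in the real build the *_FILE, *_HASH_TYPE and *_HASH variables are defined in versions.cmake, and PACKAGE_DIR/BUILD_DIR/DOWNLOAD_DIR come from the surrounding build environment):
# What versions.cmake would normally provide for the dependency:
set(FOO_FILE foo-1.2.3.tar.gz)
set(FOO_HASH_TYPE SHA256)
set(FOO_HASH 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef)  # illustrative hash only
# Creates an external_foo target that only downloads, verifies and unpacks
# ${PACKAGE_DIR}/${FOO_FILE}; the configure/build/install steps are no-ops.
unpack_only(foo)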

View File

@@ -1,24 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-or-later
set(OCLOC_EXTRA_ARGS
-DNEO_SKIP_UNIT_TESTS=1
-DNEO_BUILD_WITH_OCL=0
-DBUILD_WITH_L0=0
-DIGC_DIR=${LIBDIR}/igc
-DGMM_DIR=${LIBDIR}/gmmlib
)
ExternalProject_Add(external_ocloc
URL file://${PACKAGE_DIR}/${OCLOC_FILE}
URL_HASH ${OCLOC_HASH_TYPE}=${OCLOC_HASH}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
PREFIX ${BUILD_DIR}/ocloc
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/ocloc ${DEFAULT_CMAKE_FLAGS} ${OCLOC_EXTRA_ARGS}
INSTALL_DIR ${LIBDIR}/ocloc
)
add_dependencies(
external_ocloc
external_igc
external_gmmlib
)

View File

@@ -9,7 +9,6 @@ set(OIDN_EXTRA_ARGS
-DOIDN_STATIC_RUNTIME=OFF
-DISPC_EXECUTABLE=${LIBDIR}/ispc/bin/ispc
-DOIDN_FILTER_RTLIGHTMAP=OFF
-DPYTHON_EXECUTABLE=${PYTHON_BINARY}
)
if(WIN32)
@@ -39,7 +38,6 @@ add_dependencies(
external_openimagedenoise
external_tbb
external_ispc
external_python
)
if(WIN32)

View File

@@ -38,7 +38,6 @@ message("BUILD_DIR = ${BUILD_DIR}")
if(WIN32)
set(PATCH_CMD ${DOWNLOAD_DIR}/mingw/mingw64/msys/1.0/bin/patch.exe)
set(LIBEXT ".lib")
set(SHAREDLIBEXT ".lib")
set(LIBPREFIX "")
# For OIIO and OSL
@@ -97,7 +96,6 @@ if(WIN32)
else()
set(PATCH_CMD patch)
set(LIBEXT ".a")
set(SHAREDLIBEXT ".so")
set(LIBPREFIX "lib")
if(APPLE)

View File

@@ -147,7 +147,7 @@ set(OPENIMAGEIO_HASH de45fb38501c4581062b522b53b6141c)
set(OPENIMAGEIO_HASH_TYPE MD5)
set(OPENIMAGEIO_FILE OpenImageIO-${OPENIMAGEIO_VERSION}.tar.gz)
# 8.0.0 is currently oiio's preferred version although never versions may be available.
# 8.0.0 is currently oiio's preferred vesion although never versions may be available.
# the preferred version can be found in oiio's externalpackages.cmake
set(FMT_VERSION 8.0.0)
set(FMT_URI https://github.com/fmtlib/fmt/archive/refs/tags/${FMT_VERSION}.tar.gz)
@@ -155,7 +155,7 @@ set(FMT_HASH 7bce0e9e022e586b178b150002e7c2339994e3c2bbe44027e9abb0d60f9cce83)
set(FMT_HASH_TYPE SHA256)
set(FMT_FILE fmt-${FMT_VERSION}.tar.gz)
# 0.6.2 is currently oiio's preferred version although never versions may be available.
# 0.6.2 is currently oiio's preferred vesion although never versions may be available.
# the preferred version can be found in oiio's externalpackages.cmake
set(ROBINMAP_VERSION v0.6.2)
set(ROBINMAP_URI https://github.com/Tessil/robin-map/archive/refs/tags/${ROBINMAP_VERSION}.tar.gz)
@@ -410,9 +410,9 @@ set(SQLITE_HASH fb558c49ee21a837713c4f1e7e413309aabdd9c7)
set(SQLITE_HASH_TYPE SHA1)
set(SQLITE_FILE sqlite-src-3240000.zip)
set(EMBREE_VERSION 3.13.4)
set(EMBREE_VERSION 3.13.3)
set(EMBREE_URI https://github.com/embree/embree/archive/v${EMBREE_VERSION}.zip)
set(EMBREE_HASH 52d0be294d6c88ba7a6c9e046796e7be)
set(EMBREE_HASH f62766ba54e48a2f327c3a22596e7133)
set(EMBREE_HASH_TYPE MD5)
set(EMBREE_FILE embree-v${EMBREE_VERSION}.zip)
@@ -502,134 +502,3 @@ set(LEVEL_ZERO_URI https://github.com/oneapi-src/level-zero/archive/refs/tags/${
set(LEVEL_ZERO_HASH c39bb05a8e5898aa6c444e1704105b93d3f1888b9c333f8e7e73825ffbfb2617)
set(LEVEL_ZERO_HASH_TYPE SHA256)
set(LEVEL_ZERO_FILE level-zero-${LEVEL_ZERO_VERSION}.tar.gz)
set(DPCPP_VERSION 20220620)
set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/sycl-nightly/${DPCPP_VERSION}.tar.gz)
set(DPCPP_HASH a5f41abd5229d28afa92cbd8a5d8d786ee698bf239f722929fd686276bad692c)
set(DPCPP_HASH_TYPE SHA256)
set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)
########################
### DPCPP DEPS BEGIN ###
########################
# The following deps are build-time requirements for dpcpp. When possible,
# the file in the dpcpp source tree that documents the chosen version is noted
# for each dep. These only have to be downloaded and unpacked; dpcpp
# will take care of building them. Unpacking is done in dpcpp_deps.cmake.
# Source llvm/lib/SYCLLowerIR/CMakeLists.txt
set(VCINTRINSICS_VERSION 984bb27baacce6ee5c716c2e64845f2a1928025b)
set(VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/${VCINTRINSICS_VERSION}.tar.gz)
set(VCINTRINSICS_HASH abea415a15a0dd11fdc94dee8fb462910f2548311b787e02f42509789e1b0d7b)
set(VCINTRINSICS_HASH_TYPE SHA256)
set(VCINTRINSICS_FILE vc-intrinsics-${VCINTRINSICS_VERSION}.tar.gz)
# Source opencl/CMakeLists.txt
set(OPENCLHEADERS_VERSION dcd5bede6859d26833cd85f0d6bbcee7382dc9b3)
set(OPENCLHEADERS_URI https://github.com/KhronosGroup/OpenCL-Headers/archive/${OPENCLHEADERS_VERSION}.tar.gz)
set(OPENCLHEADERS_HASH ca8090359654e94f2c41e946b7e9d826253d795ae809ce7c83a7d3c859624693)
set(OPENCLHEADERS_HASH_TYPE SHA256)
set(OPENCLHEADERS_FILE opencl_headers-${OPENCLHEADERS_VERSION}.tar.gz)
# Source opencl/CMakeLists.txt
set(ICDLOADER_VERSION aec3952654832211636fc4af613710f80e203b0a)
set(ICDLOADER_URI https://github.com/KhronosGroup/OpenCL-ICD-Loader/archive/${ICDLOADER_VERSION}.tar.gz)
set(ICDLOADER_HASH e1880551d67bd8dc31d13de63b94bbfd6b1f315b6145dad1ffcd159b89bda93c)
set(ICDLOADER_HASH_TYPE SHA256)
set(ICDLOADER_FILE icdloader-${ICDLOADER_VERSION}.tar.gz)
# Source sycl/cmake/modules/AddBoostMp11Headers.cmake
# Using external MP11 here, getting AddBoostMp11Headers.cmake to recognize
# our copy in boost directly was more trouble than it was worth.
set(MP11_VERSION 7bc4e1ae9b36ec8ee635c3629b59ec525bbe82b9)
set(MP11_URI https://github.com/boostorg/mp11/archive/${MP11_VERSION}.tar.gz)
set(MP11_HASH 071ee2bd3952ec89882edb3af25dd1816f6b61723f66e42eea32f4d02ceef426)
set(MP11_HASH_TYPE SHA256)
set(MP11_FILE mp11-${MP11_VERSION}.tar.gz)
# Source llvm-spirv/CMakeLists.txt (repo)
# Source llvm-spirv/spirv-headers-tag.conf (hash)
set(SPIRV_HEADERS_VERSION 36c0c1596225e728bd49abb7ef56a3953e7ed468)
set(SPIRV_HEADERS_URI https://github.com/KhronosGroup/SPIRV-Headers/archive/${SPIRV_HEADERS_VERSION}.tar.gz)
set(SPIRV_HEADERS_HASH 7a5c89633f8740456fe8adee052033e134476d267411d1336c0cb1e587a9229a)
set(SPIRV_HEADERS_HASH_TYPE SHA256)
set(SPIRV_HEADERS_FILE SPIR-V-Headers-${SPIRV_HEADERS_VERSION}.tar.gz)
######################
### DPCPP DEPS END ###
######################
##########################################
### Intel Graphics Compiler DEPS BEGIN ###
##########################################
# The following deps are build-time requirements for the Intel Graphics
# Compiler; the versions used are taken from the following location:
# https://github.com/intel/intel-graphics-compiler/releases
set(IGC_VERSION 1.0.11222)
set(IGC_URI https://github.com/intel/intel-graphics-compiler/archive/refs/tags/igc-${IGC_VERSION}.tar.gz)
set(IGC_HASH d92f0608dcbb52690855685f9447282e5c09c0ba98ae35fabf114fcf8b1e9fcf)
set(IGC_HASH_TYPE SHA256)
set(IGC_FILE igc-${IGC_VERSION}.tar.gz)
set(IGC_LLVM_VERSION llvmorg-11.1.0)
set(IGC_LLVM_URI https://github.com/llvm/llvm-project/archive/refs/tags/${IGC_LLVM_VERSION}.tar.gz)
set(IGC_LLVM_HASH 53a0719f3f4b0388013cfffd7b10c7d5682eece1929a9553c722348d1f866e79)
set(IGC_LLVM_HASH_TYPE SHA256)
set(IGC_LLVM_FILE ${IGC_LLVM_VERSION}.tar.gz)
# WARNING WARNING WARNING
#
# IGC_OPENCL_CLANG contains patches for some of its dependencies.
#
# Whenever IGC_OPENCL_CLANG_VERSION changes, one *MUST* inspect
# IGC_OPENCL_CLANG's patches folder and update igc.cmake to account for
# any added or removed patches.
#
# WARNING WARNING WARNING
set(IGC_OPENCL_CLANG_VERSION bbdd1587f577397a105c900be114b56755d1f7dc)
set(IGC_OPENCL_CLANG_URI https://github.com/intel/opencl-clang/archive/${IGC_OPENCL_CLANG_VERSION}.tar.gz)
set(IGC_OPENCL_CLANG_HASH d08315f1b0d8a6fef33de2b3e6aa7356534c324910634962c72523d970773efc)
set(IGC_OPENCL_CLANG_HASH_TYPE SHA256)
set(IGC_OPENCL_CLANG_FILE opencl-clang-${IGC_OPENCL_CLANG_VERSION}.tar.gz)
set(IGC_VCINTRINSICS_VERSION v0.4.0)
set(IGC_VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/refs/tags/${IGC_VCINTRINSICS_VERSION}.tar.gz)
set(IGC_VCINTRINSICS_HASH c8b92682ad5031cf9d5b82a40e7d5c0e763cd9278660adbcaa69aab988e4b589)
set(IGC_VCINTRINSICS_HASH_TYPE SHA256)
set(IGC_VCINTRINSICS_FILE vc-intrinsics-${IGC_VCINTRINSICS_VERSION}.tar.gz)
set(IGC_SPIRV_HEADERS_VERSION sdk-1.3.204.1)
set(IGC_SPIRV_HEADERS_URI https://github.com/KhronosGroup/SPIRV-Headers/archive/refs/tags/${IGC_SPIRV_HEADERS_VERSION}.tar.gz)
set(IGC_SPIRV_HEADERS_HASH 262864053968c217d45b24b89044a7736a32361894743dd6cfe788df258c746c)
set(IGC_SPIRV_HEADERS_HASH_TYPE SHA256)
set(IGC_SPIRV_HEADERS_FILE SPIR-V-Headers-${IGC_SPIRV_HEADERS_VERSION}.tar.gz)
set(IGC_SPIRV_TOOLS_VERSION sdk-1.3.204.1)
set(IGC_SPIRV_TOOLS_URI https://github.com/KhronosGroup/SPIRV-Tools/archive/refs/tags/${IGC_SPIRV_TOOLS_VERSION}.tar.gz)
set(IGC_SPIRV_TOOLS_HASH 6e19900e948944243024aedd0a201baf3854b377b9cc7a386553bc103b087335)
set(IGC_SPIRV_TOOLS_HASH_TYPE SHA256)
set(IGC_SPIRV_TOOLS_FILE SPIR-V-Tools-${IGC_SPIRV_TOOLS_VERSION}.tar.gz)
set(IGC_SPIRV_TRANSLATOR_VERSION 99420daab98998a7e36858befac9c5ed109d4920)
set(IGC_SPIRV_TRANSLATOR_URI https://github.com/KhronosGroup/SPIRV-LLVM-Translator/archive/${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
set(IGC_SPIRV_TRANSLATOR_HASH 77dfb4ddb6bfb993535562c02ddea23f0a0d1c5a0258c1afe7e27c894ff783a8)
set(IGC_SPIRV_TRANSLATOR_HASH_TYPE SHA256)
set(IGC_SPIRV_TRANSLATOR_FILE SPIR-V-Translator-${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
########################################
### Intel Graphics Compiler DEPS END ###
########################################
set(GMMLIB_VERSION intel-gmmlib-22.1.2)
set(GMMLIB_URI https://github.com/intel/gmmlib/archive/refs/tags/${GMMLIB_VERSION}.tar.gz)
set(GMMLIB_HASH 3b9a6d5e7e3f5748b3d0a2fb0e980ae943907fece0980bd9c0508e71c838e334)
set(GMMLIB_HASH_TYPE SHA256)
set(GMMLIB_FILE ${GMMLIB_VERSION}.tar.gz)
set(OCLOC_VERSION 22.20.23198)
set(OCLOC_URI https://github.com/intel/compute-runtime/archive/refs/tags/${OCLOC_VERSION}.tar.gz)
set(OCLOC_HASH ab22b8bf2560a57fdd3def0e35a62ca75991406f959c0263abb00cd6cd9ae998)
set(OCLOC_HASH_TYPE SHA256)
set(OCLOC_FILE ocloc-${OCLOC_VERSION}.tar.gz)

View File

@@ -465,7 +465,7 @@ TBB_VERSION="2020"
TBB_VERSION_SHORT="2020"
TBB_VERSION_UPDATE="_U3" # Used for source packages...
TBB_VERSION_MIN="2018"
TBB_VERSION_MEX="2021" # 2021 introduces 'oneTBB', which has lots of compatibility breakage with previous versions
TBB_VERSION_MEX="2022"
TBB_FORCE_BUILD=false
TBB_FORCE_REBUILD=false
TBB_SKIP=false
@@ -567,7 +567,7 @@ OPENCOLLADA_FORCE_BUILD=false
OPENCOLLADA_FORCE_REBUILD=false
OPENCOLLADA_SKIP=false
EMBREE_VERSION="3.13.4"
EMBREE_VERSION="3.13.3"
EMBREE_VERSION_SHORT="3.13"
EMBREE_VERSION_MIN="3.13"
EMBREE_VERSION_MEX="4.0"
@@ -635,6 +635,9 @@ MP3LAME_DEV=""
OPENJPEG_USE=false
OPENJPEG_DEV=""
# Whether to use system GLEW or not (OpenSubDiv needs recent glew to work).
NO_SYSTEM_GLEW=false
# Switch to english language, else some things (like check_package_DEB()) won't work!
LANG_BACK=$LANG
LANG=""
@@ -1190,7 +1193,7 @@ Those libraries should be available as packages in all recent distributions (opt
* libx11, libxcursor, libxi, libxrandr, libxinerama (and other libx... as needed).
* libwayland-client0, libwayland-cursor0, libwayland-egl1, libxkbcommon0, libdbus-1-3, libegl1 (Wayland)
* libsqlite3, libzstd, libbz2, libssl, libfftw3, libxml2, libtinyxml, yasm, libyaml-cpp, flex.
* libsdl2, libglew, libpugixml, libpotrace, [libgmp], fontconfig, [libharu/libhpdf].\""
* libsdl2, libglew, libpugixml, libpotrace, [libgmp], [libglewmx], fontconfig, [libharu/libhpdf].\""
DEPS_SPECIFIC_INFO="\"BUILDABLE DEPENDENCIES:
@@ -1684,7 +1687,7 @@ compile_TBB() {
fi
# To be changed each time we make edits that would modify the compiled result!
tbb_magic=1
tbb_magic=0
_init_tbb
# Force having own builds for the dependencies.
@@ -2693,13 +2696,14 @@ compile_OSD() {
mkdir build
cd build
cmake_d="-D CMAKE_BUILD_TYPE=Release"
if [ -d $INST/tbb ]; then
cmake_d="$cmake_d -D TBB_LOCATION=$INST/tbb"
cmake_d="$cmake_d $cmake_d -D TBB_LOCATION=$INST/tbb"
fi
cmake_d="-D CMAKE_BUILD_TYPE=Release"
cmake_d="$cmake_d -D CMAKE_INSTALL_PREFIX=$_inst"
# ptex is only needed when nicholas bishop is ready
cmake_d="$cmake_d -D NO_PTEX=1"
cmake_d="$cmake_d -D NO_CLEW=1 -D NO_CUDA=1 -D NO_OPENCL=1 -D NO_GLEW=1"
cmake_d="$cmake_d -D NO_CLEW=1 -D NO_CUDA=1 -D NO_OPENCL=1"
# maya plugin, docs, tutorials, regression tests and examples are not needed
cmake_d="$cmake_d -D NO_MAYA=1 -D NO_DOC=1 -D NO_TUTORIALS=1 -D NO_REGRESSION=1 -DNO_EXAMPLES=1"
@@ -3322,7 +3326,7 @@ compile_Embree() {
fi
# To be changed each time we make edits that would modify the compiled results!
embree_magic=11
embree_magic=10
_init_embree
# Force having own builds for the dependencies.
@@ -3382,7 +3386,7 @@ compile_Embree() {
cmake_d="$cmake_d -D EMBREE_TASKING_SYSTEM=TBB"
if [ -d $INST/tbb ]; then
cmake_d="$cmake_d -D EMBREE_TBB_ROOT=$INST/tbb"
make_d="$make_d EMBREE_TBB_ROOT=$INST/tbb"
fi
cmake $cmake_d ../
@@ -3521,7 +3525,7 @@ compile_OIDN() {
install_ISPC
# To be changed each time we make edits that would modify the compiled results!
oidn_magic=10
oidn_magic=9
_init_oidn
# Force having own builds for the dependencies.
@@ -3577,7 +3581,7 @@ compile_OIDN() {
cmake_d="$cmake_d -D ISPC_DIR_HINT=$_ispc_path_bin"
if [ -d $INST/tbb ]; then
cmake_d="$cmake_d -D TBB_ROOT=$INST/tbb"
make_d="$make_d TBB_ROOT=$INST/tbb"
fi
cmake $cmake_d ../
@@ -4058,6 +4062,7 @@ install_DEB() {
libopenal-dev libglew-dev yasm \
libsdl2-dev libfftw3-dev patch bzip2 libxml2-dev libtinyxml-dev libjemalloc-dev \
libgmp-dev libpugixml-dev libpotrace-dev libhpdf-dev libzstd-dev libpystring-dev"
# libglewmx-dev (broken in deb testing currently...)
VORBIS_USE=true
OGG_USE=true
@@ -4166,7 +4171,7 @@ install_DEB() {
fi
fi
# Check cmake version and disable features for older distros.
# Check cmake/glew versions and disable features for older distros.
# This is so Blender can at least compile.
PRINT ""
_cmake=`get_package_version_DEB cmake`
@@ -4183,6 +4188,28 @@ install_DEB() {
fi
fi
PRINT ""
_glew=`get_package_version_DEB libglew-dev`
if [ -z $_glew ]; then
# Stupid virtual package in Ubuntu 12.04 doesn't show version number...
_glew=`apt-cache showpkg libglew-dev|tail -n1|awk '{print $2}'|sed 's/-.*//'`
fi
version_ge $_glew "1.9.0"
if [ $? -eq 1 ]; then
version_ge $_glew "1.7.0"
if [ $? -eq 1 ]; then
WARNING "OpenSubdiv disabled because GLEW-$_glew is not enough"
WARNING "Blender will not use system GLEW library"
OSD_SKIP=true
NO_SYSTEM_GLEW=true
else
WARNING "OpenSubdiv will compile with GLEW-$_glew but with limited capability"
WARNING "Blender will not use system GLEW library"
NO_SYSTEM_GLEW=true
fi
fi
PRINT ""
_do_compile_python=false
if [ "$PYTHON_SKIP" = true ]; then
@@ -6263,6 +6290,12 @@ print_info() {
fi
fi
if [ "$NO_SYSTEM_GLEW" = true ]; then
_1="-D WITH_SYSTEM_GLEW=OFF"
PRINT " $_1"
_buildargs="$_buildargs $_1"
fi
if [ "$FFMPEG_SKIP" = false ]; then
_1="-D WITH_CODEC_FFMPEG=ON"
PRINT " $_1"

View File

@@ -1,54 +0,0 @@
diff -Naur external_dpcpp.orig/sycl/source/CMakeLists.txt external_dpcpp/sycl/source/CMakeLists.txt
--- external_dpcpp.orig/sycl/source/CMakeLists.txt 2022-05-20 04:19:45.067771362 +0000
+++ external_dpcpp/sycl/source/CMakeLists.txt 2022-05-20 04:21:49.708025048 +0000
@@ -66,10 +66,10 @@
target_compile_options(${LIB_OBJ_NAME} PUBLIC
-fvisibility=hidden -fvisibility-inlines-hidden)
set(linker_script "${CMAKE_CURRENT_SOURCE_DIR}/ld-version-script.txt")
- set(abi_linker_script "${CMAKE_CURRENT_SOURCE_DIR}/abi_replacements_linux.txt")
- target_link_libraries(
- ${LIB_NAME} PRIVATE "-Wl,${abi_linker_script}")
- set_target_properties(${LIB_NAME} PROPERTIES LINK_DEPENDS ${abi_linker_script})
+# set(abi_linker_script "${CMAKE_CURRENT_SOURCE_DIR}/abi_replacements_linux.txt")
+# target_link_libraries(
+# ${LIB_NAME} PRIVATE "-Wl,${abi_linker_script}")
+# set_target_properties(${LIB_NAME} PROPERTIES LINK_DEPENDS ${abi_linker_script})
target_link_libraries(
${LIB_NAME} PRIVATE "-Wl,--version-script=${linker_script}")
set_target_properties(${LIB_NAME} PROPERTIES LINK_DEPENDS ${linker_script})
diff -Naur llvm-sycl-nightly-20220501.orig\opencl/CMakeLists.txt llvm-sycl-nightly-20220501\opencl/CMakeLists.txt
--- llvm-sycl-nightly-20220501.orig/opencl/CMakeLists.txt 2022-04-29 13:47:11 -0600
+++ llvm-sycl-nightly-20220501/opencl/CMakeLists.txt 2022-05-21 15:25:06 -0600
@@ -11,6 +11,11 @@
)
endif()
+# Blender code below is determined to use FetchContent_Declare
+# temporarily allow it (but feed it our downloaded tarball
+# in the OpenCL_HEADERS variable
+set(FETCHCONTENT_FULLY_DISCONNECTED OFF)
+
# Repo URLs
set(OCL_HEADERS_REPO
@@ -77,5 +82,6 @@
FetchContent_MakeAvailable(ocl-icd)
add_library(OpenCL-ICD ALIAS OpenCL)
+set(FETCHCONTENT_FULLY_DISCONNECTED ON)
add_subdirectory(opencl-aot)
diff -Naur llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice.cmake llvm-sycl-nightly-20220208/libdevice/cmake/modules/SYCLLibdevice.cmake
--- llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice.cmake 2022-02-08 09:17:24 -0700
+++ llvm-sycl-nightly-20220208/libdevice/cmake/modules/SYCLLibdevice.cmake 2022-05-24 11:35:51 -0600
@@ -36,7 +36,9 @@
add_custom_target(libsycldevice-obj)
add_custom_target(libsycldevice-spv)
-add_custom_target(libsycldevice DEPENDS
+# Blender: add ALL here otherwise this target will not build
+# and cause an error due to missing files during the install phase.
+add_custom_target(libsycldevice ALL DEPENDS
libsycldevice-obj
libsycldevice-spv)

View File

@@ -1,37 +1,30 @@
diff -Naur org/kernels/rtcore_config.h.in embree-3.13.4/kernels/rtcore_config.h.in
--- org/kernels/rtcore_config.h.in 2022-06-14 22:13:52 -0600
+++ embree-3.13.4/kernels/rtcore_config.h.in 2022-06-24 15:20:12 -0600
@@ -14,6 +14,7 @@
#cmakedefine01 EMBREE_MIN_WIDTH
#define RTC_MIN_WIDTH EMBREE_MIN_WIDTH
+#cmakedefine EMBREE_STATIC_LIB
#cmakedefine EMBREE_API_NAMESPACE
#if defined(EMBREE_API_NAMESPACE)
diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt
index 7c2f43d..106b1d5 100644
--- a/kernels/CMakeLists.txt
+++ b/kernels/CMakeLists.txt
@@ -201,6 +201,12 @@ embree_files(EMBREE_LIBRARY_FILES_AVX512 ${AVX512})
#message("AVX2: ${EMBREE_LIBRARY_FILES_AVX2}")
#message("AVX512: ${EMBREE_LIBRARY_FILES_AVX512}")
+# Bundle Neon2x into the main static library.
+IF(EMBREE_ISA_NEON2X AND EMBREE_STATIC_LIB)
+ LIST(APPEND EMBREE_LIBRARY_FILES ${EMBREE_LIBRARY_FILES_AVX2})
+ LIST(REMOVE_DUPLICATES EMBREE_LIBRARY_FILES)
+ENDIF()
+
# replaces all .cpp files with a dummy file that includes that .cpp file
# this is to work around an ICC name mangling issue related to lambda functions under windows
MACRO (CreateISADummyFiles list isa)
@@ -277,7 +283,7 @@ IF (EMBREE_ISA_AVX AND EMBREE_LIBRARY_FILES_AVX)
ENDIF()
ENDIF()
-IF (EMBREE_ISA_AVX2 AND EMBREE_LIBRARY_FILES_AVX2)
+IF (EMBREE_ISA_AVX2 AND EMBREE_LIBRARY_FILES_AVX2 AND NOT (EMBREE_ISA_NEON2X AND EMBREE_STATIC_LIB))
DISABLE_STACK_PROTECTOR_FOR_INTERSECTORS(${EMBREE_LIBRARY_FILES_AVX2})
ADD_LIBRARY(embree_avx2 STATIC ${EMBREE_LIBRARY_FILES_AVX2})
TARGET_LINK_LIBRARIES(embree_avx2 PRIVATE tasking)
diff -Naur orig/common/sys/platform.h external_embree/common/sys/platform.h
--- orig/common/sys/platform.h 2020-05-13 23:08:53 -0600
+++ external_embree/common/sys/platform.h 2020-06-13 17:40:26 -0600
@@ -84,8 +84,8 @@
////////////////////////////////////////////////////////////////////////////////
#ifdef __WIN32__
-#define dll_export __declspec(dllexport)
-#define dll_import __declspec(dllimport)
+#define dll_export
+#define dll_import
#else
#define dll_export __attribute__ ((visibility ("default")))
#define dll_import
diff --git orig/common/tasking/CMakeLists.txt external_embree/common/tasking/CMakeLists.txt
--- orig/common/tasking/CMakeLists.txt
+++ external_embree/common/tasking/CMakeLists.txt
@@ -27,7 +27,11 @@
else()
# If not found try getting older TBB via module (FindTBB.cmake)
unset(TBB_DIR CACHE)
- find_package(TBB 4.1 REQUIRED tbb)
+ if (TBB_STATIC_LIB)
+ find_package(TBB 4.1 REQUIRED tbb_static)
+ else()
+ find_package(TBB 4.1 REQUIRED tbb)
+ endif()
if (TBB_FOUND)
TARGET_LINK_LIBRARIES(tasking PUBLIC TBB)
TARGET_INCLUDE_DIRECTORIES(tasking PUBLIC "${TBB_INCLUDE_DIRS}")

View File

@@ -1,15 +0,0 @@
diff --git a/configure.ac b/configure.ac
index c6f12d644..3c977a4e3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -25,8 +25,10 @@
# autoconf requirements and initialization
AC_INIT([the fast lexical analyser generator],[2.6.4],[flex-help@lists.sourceforge.net],[flex])
+AC_PREREQ([2.60])
AC_CONFIG_SRCDIR([src/scan.l])
AC_CONFIG_AUX_DIR([build-aux])
+AC_USE_SYSTEM_EXTENSIONS
LT_INIT
AM_INIT_AUTOMAKE([1.15 -Wno-portability foreign std-options dist-lzip parallel-tests subdir-objects])
AC_CONFIG_HEADER([src/config.h])

View File

@@ -1,44 +0,0 @@
diff -Naur external_igc_opencl_clang.orig/CMakeLists.txt external_igc_opencl_clang/CMakeLists.txt
--- external_igc_opencl_clang.orig/CMakeLists.txt 2022-03-16 05:51:10 -0600
+++ external_igc_opencl_clang/CMakeLists.txt 2022-05-23 10:40:09 -0600
@@ -126,22 +126,24 @@
)
endif()
-
- set(SPIRV_BASE_REVISION llvm_release_110)
- set(TARGET_BRANCH "ocl-open-110")
- get_filename_component(LLVM_MONOREPO_DIR ${LLVM_SOURCE_DIR} DIRECTORY)
- set(LLVM_PATCHES_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/patches/llvm
- ${CMAKE_CURRENT_SOURCE_DIR}/patches/clang)
- apply_patches(${LLVM_MONOREPO_DIR}
- "${LLVM_PATCHES_DIRS}"
- ${LLVM_BASE_REVISION}
- ${TARGET_BRANCH}
- ret)
- apply_patches(${SPIRV_SOURCE_DIR}
- ${CMAKE_CURRENT_SOURCE_DIR}/patches/spirv
- ${SPIRV_BASE_REVISION}
- ${TARGET_BRANCH}
- ret)
+ #
+ # Blender: Why apply these manually in igc.cmake
+ #
+ #set(SPIRV_BASE_REVISION llvm_release_110)
+ #set(TARGET_BRANCH "ocl-open-110")
+ #get_filename_component(LLVM_MONOREPO_DIR ${LLVM_SOURCE_DIR} DIRECTORY)
+ #set(LLVM_PATCHES_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/patches/llvm
+ # ${CMAKE_CURRENT_SOURCE_DIR}/patches/clang)
+ #apply_patches(${LLVM_MONOREPO_DIR}
+ # "${LLVM_PATCHES_DIRS}"
+ # ${LLVM_BASE_REVISION}
+ # ${TARGET_BRANCH}
+ # ret)
+ #apply_patches(${SPIRV_SOURCE_DIR}
+ # ${CMAKE_CURRENT_SOURCE_DIR}/patches/spirv
+ # ${SPIRV_BASE_REVISION}
+ # ${TARGET_BRANCH}
+ # ret)
endif(NOT USE_PREBUILT_LLVM)
#

View File

@@ -1,56 +0,0 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2021-2022 Intel Corporation
# - Find Level Zero library
# Find Level Zero headers and libraries needed by oneAPI implementation
# This module defines
# LEVEL_ZERO_LIBRARY, libraries to link against in order to use L0.
# LEVEL_ZERO_INCLUDE_DIR, directories where L0 headers can be found.
# LEVEL_ZERO_ROOT_DIR, The base directory to search for L0 files.
# This can also be an environment variable.
# LEVEL_ZERO_FOUND, If false, then don't try to use L0.
IF(NOT LEVEL_ZERO_ROOT_DIR AND NOT $ENV{LEVEL_ZERO_ROOT_DIR} STREQUAL "")
SET(LEVEL_ZERO_ROOT_DIR $ENV{LEVEL_ZERO_ROOT_DIR})
ENDIF()
SET(_level_zero_search_dirs
${LEVEL_ZERO_ROOT_DIR}
/usr/lib
/usr/local/lib
)
FIND_LIBRARY(_LEVEL_ZERO_LIBRARY
NAMES
ze_loader
HINTS
${_level_zero_search_dirs}
PATH_SUFFIXES
lib64 lib
)
FIND_PATH(_LEVEL_ZERO_INCLUDE_DIR
NAMES
level_zero/ze_api.h
HINTS
${_level_zero_search_dirs}
PATH_SUFFIXES
include
)
INCLUDE(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(LevelZero DEFAULT_MSG _LEVEL_ZERO_LIBRARY _LEVEL_ZERO_INCLUDE_DIR)
IF(LevelZero_FOUND)
SET(LEVEL_ZERO_LIBRARY ${_LEVEL_ZERO_LIBRARY})
SET(LEVEL_ZERO_INCLUDE_DIR ${_LEVEL_ZERO_INCLUDE_DIR} ${_LEVEL_ZERO_INCLUDE_PARENT_DIR})
SET(LEVEL_ZERO_FOUND TRUE)
ELSE()
SET(LEVEL_ZERO_FOUND FALSE)
ENDIF()
MARK_AS_ADVANCED(
LEVEL_ZERO_LIBRARY
LEVEL_ZERO_INCLUDE_DIR
)
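A hedged sketch of how a consumer project could use the Level Zero find module above (the target name and module path are hypothetical; LEVEL_ZERO_FOUND, LEVEL_ZERO_INCLUDE_DIR and LEVEL_ZERO_LIBRARY are the variables the module documents in its header):
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/Modules")
find_package(LevelZero)  # add REQUIRED to fail the configure step instead
if(LEVEL_ZERO_FOUND)
  # my_app is a hypothetical target defined elsewhere with add_executable().
  target_include_directories(my_app PRIVATE ${LEVEL_ZERO_INCLUDE_DIR})
  target_link_libraries(my_app PRIVATE ${LEVEL_ZERO_LIBRARY})
endif()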

View File

@@ -1,88 +0,0 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2021-2022 Intel Corporation
# - Find SYCL library
# Find the native SYCL header and libraries needed by oneAPI implementation
# This module defines
# SYCL_COMPILER, compiler which will be used for compilation of SYCL code
# SYCL_LIBRARY, libraries to link against in order to use SYCL.
# SYCL_INCLUDE_DIR, directories where SYCL headers can be found
# SYCL_ROOT_DIR, The base directory to search for SYCL files.
# This can also be an environment variable.
# SYCL_FOUND, If false, then don't try to use SYCL.
IF(NOT SYCL_ROOT_DIR AND NOT $ENV{SYCL_ROOT_DIR} STREQUAL "")
SET(SYCL_ROOT_DIR $ENV{SYCL_ROOT_DIR})
ENDIF()
SET(_sycl_search_dirs
${SYCL_ROOT_DIR}
/usr/lib
/usr/local/lib
/opt/intel/oneapi/compiler/latest/linux/
C:/Program\ Files\ \(x86\)/Intel/oneAPI/compiler/latest/windows
)
# Find DPC++ compiler.
# Since the compiler name possibly conflicts with the system-wide
# Clang, start by looking for either a dpcpp or clang binary in the given
# list of search paths only. If that fails, try to look for a system-wide
# dpcpp binary.
FIND_PROGRAM(SYCL_COMPILER
NAMES
dpcpp
clang++
HINTS
${_sycl_search_dirs}
PATH_SUFFIXES
bin
NO_CMAKE_FIND_ROOT_PATH
NAMES_PER_DIR
)
# NOTE: No clang++ here so that we do not pick up a system-wide CLang
# compiler.
if(NOT SYCL_COMPILER)
FIND_PROGRAM(SYCL_COMPILER
NAMES
dpcpp
HINTS
${_sycl_search_dirs}
PATH_SUFFIXES
bin
)
endif()
FIND_LIBRARY(SYCL_LIBRARY
NAMES
sycl
HINTS
${_sycl_search_dirs}
PATH_SUFFIXES
lib64 lib
)
FIND_PATH(SYCL_INCLUDE_DIR
NAMES
CL/sycl.hpp
HINTS
${_sycl_search_dirs}
PATH_SUFFIXES
include
include/sycl
)
INCLUDE(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(SYCL DEFAULT_MSG SYCL_LIBRARY SYCL_INCLUDE_DIR)
IF(SYCL_FOUND)
get_filename_component(_SYCL_INCLUDE_PARENT_DIR ${SYCL_INCLUDE_DIR} DIRECTORY)
SET(SYCL_INCLUDE_DIR ${SYCL_INCLUDE_DIR} ${_SYCL_INCLUDE_PARENT_DIR})
ELSE()
SET(SYCL_SYCL_FOUND FALSE)
ENDIF()
MARK_AS_ADVANCED(
_SYCL_INCLUDE_PARENT_DIR
)

View File

@@ -70,7 +70,7 @@ if(NOT WIN32)
set(WITH_JACK ON CACHE BOOL "" FORCE)
endif()
if(WIN32)
set(WITH_WASAPI ON CACHE BOOL "" FORCE)
set(WITH_WASAPI ON CACHE BOOL "" FORCE)
endif()
if(UNIX AND NOT APPLE)
set(WITH_DOC_MANPAGE ON CACHE BOOL "" FORCE)
@@ -78,11 +78,6 @@ if(UNIX AND NOT APPLE)
set(WITH_PULSEAUDIO ON CACHE BOOL "" FORCE)
set(WITH_X11_XINPUT ON CACHE BOOL "" FORCE)
set(WITH_X11_XF86VMODE ON CACHE BOOL "" FORCE)
# Disable oneAPI on Linux for the time being.
# The AoT compilation takes too long to be used officially in the buildbot CI/CD and the JIT
# compilation has ABI compatibility issues when running builds made on centOS on Ubuntu.
set(WITH_CYCLES_DEVICE_ONEAPI OFF CACHE BOOL "" FORCE)
endif()
if(NOT APPLE)
set(WITH_XR_OPENXR ON CACHE BOOL "" FORCE)
@@ -91,8 +86,4 @@ if(NOT APPLE)
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_DEVICE_ONEAPI ON CACHE BOOL "" FORCE)
# Disable AoT kernel compilations until buildbot can deliver them in a reasonable time.
set(WITH_CYCLES_ONEAPI_BINARIES OFF CACHE BOOL "" FORCE)
endif()

View File

@@ -38,15 +38,9 @@ if(EXISTS ${LIBDIR})
message(STATUS "Using pre-compiled LIBDIR: ${LIBDIR}")
file(GLOB LIB_SUBDIRS ${LIBDIR}/*)
# Ignore Mesa software OpenGL libraries, they are not intended to be
# linked against but to optionally override at runtime.
list(REMOVE_ITEM LIB_SUBDIRS ${LIBDIR}/mesa)
# Ignore DPC++ as it contains its own copy of LLVM/CLang which we do
# not need to be ever discovered for the Blender linking.
list(REMOVE_ITEM LIB_SUBDIRS ${LIBDIR}/dpcpp)
# NOTE: Make sure "proper" compiled zlib comes first before the one
# which is a part of OpenCollada. They have different ABI, and we
# do need to use the official one.
@@ -277,18 +271,6 @@ if(WITH_CYCLES AND WITH_CYCLES_OSL)
endif()
endif()
if(WITH_CYCLES_DEVICE_ONEAPI)
set(CYCLES_LEVEL_ZERO ${LIBDIR}/level-zero CACHE PATH "Path to Level Zero installation")
if(EXISTS ${CYCLES_LEVEL_ZERO} AND NOT LEVEL_ZERO_ROOT_DIR)
set(LEVEL_ZERO_ROOT_DIR ${CYCLES_LEVEL_ZERO})
endif()
set(CYCLES_SYCL ${LIBDIR}/dpcpp CACHE PATH "Path to DPC++ and SYCL installation")
if(EXISTS ${CYCLES_SYCL} AND NOT SYCL_ROOT_DIR)
set(SYCL_ROOT_DIR ${CYCLES_SYCL})
endif()
endif()
if(WITH_OPENVDB)
find_package_wrapper(OpenVDB)
find_package_wrapper(Blosc)
@@ -631,42 +613,17 @@ if(WITH_GHOST_WAYLAND)
pkg_check_modules(wayland-scanner REQUIRED wayland-scanner)
pkg_check_modules(xkbcommon REQUIRED xkbcommon)
pkg_check_modules(wayland-cursor REQUIRED wayland-cursor)
pkg_check_modules(dbus REQUIRED dbus-1)
if(WITH_GHOST_WAYLAND_DBUS)
pkg_check_modules(dbus REQUIRED dbus-1)
endif()
if(WITH_GHOST_WAYLAND_LIBDECOR)
pkg_check_modules(libdecor REQUIRED libdecor-0>=0.1)
endif()
set(WITH_GL_EGL ON)
list(APPEND PLATFORM_LINKLIBS
${wayland-client_LINK_LIBRARIES}
${wayland-egl_LINK_LIBRARIES}
${xkbcommon_LINK_LIBRARIES}
${wayland-cursor_LINK_LIBRARIES}
${dbus_LINK_LIBRARIES}
)
if(NOT WITH_GHOST_WAYLAND_DYNLOAD)
list(APPEND PLATFORM_LINKLIBS
${wayland-client_LINK_LIBRARIES}
${wayland-egl_LINK_LIBRARIES}
${wayland-cursor_LINK_LIBRARIES}
)
endif()
if(WITH_GHOST_WAYLAND_DBUS)
list(APPEND PLATFORM_LINKLIBS
${dbus_LINK_LIBRARIES}
)
add_definitions(-DWITH_GHOST_WAYLAND_DBUS)
endif()
if(WITH_GHOST_WAYLAND_LIBDECOR)
if(NOT WITH_GHOST_WAYLAND_DYNLOAD)
list(APPEND PLATFORM_LINKLIBS
${libdecor_LIBRARIES}
)
endif()
add_definitions(-DWITH_GHOST_WAYLAND_LIBDECOR)
endif()
endif()
if(WITH_GHOST_X11)

View File

@@ -950,6 +950,3 @@ endif()
set(ZSTD_INCLUDE_DIRS ${LIBDIR}/zstd/include)
set(ZSTD_LIBRARIES ${LIBDIR}/zstd/lib/zstd_static.lib)
set(LEVEL_ZERO_ROOT_DIR ${LIBDIR}/level_zero)
set(SYCL_ROOT_DIR ${LIBDIR}/dpcpp)

View File

@@ -54,8 +54,6 @@ buildbot:
version: '10.1.243'
cuda11:
version: '11.4.1'
hip:
version: '5.2.21440'
optix:
version: '7.3.0'
cmake:

View File

@@ -1,111 +1,59 @@
# SPDX-License-Identifier: GPL-2.0-or-later
"""
---------------
Dump the python API into a text file so we can generate changelogs.
Dump the python API into a JSON file, or generate changelogs from those JSON API dumps.
output from this tool should be added into "doc/python_api/rst/change_log.rst"
Typically, changelog output from this tool should be added into "doc/python_api/rst/change_log.rst"
# dump api blender_version.py in CWD
blender --background --python doc/python_api/sphinx_changelog_gen.py -- --dump
API dump files are saved together with the generated API doc on the server, with a general index file.
This way the changelog generation simply needs to re-download the previous version's dump for the diffing process.
---------------
# Dump api blender_version.json in CWD:
blender --background --factory-startup --python doc/python_api/sphinx_changelog_gen.py -- \
--indexpath="path/to/api/docs/api_dump_index.json" \
dump --filepath-out="path/to/api/docs/<version>/api_dump.json"
# Create changelog:
# create changelog
blender --background --factory-startup --python doc/python_api/sphinx_changelog_gen.py -- \
--indexpath="path/to/api/docs/api_dump_index.json" \
changelog --filepath-out doc/python_api/rst/change_log.rst
--api_from blender_2_63_0.py \
--api_to blender_2_64_0.py \
--api_out changes.rst
# Api comparison can also run without blender,
# will by default generate a changelog between the last two available versions listed in the index,
# unless input files are provided explicitly:
# Api comparison can also run without blender
python doc/python_api/sphinx_changelog_gen.py -- \
--indexpath="path/to/api/docs/api_dump_index.json" \
changelog --filepath-in-from blender_api_2_63_0.json \
--filepath-in-to blender_api_2_64_0.json \
--filepath-out changes.rst
--api_from blender_api_2_63_0.py \
--api_to blender_api_2_64_0.py \
--api_out changes.rst
--------------
API dump index format:
{[version_main, version_sub]: "<version>/api_dump.json", ...
}
API dump format:
[
[version_main, version_sub, version_path],
{"module.name":
{"parent.class":
{"basic_type", "member_name":
["Name", type, range, length, default, descr, f_args, f_arg_types, f_ret_types]}, ...
}, ...
}
]
# Save the latest API dump in this folder, renaming it with its revision.
# This way the next person updating it doesn't need to build an old Blender only for that
"""
import json
import os
# format
'''
{"module.name":
{"parent.class":
{"basic_type", "member_name":
("Name", type, range, length, default, descr, f_args, f_arg_types, f_ret_types)}, ...
}, ...
}
'''
api_names = "basic_type" "name", "type", "range", "length", "default", "descr", "f_args", "f_arg_types", "f_ret_types"
API_BASIC_TYPE = 0
API_F_ARGS = 7
def api_version():
try:
import bpy
except:
return None, None
version = tuple(bpy.app.version[:2])
version_key = "%d.%d" % (version[0], version[1])
return version, version_key
def api_dunp_fname():
import bpy
return "blender_api_%s.py" % "_".join([str(i) for i in bpy.app.version])
def api_version_previous_in_index(index, version):
print("Searching for previous version to %s in %r" % (version, index))
version_prev = (version[0], version[1])
while True:
version_prev = (version_prev[0], version_prev[1] - 1)
if version_prev[1] < 0:
version_prev = (version_prev[0] - 1, 99)
if version_prev[0] < 0:
return None, None
version_prev_key = "%d.%d" % (version_prev[0], version_prev[1])
if version_prev_key in index:
print("Found previous version %s: %r" % (version_prev, index[version_prev_key]))
return version_prev, version_prev_key
class JSONEncoderAPIDump(json.JSONEncoder):
def default(self, o):
if o is ...:
return "..."
if isinstance(o, set):
return tuple(o)
return json.JSONEncoder.default(self, o)
def api_dump(args):
import rna_info
import inspect
version, version_key = api_version()
if version is None:
raise(ValueError("API dumps can only be generated from within Blender."))
def api_dump():
dump = {}
dump_module = dump["bpy.types"] = {}
import rna_info
import inspect
struct = rna_info.BuildRNAInfo()[0]
for struct_id, struct_info in sorted(struct.items()):
@@ -207,25 +155,17 @@ def api_dump(args):
)
del funcs
filepath_out = args.filepath_out
with open(filepath_out, 'w', encoding='utf-8') as file_handle:
json.dump((version, dump), file_handle, cls=JSONEncoderAPIDump)
import pprint
indexpath = args.indexpath
rootpath = os.path.dirname(indexpath)
if os.path.exists(indexpath):
with open(indexpath, 'r', encoding='utf-8') as file_handle:
index = json.load(file_handle)
else:
index = {}
index[version_key] = os.path.relpath(filepath_out, rootpath)
with open(indexpath, 'w', encoding='utf-8') as file_handle:
json.dump(index, file_handle)
print("API version %s dumped into %r, and index %r has been updated" % (version_key, filepath_out, indexpath))
filename = api_dunp_fname()
filehandle = open(filename, 'w', encoding='utf-8')
tot = filehandle.write(pprint.pformat(dump, width=1))
filehandle.close()
print("%s, %d bytes written" % (filename, tot))
def compare_props(a, b, fuzz=0.75):
# must be same basic_type, function != property
if a[0] != b[0]:
return False
@@ -240,44 +180,15 @@ def compare_props(a, b, fuzz=0.75):
return ((tot / totlen) >= fuzz)
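Only a fragment of compare_props is visible here; the idea it suggests is that two member records count as the same (renamed) property when their basic_type matches and at least fuzz (75%) of the remaining descriptor fields are equal. A rough standalone sketch of that idea, not the exact upstream implementation:

    def fuzzy_props_match(a, b, fuzz=0.75):
        # must share basic_type; a function never matches a property
        if a[0] != b[0]:
            return False
        pairs = list(zip(a[1:], b[1:]))
        if not pairs:
            return False
        tot = sum(1 for x, y in pairs if x == y)
        return (tot / len(pairs)) >= fuzz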
def api_changelog(args):
indexpath = args.indexpath
filepath_in_from = args.filepath_in_from
filepath_in_to = args.filepath_in_to
filepath_out = args.filepath_out
def api_changelog(api_from, api_to, api_out):
rootpath = os.path.dirname(indexpath)
file_handle = open(api_from, 'r', encoding='utf-8')
dict_from = eval(file_handle.read())
file_handle.close()
version, version_key = api_version()
if version is None and (filepath_in_from is None or filepath_in_to is None):
raise(ValueError("API dumps files must be given when ran outside of Blender."))
with open(indexpath, 'r', encoding='utf-8') as file_handle:
index = json.load(file_handle)
if filepath_in_to is None:
filepath_in_to = index.get(version_key, None)
if filepath_in_to is None:
raise(ValueError("Cannot find API dump file for Blender version " + str(version) + " in index file."))
print("Found to file: %r" % filepath_in_to)
if filepath_in_from is None:
version_from, version_from_key = api_version_previous_in_index(index, version)
if version_from is None:
raise(ValueError("No previous version of Blender could be found in the index."))
filepath_in_from = index.get(version_from_key, None)
if filepath_in_from is None:
raise(ValueError("Cannot find API dump file for previous Blender version " + str(version_from) + " in index file."))
print("Found from file: %r" % filepath_in_from)
with open(os.path.join(rootpath, filepath_in_from), 'r', encoding='utf-8') as file_handle:
_, dict_from = json.load(file_handle)
with open(os.path.join(rootpath, filepath_in_to), 'r', encoding='utf-8') as file_handle:
dump_version, dict_to = json.load(file_handle)
assert(tuple(dump_version) == version)
file_handle = open(api_to, 'r', encoding='utf-8')
dict_to = eval(file_handle.read())
file_handle.close()
api_changes = []
@@ -338,66 +249,63 @@ def api_changelog(args):
# also document function argument changes
with open(filepath_out, 'w', encoding='utf-8') as fout:
fw = fout.write
fout = open(api_out, 'w', encoding='utf-8')
fw = fout.write
# print(api_changes)
# Write header.
fw(""
":tocdepth: 2\n"
"\n"
"Blender API Change Log\n"
"**********************\n"
"\n"
".. note, this document is auto generated by sphinx_changelog_gen.py\n"
"\n"
"\n"
"%s to %s\n"
"============\n"
"\n" % (version_from_key, version_key))
# :class:`bpy_struct.id_data`
def write_title(title, title_char):
fw("%s\n%s\n\n" % (title, title_char * len(title)))
def write_title(title, title_char):
fw("%s\n%s\n\n" % (title, title_char * len(title)))
for mod_id, class_id, props_moved, props_new, props_old, func_args in api_changes:
class_name = class_id.split(".")[-1]
title = mod_id + "." + class_name
write_title(title, "-")
for mod_id, class_id, props_moved, props_new, props_old, func_args in api_changes:
class_name = class_id.split(".")[-1]
title = mod_id + "." + class_name
write_title(title, "-")
if props_new:
write_title("Added", "^")
for prop_id in props_new:
fw("* :class:`%s.%s.%s`\n" % (mod_id, class_name, prop_id))
fw("\n")
if props_new:
write_title("Added", "^")
for prop_id in props_new:
fw("* :class:`%s.%s.%s`\n" % (mod_id, class_name, prop_id))
fw("\n")
if props_old:
write_title("Removed", "^")
for prop_id in props_old:
fw("* **%s**\n" % prop_id) # can't link to removed docs
fw("\n")
if props_old:
write_title("Removed", "^")
for prop_id in props_old:
fw("* **%s**\n" % prop_id) # can't link to removed docs
fw("\n")
if props_moved:
write_title("Renamed", "^")
for prop_id_old, prop_id in props_moved:
fw("* **%s** -> :class:`%s.%s.%s`\n" % (prop_id_old, mod_id, class_name, prop_id))
fw("\n")
if props_moved:
write_title("Renamed", "^")
for prop_id_old, prop_id in props_moved:
fw("* **%s** -> :class:`%s.%s.%s`\n" % (prop_id_old, mod_id, class_name, prop_id))
fw("\n")
if func_args:
write_title("Function Arguments", "^")
for func_id, args_old, args_new in func_args:
args_new = ", ".join(args_new)
args_old = ", ".join(args_old)
fw("* :class:`%s.%s.%s` (%s), *was (%s)*\n" % (mod_id, class_name, func_id, args_new, args_old))
fw("\n")
if func_args:
write_title("Function Arguments", "^")
for func_id, args_old, args_new in func_args:
args_new = ", ".join(args_new)
args_old = ", ".join(args_old)
fw("* :class:`%s.%s.%s` (%s), *was (%s)*\n" % (mod_id, class_name, func_id, args_new, args_old))
fw("\n")
print("Written: %r" % filepath_out)
fout.close()
print("Written: %r" % api_out)
def main(argv=None):
def main():
import sys
import argparse
import os
if argv is None:
argv = sys.argv
try:
import argparse
except ImportError:
print("Old Blender, just dumping")
api_dump()
return
argv = sys.argv
if "--" not in argv:
argv = [] # as if no args are passed
@@ -408,42 +316,42 @@ def main(argv=None):
usage_text = "Run blender in background mode with this script: "
"blender --background --factory-startup --python %s -- [options]" % os.path.basename(__file__)
parser = argparse.ArgumentParser(description=usage_text,
epilog=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
epilog = "Run this before releases"
parser = argparse.ArgumentParser(description=usage_text, epilog=epilog)
parser.add_argument(
"--indexpath", dest="indexpath", metavar='FILE', required=True,
help="Path of the JSON file containing the index of all available API dumps.")
"--dump", dest="dump", action='store_true',
help="When set the api will be dumped into blender_version.py")
parser_commands = parser.add_subparsers(required=True)
parser.add_argument(
"--api_from", dest="api_from", metavar='FILE',
help="File to compare from (previous version)")
parser.add_argument(
"--api_to", dest="api_to", metavar='FILE',
help="File to compare from (current)")
parser.add_argument(
"--api_out", dest="api_out", metavar='FILE',
help="Output sphinx changelog")
parser_dump = parser_commands.add_parser('dump', help="Dump the current Blender Python API into a JSON file.")
parser_dump.add_argument(
"--filepath-out", dest="filepath_out", metavar='FILE', required=True,
help="Path of the JSON file containing the dump of the API.")
parser_dump.set_defaults(func=api_dump)
args = parser.parse_args(argv) # In this example we won't use the args
parser_changelog = parser_commands.add_parser(
'changelog',
help="Generate the RST changelog page based on two Blender Python API JSON dumps.",
)
if not argv:
print("No args given!")
parser.print_help()
return
parser_changelog.add_argument(
"--filepath-in-from", dest="filepath_in_from", metavar='FILE', default=None,
help="JSON dump file to compare from (typically, previous version). "
"If not given, will be automatically determined from current Blender version and index file.")
parser_changelog.add_argument(
"--filepath-in-to", dest="filepath_in_to", metavar='FILE', default=None,
help="JSON dump file to compare to (typically, current version). "
"If not given, will be automatically determined from current Blender version and index file.")
parser_changelog.add_argument(
"--filepath-out", dest="filepath_out", metavar='FILE', required=True,
help="Output sphinx changelog RST file.")
parser_changelog.set_defaults(func=api_changelog)
if args.dump:
api_dump()
else:
if args.api_from and args.api_to and args.api_out:
api_changelog(args.api_from, args.api_to, args.api_out)
else:
print("Error: --api_from/api_to/api_out args needed")
parser.print_help()
return
args = parser.parse_args(argv)
args.func(args)
print("batch job finished, exiting")
if __name__ == "__main__":

View File

@@ -141,26 +141,6 @@ def handle_args():
required=False,
)
parser.add_argument(
"--api-changelog-generate",
dest="changelog",
default=False,
action='store_true',
help="Generate the API changelog RST file "
"(default=False, requires `--api-dump-index-path` parameter)",
required=False,
)
parser.add_argument(
"--api-dump-index-path",
dest="api_dump_index_path",
metavar='FILE',
default=None,
help="Path to the API dump index JSON file "
"(required when `--api-changelog-generate` is True)",
required=False,
)
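For example (hypothetical paths), these two options would typically be passed together when building the docs, e.g. blender --background --factory-startup --python doc/python_api/sphinx_doc_gen.py -- --api-changelog-generate --api-dump-index-path path/to/api/docs/api_dump_index.json.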
parser.add_argument(
"-o", "--output",
dest="output_dir",
@@ -534,42 +514,6 @@ if ARGS.sphinx_build_pdf:
sphinx_make_pdf_log = os.path.join(ARGS.output_dir, ".latex_make.log")
SPHINX_MAKE_PDF_STDOUT = open(sphinx_make_pdf_log, "w", encoding="utf-8")
# --------------------------------CHANGELOG GENERATION--------------------------------------
def generate_changelog():
import importlib.util
spec = importlib.util.spec_from_file_location(
"sphinx_changelog_gen",
os.path.abspath(os.path.join(SCRIPT_DIR, "sphinx_changelog_gen.py")),
)
sphinx_changelog_gen = importlib.util.module_from_spec(spec)
spec.loader.exec_module(sphinx_changelog_gen)
API_DUMP_INDEX_FILEPATH = ARGS.api_dump_index_path
API_DUMP_ROOT = os.path.dirname(API_DUMP_INDEX_FILEPATH)
API_DUMP_FILEPATH = os.path.abspath(os.path.join(API_DUMP_ROOT, BLENDER_VERSION_DOTS, "api_dump.json"))
API_CHANGELOG_FILEPATH = os.path.abspath(os.path.join(SPHINX_IN_TMP, "change_log.rst"))
sphinx_changelog_gen.main((
"--",
"--indexpath",
API_DUMP_INDEX_FILEPATH,
"dump",
"--filepath-out",
API_DUMP_FILEPATH,
))
sphinx_changelog_gen.main((
"--",
"--indexpath",
API_DUMP_INDEX_FILEPATH,
"changelog",
"--filepath-out",
API_CHANGELOG_FILEPATH,
))
# --------------------------------API DUMP--------------------------------------
# Lame, python won't give some access.
@@ -1529,8 +1473,7 @@ def pyrna2sphinx(basepath):
else:
fw(".. class:: %s\n\n" % struct_id)
write_indented_lines(" ", fw, struct.description, False)
fw("\n")
fw(" %s\n\n" % struct.description)
# Properties sorted in alphabetical order.
sorted_struct_properties = struct.properties[:]
@@ -2530,9 +2473,6 @@ def main():
rna2sphinx(SPHINX_IN_TMP)
if ARGS.changelog:
generate_changelog()
if ARGS.full_rebuild:
# Only for full updates.
shutil.rmtree(SPHINX_IN, True)

View File

@@ -270,7 +270,7 @@ AUD_API int AUD_readSound(AUD_Sound* sound, float* buffer, int length, int sampl
return length;
}
AUD_API int AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data, char* error, size_t errorsize)
AUD_API const char* AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data)
{
try
{
@@ -282,20 +282,15 @@ AUD_API int AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int lengt
std::shared_ptr<IWriter> writer = FileWriter::createWriter(filename, convCToDSpec(specs), static_cast<Container>(format), static_cast<Codec>(codec), bitrate);
FileWriter::writeReader(reader, writer, length, buffersize, callback, data);
return true;
return nullptr;
}
catch(Exception& e)
{
if(error && errorsize)
{
std::strncpy(error, e.getMessage().c_str(), errorsize);
error[errorsize - 1] = '\0';
}
return false;
return e.getMessage().c_str();
}
}
AUD_API int AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data, char* error, size_t errorsize)
AUD_API const char* AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data)
{
try
{
@@ -333,16 +328,11 @@ AUD_API int AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsign
reader->seek(start);
FileWriter::writeReader(reader, writers, length, buffersize, callback, data);
return true;
return nullptr;
}
catch(Exception& e)
{
if(error && errorsize)
{
std::strncpy(error, e.getMessage().c_str(), errorsize);
error[errorsize - 1] = '\0';
}
return false;
return e.getMessage().c_str();
}
}

View File

@@ -70,15 +70,13 @@ extern AUD_API int AUD_readSound(AUD_Sound* sound, float* buffer, int length, in
* \param bitrate The bitrate for encoding.
* \param callback A callback function that is called periodically during mixdown, reporting progress if length > 0. Can be NULL.
* \param data Pass through parameter that is passed to the callback.
* \param error String buffer to copy the error message to in case of failure.
* \param errorsize The size of the error buffer.
* \return Whether or not the operation succeeded.
* \return An error message or NULL in case of success.
*/
extern AUD_API int AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length,
extern AUD_API const char* AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length,
unsigned int buffersize, const char* filename,
AUD_DeviceSpecs specs, AUD_Container format,
AUD_Codec codec, unsigned int bitrate,
void(*callback)(float, void*), void* data, char* error, size_t errorsize);
void(*callback)(float, void*), void* data);
/**
* Mixes a sound down into multiple files.
@@ -93,15 +91,13 @@ extern AUD_API int AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned in
* \param bitrate The bitrate for encoding.
* \param callback A callback function that is called periodically during mixdown, reporting progress if length > 0. Can be NULL.
* \param data Pass through parameter that is passed to the callback.
* \param error String buffer to copy the error message to in case of failure.
* \param errorsize The size of the error buffer.
* \return Whether or not the operation succeeded.
* \return An error message or NULL in case of success.
*/
extern AUD_API int AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length,
extern AUD_API const char* AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length,
unsigned int buffersize, const char* filename,
AUD_DeviceSpecs specs, AUD_Container format,
AUD_Codec codec, unsigned int bitrate,
void(*callback)(float, void*), void* data, char* error, size_t errorsize);
void(*callback)(float, void*), void* data);
/**
* Opens a read device and prepares it for mixdown of the sound scene.

View File

@@ -41,7 +41,7 @@ double PulseAudioDevice::PulseAudioSynchronizer::getPosition(std::shared_ptr<IHa
void PulseAudioDevice::updateRingBuffer()
{
unsigned int samplesize = AUD_DEVICE_SAMPLE_SIZE(m_specs);
unsigned int samplesize = AUD_SAMPLE_SIZE(m_specs);
std::unique_lock<std::mutex> lock(m_mixingLock);

View File

@@ -1,5 +1,5 @@
Project: Curve-Fit-nD
URL: https://github.com/ideasman42/curve-fit-nd
License: BSD 3-Clause
Upstream version: ae32da9de264c3ed399673e2bc1bc09003799416 (Last Release)
Upstream version: ddcd5bd (Last Release)
Local modifications: None

View File

@@ -39,7 +39,7 @@
* Takes a flat array of points and evaluates that to calculate a bezier spline.
*
* \param points, points_len: The array of points to calculate a cubics from.
* \param dims: The number of dimensions for each element in \a points.
* \param dims: The number of dimensions for for each element in \a points.
* \param error_threshold: the error threshold to allow for,
* the curve will be within this distance from \a points.
* \param corners, corners_len: indices for points which will not have aligned tangents (optional).
@@ -47,10 +47,10 @@
* to evaluate a line to detect corner indices.
*
* \param r_cubic_array, r_cubic_array_len: Resulting array of tangents and knots, formatted as follows:
* `r_cubic_array[r_cubic_array_len][3][dims]`,
* ``r_cubic_array[r_cubic_array_len][3][dims]``,
* where each point has 0 and 2 for the tangents and the middle index 1 for the knot.
* The size of the *flat* array will be `r_cubic_array_len * 3 * dims`.
* \param r_corner_index_array, r_corner_index_len: Corner indices in \a r_cubic_array (optional).
* The size of the *flat* array will be ``r_cubic_array_len * 3 * dims``.
* \param r_corner_index_array, r_corner_index_len: Corner indices in in \a r_cubic_array (optional).
* This allows you to access corners on the resulting curve.
*
* \returns zero on success, nonzero is reserved for error values.
@@ -85,7 +85,7 @@ int curve_fit_cubic_to_points_fl(
* Takes a flat array of points and evaluates that to calculate handle lengths.
*
* \param points, points_len: The array of points to calculate a cubics from.
* \param dims: The number of dimensions for each element in \a points.
* \param dims: The number of dimensions for for each element in \a points.
* \param points_length_cache: Optional pre-calculated lengths between points.
* \param error_threshold: the error threshold to allow for,
* \param tan_l, tan_r: Normalized tangents the handles will be aligned to.
@@ -166,7 +166,7 @@ int curve_fit_cubic_to_points_refit_fl(
* A helper function that takes a line and outputs its corner indices.
*
* \param points, points_len: Curve to evaluate.
* \param dims: The number of dimensions for each element in \a points.
* \param dims: The number of dimensions for for each element in \a points.
* \param radius_min: Corners on the curve between points below this radius are ignored.
* \param radius_max: Corners on the curve above this radius are ignored.
* \param samples_max: Prevent testing corners beyond this many points

View File

@@ -43,24 +43,20 @@
#include "../curve_fit_nd.h"
/** Take curvature into account when calculating the least square solution isn't usable. */
/* Take curvature into account when calculating the least square solution isn't usable. */
#define USE_CIRCULAR_FALLBACK
/**
* Use the maximum distance of any points from the direct line between 2 points
/* Use the maximum distance of any points from the direct line between 2 points
* to calculate how long the handles need to be.
* Can do a 'perfect' reversal of subdivision when the curve has symmetrical handles and doesn't change direction
* (as with an 'S' shape).
*/
* (as with an 'S' shape). */
#define USE_OFFSET_FALLBACK
/** Avoid re-calculating lengths multiple times. */
/* avoid re-calculating lengths multiple times */
#define USE_LENGTH_CACHE
/**
* Store the indices in the cubic data so we can return the original indices,
* useful when the caller has data associated with the curve.
*/
/* store the indices in the cubic data so we can return the original indices,
* useful when the caller has data associated with the curve. */
#define USE_ORIG_INDEX_DATA
typedef unsigned int uint;
@@ -99,15 +95,13 @@ typedef unsigned int uint;
* \{ */
typedef struct Cubic {
/** Single linked lists. */
/* single linked lists */
struct Cubic *next;
#ifdef USE_ORIG_INDEX_DATA
uint orig_span;
#endif
/**
* 0: point_0, 1: handle_0, 2: handle_1, 3: point_1,
* each one is offset by 'dims'.
*/
/* 0: point_0, 1: handle_0, 2: handle_1, 3: point_1,
* each one is offset by 'dims' */
double pt_data[0];
} Cubic;
@@ -201,7 +195,7 @@ static double *cubic_list_as_array(
bool use_orig_index = (r_orig_index != NULL);
#endif
/* Fill the array backwards. */
/* fill the array backwards */
const size_t array_chunk = 3 * dims;
double *array_iter = array + array_flat_len;
for (Cubic *citer = clist->items; citer; citer = citer->next) {
@@ -227,15 +221,15 @@ static double *cubic_list_as_array(
}
#endif
/* Flip tangent for first and last (we could leave at zero, but set to something useful). */
/* flip tangent for first and last (we could leave at zero, but set to something useful) */
/* First. */
/* first */
array_iter -= array_chunk;
memcpy(&array_iter[dims], handle_prev, sizeof(double) * 2 * dims);
flip_vn_vnvn(&array_iter[0 * dims], &array_iter[1 * dims], &array_iter[2 * dims], dims);
assert(array == array_iter);
/* Last. */
/* last */
array_iter += array_flat_len - (3 * dims);
flip_vn_vnvn(&array_iter[2 * dims], &array_iter[1 * dims], &array_iter[0 * dims], dims);
@@ -461,7 +455,7 @@ static double points_calc_circumference_factor(
const double dot = dot_vnvn(tan_l, tan_r, dims);
const double len_tangent = dot < 0.0 ? len_vnvn(tan_l, tan_r, dims) : len_negated_vnvn(tan_l, tan_r, dims);
if (len_tangent > DBL_EPSILON) {
/* Only clamp to avoid precision error. */
/* only clamp to avoid precision error */
double angle = acos(max(-fabs(dot), -1.0));
/* Angle may be less than the length when the tangents define >180 degrees of the circle,
* (tangents that point away from each other).
@@ -472,7 +466,7 @@ static double points_calc_circumference_factor(
return factor;
}
else {
/* Tangents are exactly aligned (think two opposite sides of a circle). */
/* tangents are exactly aligned (think two opposite sides of a circle). */
return (M_PI / 2);
}
}
@@ -491,18 +485,18 @@ static double points_calc_circle_tangent_factor(
const double eps = 1e-8;
const double tan_dot = dot_vnvn(tan_l, tan_r, dims);
if (tan_dot > 1.0 - eps) {
/* No angle difference (use fallback, length won't make any difference). */
/* no angle difference (use fallback, length wont make any difference) */
return (1.0 / 3.0) * 0.75;
}
else if (tan_dot < -1.0 + eps) {
/* Parallel tangents (half-circle). */
/* parallel tangents (half-circle) */
return (1.0 / 2.0);
}
else {
/* Non-aligned tangents, calculate handle length. */
/* non-aligned tangents, calculate handle length */
const double angle = acos(tan_dot) / 2.0;
/* Could also use `angle_sin = len_vnvn(tan_l, tan_r, dims) / 2.0`. */
/* could also use 'angle_sin = len_vnvn(tan_l, tan_r, dims) / 2.0' */
const double angle_sin = sin(angle);
const double angle_cos = cos(angle);
return ((1.0 - angle_cos) / (angle_sin * 2.0)) / angle_sin;
@@ -522,15 +516,15 @@ static double points_calc_cubic_scale(
const double len_direct = len_vnvn(v_l, v_r, dims);
const double len_circle_factor = points_calc_circle_tangent_factor(tan_l, tan_r, dims);
/* If this curve is a circle, this value doesn't need modification. */
/* if this curve is a circle, this value doesn't need modification */
const double len_circle_handle = (len_direct * (len_circle_factor / 0.75));
/* Scale by the difference from the circumference distance. */
/* scale by the difference from the circumference distance */
const double len_circle = len_direct * points_calc_circumference_factor(tan_l, tan_r, dims);
double scale_handle = (coords_length / len_circle);
/* Could investigate an accurate calculation here,
* though this gives close results. */
* though this gives close results */
scale_handle = ((scale_handle - 1.0) * 1.75) + 1.0;
return len_circle_handle * scale_handle;
@@ -560,8 +554,9 @@ static void cubic_from_points_fallback(
r_cubic->orig_span = (points_offset_len - 1);
#endif
/* `p1 = p0 - (tan_l * alpha);`
* `p2 = p3 + (tan_r * alpha);` */
/* p1 = p0 - (tan_l * alpha);
* p2 = p3 + (tan_r * alpha);
*/
msub_vn_vnvn_fl(p1, p0, tan_l, alpha, dims);
madd_vn_vnvn_fl(p2, p3, tan_r, alpha, dims);
}
@@ -599,7 +594,7 @@ static void cubic_from_points_offset_fallback(
project_plane_vn_vnvn_normalized(a[0], tan_l, dir_unit, dims);
project_plane_vn_vnvn_normalized(a[1], tan_r, dir_unit, dims);
/* Only for better accuracy, not essential. */
/* only for better accuracy, not essential */
normalize_vn(a[0], dims);
normalize_vn(a[1], dims);
@@ -625,7 +620,7 @@ static void cubic_from_points_offset_fallback(
*
* The 'dists[..] + dir_dirs' limit is just a rough approximation.
* While a more exact value could be calculated,
* in this case the error values approach divide by zero (infinite)
* in this case the error values approach divide by zero (inf)
* so there is no need to be too precise when checking if limits have been exceeded. */
double alpha_l = (dists[0] / 0.75) / fabs(dot_vnvn(tan_l, a[0], dims));
@@ -649,8 +644,9 @@ static void cubic_from_points_offset_fallback(
r_cubic->orig_span = (points_offset_len - 1);
#endif
/* `p1 = p0 - (tan_l * alpha_l);`
* `p2 = p3 + (tan_r * alpha_r);` */
/* p1 = p0 - (tan_l * alpha_l);
* p2 = p3 + (tan_r * alpha_r);
*/
msub_vn_vnvn_fl(p1, p0, tan_l, alpha_l, dims);
madd_vn_vnvn_fl(p2, p3, tan_r, alpha_r, dims);
}
@@ -678,7 +674,7 @@ static void cubic_from_points(
const double *p0 = &points_offset[0];
const double *p3 = &points_offset[(points_offset_len - 1) * dims];
/* Point Pairs. */
/* Point Pairs */
double alpha_l, alpha_r;
#ifdef USE_VLA
double a[2][dims];
@@ -700,7 +696,7 @@ static void cubic_from_points(
const double b0_plus_b1 = B0plusB1(u_prime[i]);
const double b2_plus_b3 = B2plusB3(u_prime[i]);
/* Inline dot product. */
/* inline dot product */
for (uint j = 0; j < dims; j++) {
const double tmp = (pt[j] - (p0[j] * b0_plus_b1)) + (p3[j] * b2_plus_b3);
@@ -723,7 +719,7 @@ static void cubic_from_points(
det_C0_C1 = c[0][0] * c[1][1] * 10e-12;
}
/* May still divide-by-zero, check below will catch NAN values. */
/* may still divide-by-zero, check below will catch nan values */
alpha_l = det_X_C1 / det_C0_C1;
alpha_r = det_C_0X / det_C0_C1;
}
@@ -740,7 +736,7 @@ static void cubic_from_points(
bool use_clamp = true;
/* Flip check to catch NAN values. */
/* flip check to catch nan values */
if (!(alpha_l >= 0.0) ||
!(alpha_r >= 0.0))
{
@@ -754,7 +750,7 @@ static void cubic_from_points(
alpha_l = alpha_r = len_vnvn(p0, p3, dims) / 3.0;
#endif
/* Skip clamping when we're using default handles. */
/* skip clamping when we're using default handles */
use_clamp = false;
}
@@ -768,8 +764,9 @@ static void cubic_from_points(
r_cubic->orig_span = (points_offset_len - 1);
#endif
/* `p1 = p0 - (tan_l * alpha_l);`
* `p2 = p3 + (tan_r * alpha_r);` */
/* p1 = p0 - (tan_l * alpha_l);
* p2 = p3 + (tan_r * alpha_r);
*/
msub_vn_vnvn_fl(p1, p0, tan_l, alpha_l, dims);
madd_vn_vnvn_fl(p2, p3, tan_r, alpha_r, dims);
@@ -784,7 +781,7 @@ static void cubic_from_points(
#endif
points_calc_center_weighted(points_offset, points_offset_len, dims, center);
const double clamp_scale = 3.0; /* Clamp to 3x. */
const double clamp_scale = 3.0; /* clamp to 3x */
double dist_sq_max = 0.0;
{
@@ -793,7 +790,7 @@ static void cubic_from_points(
#if 0
double dist_sq_test = sq(len_vnvn(center, pt, dims) * clamp_scale);
#else
/* Do inline. */
/* do inline */
double dist_sq_test = 0.0;
for (uint j = 0; j < dims; j++) {
dist_sq_test += sq((pt[j] - center[j]) * clamp_scale);
@@ -819,8 +816,10 @@ static void cubic_from_points(
alpha_l = alpha_r = len_vnvn(p0, p3, dims) / 3.0;
#endif
/* `p1 = p0 - (tan_l * alpha_l);`
* `p2 = p3 + (tan_r * alpha_r);` */
/*
* p1 = p0 - (tan_l * alpha_l);
* p2 = p3 + (tan_r * alpha_r);
*/
for (uint j = 0; j < dims; j++) {
p1[j] = p0[j] - (tan_l[j] * alpha_l);
p2[j] = p3[j] + (tan_r[j] * alpha_r);
@@ -830,7 +829,7 @@ static void cubic_from_points(
p2_dist_sq = len_squared_vnvn(center, p2, dims);
}
/* Clamp within the 3x radius. */
/* clamp within the 3x radius */
if (p1_dist_sq > dist_sq_max) {
isub_vnvn(p1, center, dims);
imul_vn_fl(p1, sqrt(dist_sq_max) / sqrt(p1_dist_sq), dims);
@@ -842,7 +841,7 @@ static void cubic_from_points(
iadd_vnvn(p2, center, dims);
}
}
/* End clamping. */
/* end clamping */
}
#ifdef USE_LENGTH_CACHE
@@ -918,7 +917,7 @@ static double cubic_find_root(
const uint dims)
{
/* Newton-Raphson Method. */
/* All vectors. */
/* all vectors */
#ifdef USE_VLA
double q0_u[dims];
double q1_u[dims];
@@ -933,8 +932,8 @@ static double cubic_find_root(
cubic_calc_speed(cubic, u, dims, q1_u);
cubic_calc_acceleration(cubic, u, dims, q2_u);
/* May divide-by-zero, caller must check for that case. */
/* `u - ((q0_u - p) * q1_u) / (q1_u.length_squared() + (q0_u - p) * q2_u)` */
/* may divide-by-zero, caller must check for that case */
/* u - ((q0_u - p) * q1_u) / (q1_u.length_squared() + (q0_u - p) * q2_u) */
isub_vnvn(q0_u, p, dims);
return u - dot_vnvn(q0_u, q1_u, dims) /
(len_squared_vn(q1_u, dims) + dot_vnvn(q0_u, q2_u, dims));
@@ -1033,7 +1032,7 @@ static bool fit_cubic_to_points(
double error_max_sq;
uint split_index;
/* Parameterize points, and attempt to fit curve. */
/* Parameterize points, and attempt to fit curve */
cubic_from_points(
points_offset, points_offset_len,
#ifdef USE_CIRCULAR_FALLBACK
@@ -1041,7 +1040,7 @@ static bool fit_cubic_to_points(
#endif
u, tan_l, tan_r, dims, r_cubic);
/* Find max deviation of points to fitted curve. */
/* Find max deviation of points to fitted curve */
error_max_sq = cubic_calc_error(
r_cubic, points_offset, points_offset_len, u, dims,
&split_index);
@@ -1063,7 +1062,7 @@ static bool fit_cubic_to_points(
cubic_test, points_offset, points_offset_len, u, dims,
&split_index);
/* Intentionally use the newly calculated 'split_index',
/* intentionally use the newly calculated 'split_index',
* even if the 'error_max_sq_test' is worse. */
if (error_max_sq > error_max_sq_test) {
error_max_sq = error_max_sq_test;
@@ -1072,7 +1071,7 @@ static bool fit_cubic_to_points(
}
#endif
/* Test the offset fallback. */
/* Test the offset fallback */
#ifdef USE_OFFSET_FALLBACK
if (!(error_max_sq < error_threshold_sq)) {
/* Using the offset from the curve to calculate cubic handle length may give better results
@@ -1096,7 +1095,7 @@ static bool fit_cubic_to_points(
if (!(error_max_sq < error_threshold_sq)) {
cubic_copy(cubic_test, r_cubic, dims);
/* If error not too large, try some re-parameterization and iteration. */
/* If error not too large, try some reparameterization and iteration */
double *u_prime = malloc(sizeof(double) * points_offset_len);
for (uint iter = 0; iter < iteration_max; iter++) {
if (!cubic_reparameterize(
@@ -1124,7 +1123,7 @@ static bool fit_cubic_to_points(
}
if (!(error_max_sq < error_threshold_sq)) {
/* Continue. */
/* continue */
}
else {
assert((error_max_sq < error_threshold_sq));
@@ -1157,7 +1156,7 @@ static void fit_cubic_to_points_recursive(
const double error_threshold_sq,
const uint calc_flag,
const uint dims,
/* Fill in the list. */
/* fill in the list */
CubicList *clist)
{
Cubic *cubic = cubic_alloc(dims);
@@ -1181,7 +1180,7 @@ static void fit_cubic_to_points_recursive(
cubic_free(cubic);
/* Fitting failed -- split at max error point and fit recursively. */
/* Fitting failed -- split at max error point and fit recursively */
/* Check splinePoint is not an endpoint?
*
@@ -1213,7 +1212,7 @@ static void fit_cubic_to_points_recursive(
#endif
const double *pt = &points_offset[split_index * dims];
/* `tan_center = ((pt_a - pt).normalized() + (pt - pt_b).normalized()).normalized()`. */
/* tan_center = ((pt_a - pt).normalized() + (pt - pt_b).normalized()).normalized() */
normalize_vn_vnvn(tan_center_a, pt_a, pt, dims);
normalize_vn_vnvn(tan_center_b, pt, pt_b, dims);
add_vn_vnvn(tan_center, tan_center_a, tan_center_b, dims);
@@ -1307,8 +1306,9 @@ int curve_fit_cubic_to_points_db(
const double *pt_l_next = pt_l + dims;
const double *pt_r_prev = pt_r - dims;
/* `tan_l = (pt_l - pt_l_next).normalized();`
* `tan_r = (pt_r_prev - pt_r).normalized();` */
/* tan_l = (pt_l - pt_l_next).normalized()
* tan_r = (pt_r_prev - pt_r).normalized()
*/
normalize_vn_vnvn(tan_l, pt_l, pt_l_next, dims);
normalize_vn_vnvn(tan_r, pt_r_prev, pt_r, dims);
@@ -1362,7 +1362,7 @@ int curve_fit_cubic_to_points_db(
*r_cubic_orig_index = NULL;
#endif
/* Allocate a contiguous array and free the linked list. */
/* allocate a contiguous array and free the linked list */
*r_cubic_array = cubic_list_as_array(
&clist
#ifdef USE_ORIG_INDEX_DATA
@@ -1454,7 +1454,7 @@ int curve_fit_cubic_to_points_single_db(
{
Cubic *cubic = alloca(cubic_alloc_size(dims));
/* In this instance there are no advantage in using length cache,
/* in this instance theres no advantage in using length cache,
* since we're not recursively calculating values. */
#ifdef USE_LENGTH_CACHE
double *points_length_cache_alloc = NULL;

View File

@@ -1490,4 +1490,3 @@ int curve_fit_cubic_to_points_refit_fl(
return result;
}

View File

@@ -37,7 +37,7 @@
* - #TPOOL_STRUCT: Name for pool struct name.
* - #TPOOL_CHUNK_SIZE: Chunk size (optional), use 64kb when not defined.
*
* \note #TPOOL_ALLOC_TYPE must be at least `sizeof(void *)`.
* \note #TPOOL_ALLOC_TYPE must be at least ``sizeof(void *)``.
*
* Defines the API, uses #TPOOL_IMPL_PREFIX to prefix each function.
*

View File

@@ -305,3 +305,5 @@ void *HEAP_node_ptr(HeapNode *node)
{
return node->ptr;
}
/** \} */

View File

@@ -2,5 +2,4 @@ Project: Mantaflow
URL: http://mantaflow.com/
License: Apache 2.0
Upstream version: 0.13
Local modifications:
* ./patches/local_namespace.diff to support loading MANTA variables into an isolated __main__ name-space.
Local modifications: None

View File

@@ -115,7 +115,7 @@ class WrapperRegistry {
void construct(const std::string &scriptname, const vector<string> &args);
void cleanup();
void renameObjects();
void runPreInit(PyObject *name_space);
void runPreInit();
PyObject *initModule();
ClassData *lookup(const std::string &name);
bool canConvert(ClassData *from, ClassData *to);
@@ -505,7 +505,7 @@ void WrapperRegistry::addConstants(PyObject *module)
}
}
void WrapperRegistry::runPreInit(PyObject *name_space)
void WrapperRegistry::runPreInit()
{
// add python directories to path
PyObject *sys_path = PySys_GetObject((char *)"path");
@@ -518,15 +518,7 @@ void WrapperRegistry::runPreInit(PyObject *name_space)
}
if (!mCode.empty()) {
mCode = "from manta import *\n" + mCode;
PyObject *return_value = PyRun_String(mCode.c_str(), Py_file_input, name_space, name_space);
if (return_value == nullptr) {
if (PyErr_Occurred()) {
PyErr_Print();
}
}
else {
Py_DECREF(return_value);
}
PyRun_SimpleString(mCode.c_str());
}
}
@@ -706,23 +698,16 @@ PyObject *WrapperRegistry::initModule()
//******************************************************
// Register members and exposed functions
void setup(const bool python_lifecycle,
const std::string &filename,
const std::vector<std::string> &args,
PyObject *name_space)
void setup(const std::string &filename, const std::vector<std::string> &args)
{
WrapperRegistry::instance().construct(filename, args);
if (python_lifecycle) {
Py_Initialize();
}
WrapperRegistry::instance().runPreInit(name_space);
Py_Initialize();
WrapperRegistry::instance().runPreInit();
}
void finalize(const bool python_lifecycle)
void finalize()
{
if (python_lifecycle) {
Py_Finalize();
}
Py_Finalize();
WrapperRegistry::instance().cleanup();
}

View File

@@ -48,11 +48,8 @@ template<class T> struct Namify {
namespace Pb {
// internal registry access
void setup(bool python_lifecycle,
const std::string &filename,
const std::vector<std::string> &args,
PyObject *name_space);
void finalize(bool python_lifecycle);
void setup(const std::string &filename, const std::vector<std::string> &args);
void finalize();
bool canConvert(PyObject *obj, const std::string &to);
Manta::PbClass *objFromPy(PyObject *obj);
Manta::PbClass *createPy(const std::string &classname,

View File

@@ -1,86 +0,0 @@
diff --git a/extern/mantaflow/helper/pwrapper/registry.cpp b/extern/mantaflow/helper/pwrapper/registry.cpp
index 5196c0409f8..b4206a41dea 100644
--- a/extern/mantaflow/helper/pwrapper/registry.cpp
+++ b/extern/mantaflow/helper/pwrapper/registry.cpp
@@ -115,7 +115,7 @@ class WrapperRegistry {
void construct(const std::string &scriptname, const vector<string> &args);
void cleanup();
void renameObjects();
- void runPreInit();
+ void runPreInit(PyObject *name_space);
PyObject *initModule();
ClassData *lookup(const std::string &name);
bool canConvert(ClassData *from, ClassData *to);
@@ -505,7 +505,7 @@ void WrapperRegistry::addConstants(PyObject *module)
}
}
-void WrapperRegistry::runPreInit()
+void WrapperRegistry::runPreInit(PyObject *name_space)
{
// add python directories to path
PyObject *sys_path = PySys_GetObject((char *)"path");
@@ -518,7 +518,15 @@ void WrapperRegistry::runPreInit()
}
if (!mCode.empty()) {
mCode = "from manta import *\n" + mCode;
- PyRun_SimpleString(mCode.c_str());
+ PyObject *return_value = PyRun_String(mCode.c_str(), Py_file_input, name_space, name_space);
+ if (return_value == nullptr) {
+ if (PyErr_Occurred()) {
+ PyErr_Print();
+ }
+ }
+ else {
+ Py_DECREF(return_value);
+ }
}
}
@@ -698,16 +706,23 @@ PyObject *WrapperRegistry::initModule()
//******************************************************
// Register members and exposed functions
-void setup(const std::string &filename, const std::vector<std::string> &args)
+void setup(const bool python_lifecycle,
+ const std::string &filename,
+ const std::vector<std::string> &args,
+ PyObject *name_space)
{
WrapperRegistry::instance().construct(filename, args);
- Py_Initialize();
- WrapperRegistry::instance().runPreInit();
+ if (python_lifecycle) {
+ Py_Initialize();
+ }
+ WrapperRegistry::instance().runPreInit(name_space);
}
-void finalize()
+void finalize(const bool python_lifecycle)
{
- Py_Finalize();
+ if (python_lifecycle) {
+ Py_Finalize();
+ }
WrapperRegistry::instance().cleanup();
}
diff --git a/extern/mantaflow/helper/pwrapper/registry.h b/extern/mantaflow/helper/pwrapper/registry.h
index d9d2bbb624b..2273d0b9bb1 100644
--- a/extern/mantaflow/helper/pwrapper/registry.h
+++ b/extern/mantaflow/helper/pwrapper/registry.h
@@ -48,8 +48,11 @@ template<class T> struct Namify {
namespace Pb {
// internal registry access
-void setup(const std::string &filename, const std::vector<std::string> &args);
-void finalize();
+void setup(bool python_lifecycle,
+ const std::string &filename,
+ const std::vector<std::string> &args,
+ PyObject *name_space);
+void finalize(bool python_lifecycle);
bool canConvert(PyObject *obj, const std::string &to);
Manta::PbClass *objFromPy(PyObject *obj);
Manta::PbClass *createPy(const std::string &classname,

View File

@@ -67,10 +67,3 @@ endif()
if(UNIX AND NOT APPLE)
add_subdirectory(libc_compat)
endif()
if(UNIX AND NOT APPLE)
# Important this comes after "ghost" as it uses includes defined by GHOST's CMake.
if(WITH_GHOST_WAYLAND AND WITH_GHOST_WAYLAND_DYNLOAD)
add_subdirectory(wayland_dynload)
endif()
endif()

View File

@@ -263,10 +263,6 @@ if(WITH_CYCLES_DEVICE_OPTIX)
endif()
endif()
if (WITH_CYCLES_DEVICE_ONEAPI)
add_definitions(-DWITH_ONEAPI)
endif()
if(WITH_CYCLES_EMBREE)
add_definitions(-DWITH_EMBREE)
include_directories(

View File

@@ -128,6 +128,10 @@ if(WITH_OPENIMAGEDENOISE)
)
endif()
if(WITH_EXPERIMENTAL_FEATURES)
add_definitions(-DWITH_NEW_CURVES_TYPE)
endif()
blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
add_dependencies(bf_intern_cycles bf_rna)

View File

@@ -84,36 +84,10 @@ class AddPresetViewportSampling(AddPresetBase, Operator):
preset_subdir = "cycles/viewport_sampling"
class AddPresetPerformance(AddPresetBase, Operator):
'''Add a Performance Preset'''
bl_idname = "render.cycles_performance_preset_add"
bl_label = "Add Performance Preset"
preset_menu = "CYCLES_PT_performance_presets"
preset_defines = [
"render = bpy.context.scene.render"
"cycles = bpy.context.scene.cycles"
]
preset_values = [
"render.threads_mode",
"render.use_persistent_data",
"cycles.debug_use_spatial_splits",
"cycles.debug_use_compact_bvh",
"cycles.debug_use_hair_bvh",
"cycles.debug_bvh_time_steps",
"cycles.use_auto_tile",
"cycles.tile_size",
]
preset_subdir = "cycles/performance"
classes = (
AddPresetIntegrator,
AddPresetSampling,
AddPresetViewportSampling,
AddPresetPerformance,
)

View File

@@ -118,8 +118,7 @@ enum_device_type = (
('CUDA', "CUDA", "CUDA", 1),
('OPTIX', "OptiX", "OptiX", 3),
('HIP', "HIP", "HIP", 4),
('METAL', "Metal", "Metal", 5),
('ONEAPI', "oneAPI", "oneAPI", 6)
('METAL', "Metal", "Metal", 5)
)
enum_texture_limit = (
@@ -693,7 +692,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
debug_use_compact_bvh: BoolProperty(
name="Use Compact BVH",
description="Use compact BVH structure (uses less ram but renders slower)",
default=False,
default=True,
)
debug_bvh_time_steps: IntProperty(
name="BVH Time Steps",
@@ -1398,8 +1397,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
def get_device_types(self, context):
import _cycles
has_cuda, has_optix, has_hip, has_metal, has_oneapi = _cycles.get_device_types()
has_cuda, has_optix, has_hip, has_metal = _cycles.get_device_types()
list = [('NONE', "None", "Don't use compute device", 0)]
if has_cuda:
list.append(('CUDA', "CUDA", "Use CUDA for GPU acceleration", 1))
@@ -1409,8 +1407,6 @@ class CyclesPreferences(bpy.types.AddonPreferences):
list.append(('HIP', "HIP", "Use HIP for GPU acceleration", 4))
if has_metal:
list.append(('METAL', "Metal", "Use Metal for GPU acceleration", 5))
if has_oneapi:
list.append(('ONEAPI', "oneAPI", "Use oneAPI for GPU acceleration", 6))
return list
@@ -1442,7 +1438,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
def update_device_entries(self, device_list):
for device in device_list:
if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP', 'METAL', 'ONEAPI'}:
if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP', 'METAL'}:
continue
# Try to find existing Device entry
entry = self.find_existing_device_entry(device)
@@ -1486,7 +1482,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
import _cycles
# Ensure `self.devices` is not re-allocated when the second call to
# get_devices_for_type is made, freeing items from the first list.
for device_type in ('CUDA', 'OPTIX', 'HIP', 'METAL', 'ONEAPI'):
for device_type in ('CUDA', 'OPTIX', 'HIP', 'METAL'):
self.update_device_entries(_cycles.available_devices(device_type))
# Deprecated: use refresh_devices instead.
@@ -1549,31 +1545,18 @@ class CyclesPreferences(bpy.types.AddonPreferences):
elif device_type == 'HIP':
import sys
if sys.platform[:3] == "win":
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text="Requires discrete AMD GPU with RDNA architecture", icon='BLANK1')
col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
elif sys.platform.startswith("linux"):
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text="Requires discrete AMD GPU with RDNA architecture", icon='BLANK1')
col.label(text="and AMD driver version 22.10 or newer", icon='BLANK1')
elif device_type == 'ONEAPI':
import sys
col.label(text="Requires Intel GPU with Xe-HPG architecture", icon='BLANK1')
if sys.platform.startswith("win"):
col.label(text="and Windows driver version 101.1660 or newer", icon='BLANK1')
elif sys.platform.startswith("linux"):
col.label(text="and Linux driver version xx.xx.23570 or newer", icon='BLANK1')
elif device_type == 'METAL':
col.label(text="Requires Apple Silicon with macOS 12.2 or newer", icon='BLANK1')
col.label(text="or AMD with macOS 12.3 or newer", icon='BLANK1')
return
for device in devices:
import unicodedata
box.prop(
device, "use", text=device.name
.replace('(TM)', unicodedata.lookup('TRADE MARK SIGN'))
.replace('(R)', unicodedata.lookup('REGISTERED SIGN'))
.replace('(C)', unicodedata.lookup('COPYRIGHT SIGN'))
)
box.prop(device, "use", text=device.name)
def draw_impl(self, layout, context):
row = layout.row()

View File

@@ -43,12 +43,6 @@ class CYCLES_PT_integrator_presets(CyclesPresetPanel):
preset_add_operator = "render.cycles_integrator_preset_add"
class CYCLES_PT_performance_presets(CyclesPresetPanel):
bl_label = "Performance Presets"
preset_subdir = "cycles/performance"
preset_add_operator = "render.cycles_performance_preset_add"
class CyclesButtonsPanel:
bl_space_type = "PROPERTIES"
bl_region_type = "WINDOW"
@@ -117,12 +111,6 @@ def use_optix(context):
return (get_device_type(context) == 'OPTIX' and cscene.device == 'GPU')
def use_oneapi(context):
cscene = context.scene.cycles
return (get_device_type(context) == 'ONEAPI' and cscene.device == 'GPU')
def use_multi_device(context):
cscene = context.scene.cycles
if cscene.device != 'GPU':
@@ -630,9 +618,6 @@ class CYCLES_RENDER_PT_performance(CyclesButtonsPanel, Panel):
bl_label = "Performance"
bl_options = {'DEFAULT_CLOSED'}
def draw_header_preset(self, context):
CYCLES_PT_performance_presets.draw_panel_header(self.layout)
def draw(self, context):
pass
@@ -952,8 +937,6 @@ class CYCLES_CAMERA_PT_dof(CyclesButtonsPanel, Panel):
col = split.column()
col.prop(dof, "focus_object", text="Focus Object")
if dof.focus_object and dof.focus_object.type == 'ARMATURE':
col.prop_search(dof, "focus_subtarget", dof.focus_object.data, "bones", text="Focus Bone")
sub = col.row()
sub.active = dof.focus_object is None
@@ -1213,7 +1196,7 @@ class CYCLES_OBJECT_PT_lightgroup(CyclesButtonsPanel, Panel):
sub.prop_search(ob, "lightgroup", view_layer, "lightgroups", text="Light Group", results_are_suggestions=True)
sub = row.column(align=True)
sub.enabled = bool(ob.lightgroup) and not any(lg.name == ob.lightgroup for lg in view_layer.lightgroups)
sub.active = bool(ob.lightgroup) and not any(lg.name == ob.lightgroup for lg in view_layer.lightgroups)
sub.operator("scene.view_layer_add_lightgroup", icon='ADD', text="").name = ob.lightgroup
@@ -1651,7 +1634,7 @@ class CYCLES_WORLD_PT_settings_light_group(CyclesButtonsPanel, Panel):
)
sub = row.column(align=True)
sub.enabled = bool(world.lightgroup) and not any(lg.name == world.lightgroup for lg in view_layer.lightgroups)
sub.active = bool(world.lightgroup) and not any(lg.name == world.lightgroup for lg in view_layer.lightgroups)
sub.operator("scene.view_layer_add_lightgroup", icon='ADD', text="").name = world.lightgroup
@@ -2280,7 +2263,6 @@ classes = (
CYCLES_PT_sampling_presets,
CYCLES_PT_viewport_sampling_presets,
CYCLES_PT_integrator_presets,
CYCLES_PT_performance_presets,
CYCLES_RENDER_PT_sampling,
CYCLES_RENDER_PT_sampling_viewport,
CYCLES_RENDER_PT_sampling_viewport_denoise,

View File

@@ -143,20 +143,11 @@ static float blender_camera_focal_distance(BL::RenderEngine &b_engine,
if (!b_dof_object)
return b_camera.dof().focus_distance();
Transform dofmat = get_transform(b_dof_object.matrix_world());
string focus_subtarget = b_camera.dof().focus_subtarget();
if (b_dof_object.pose() && !focus_subtarget.empty()) {
BL::PoseBone b_bone = b_dof_object.pose().bones[focus_subtarget];
if (b_bone) {
dofmat = dofmat * get_transform(b_bone.matrix());
}
}
/* for dof object, return distance along camera Z direction */
BL::Array<float, 16> b_ob_matrix;
b_engine.camera_model_matrix(b_ob, bcam->use_spherical_stereo, b_ob_matrix);
Transform obmat = transform_clear_scale(get_transform(b_ob_matrix));
Transform dofmat = get_transform(b_dof_object.matrix_world());
float3 view_dir = normalize(transform_get_column(&obmat, 2));
float3 dof_dir = transform_get_column(&obmat, 3) - transform_get_column(&dofmat, 3);
return fabsf(dot(view_dir, dof_dir));

View File

@@ -613,6 +613,8 @@ void BlenderSync::sync_particle_hair(
}
}
#ifdef WITH_NEW_CURVES_TYPE
static std::optional<BL::FloatAttribute> find_curves_radius_attribute(BL::Curves b_curves)
{
for (BL::Attribute &b_attribute : b_curves.attributes) {
@@ -630,25 +632,6 @@ static std::optional<BL::FloatAttribute> find_curves_radius_attribute(BL::Curves
return std::nullopt;
}
static BL::FloatVectorAttribute find_curves_position_attribute(BL::Curves b_curves)
{
for (BL::Attribute &b_attribute : b_curves.attributes) {
if (b_attribute.name() != "position") {
continue;
}
if (b_attribute.domain() != BL::Attribute::domain_POINT) {
continue;
}
if (b_attribute.data_type() != BL::Attribute::data_type_FLOAT_VECTOR) {
continue;
}
return BL::FloatVectorAttribute{b_attribute};
}
/* The position attribute must exist. */
assert(false);
return BL::FloatVectorAttribute{b_curves.attributes[0]};
}
template<typename TypeInCycles, typename GetValueAtIndex>
static void fill_generic_attribute(BL::Curves &b_curves,
TypeInCycles *data,
@@ -812,16 +795,16 @@ static void attr_create_generic(Scene *scene,
}
}
static float4 hair_point_as_float4(BL::FloatVectorAttribute b_attr_position,
static float4 hair_point_as_float4(BL::Curves b_curves,
std::optional<BL::FloatAttribute> b_attr_radius,
const int index)
{
float4 mP = float3_to_float4(get_float3(b_attr_position.data[index].vector()));
float4 mP = float3_to_float4(get_float3(b_curves.position_data[index].vector()));
mP.w = b_attr_radius ? b_attr_radius->data[index].value() : 0.0f;
return mP;
}
static float4 interpolate_hair_points(BL::FloatVectorAttribute b_attr_position,
static float4 interpolate_hair_points(BL::Curves b_curves,
std::optional<BL::FloatAttribute> b_attr_radius,
const int first_point_index,
const int num_points,
@@ -831,8 +814,8 @@ static float4 interpolate_hair_points(BL::FloatVectorAttribute b_attr_position,
const int point_a = clamp((int)curve_t, 0, num_points - 1);
const int point_b = min(point_a + 1, num_points - 1);
const float t = curve_t - (float)point_a;
return lerp(hair_point_as_float4(b_attr_position, b_attr_radius, first_point_index + point_a),
hair_point_as_float4(b_attr_position, b_attr_radius, first_point_index + point_b),
return lerp(hair_point_as_float4(b_curves, b_attr_radius, first_point_index + point_a),
hair_point_as_float4(b_curves, b_attr_radius, first_point_index + point_b),
t);
}
@@ -865,7 +848,6 @@ static void export_hair_curves(Scene *scene,
hair->reserve_curves(num_curves, num_keys);
BL::FloatVectorAttribute b_attr_position = find_curves_position_attribute(b_curves);
std::optional<BL::FloatAttribute> b_attr_radius = find_curves_radius_attribute(b_curves);
/* Export curves and points. */
@@ -884,9 +866,9 @@ static void export_hair_curves(Scene *scene,
/* Position and radius. */
for (int i = 0; i < num_points; i++) {
const float3 co = get_float3(b_attr_position.data[first_point_index + i].vector());
const float3 co = get_float3(b_curves.position_data[first_point_index + i].vector());
const float radius = b_attr_radius ? b_attr_radius->data[first_point_index + i].value() :
0.005f;
0.0f;
hair->add_curve_key(co, radius);
if (attr_intercept) {
@@ -941,7 +923,6 @@ static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motio
int num_motion_keys = 0;
int curve_index = 0;
BL::FloatVectorAttribute b_attr_position = find_curves_position_attribute(b_curves);
std::optional<BL::FloatAttribute> b_attr_radius = find_curves_radius_attribute(b_curves);
for (int i = 0; i < num_curves; i++) {
@@ -957,7 +938,7 @@ static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motio
int point_index = first_point_index + i;
if (point_index < num_keys) {
mP[num_motion_keys] = hair_point_as_float4(b_attr_position, b_attr_radius, point_index);
mP[num_motion_keys] = hair_point_as_float4(b_curves, b_attr_radius, point_index);
num_motion_keys++;
if (!have_motion) {
@@ -977,7 +958,7 @@ static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motio
for (int i = 0; i < curve.num_keys; i++) {
const float step = i * step_size;
mP[num_motion_keys] = interpolate_hair_points(
b_attr_position, b_attr_radius, first_point_index, num_points, step);
b_curves, b_attr_radius, first_point_index, num_points, step);
num_motion_keys++;
}
have_motion = true;
@@ -1009,6 +990,15 @@ void BlenderSync::sync_hair(Hair *hair, BObjectInfo &b_ob_info, bool motion, int
export_hair_curves(scene, hair, b_curves, need_motion, motion_scale);
}
}
#else
void BlenderSync::sync_hair(Hair *hair, BObjectInfo &b_ob_info, bool motion, int motion_step)
{
(void)hair;
(void)b_ob_info;
(void)motion;
(void)motion_step;
}
#endif
void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, Hair *hair)
{
@@ -1020,11 +1010,14 @@ void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, H
new_hair.set_used_shaders(used_shaders);
if (view_layer.use_hair) {
#ifdef WITH_NEW_CURVES_TYPE
if (b_ob_info.object_data.is_a(&RNA_Curves)) {
/* Hair object. */
sync_hair(&new_hair, b_ob_info, false);
}
else {
else
#endif
{
/* Particle hair. */
bool need_undeformed = new_hair.need_attribute(scene, ATTR_STD_GENERATED);
BL::Mesh b_mesh = object_to_mesh(
@@ -1071,12 +1064,15 @@ void BlenderSync::sync_hair_motion(BL::Depsgraph b_depsgraph,
/* Export deformed coordinates. */
if (ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview)) {
#ifdef WITH_NEW_CURVES_TYPE
if (b_ob_info.object_data.is_a(&RNA_Curves)) {
/* Hair object. */
sync_hair(hair, b_ob_info, true, motion_step);
return;
}
else {
else
#endif
{
/* Particle hair. */
BL::Mesh b_mesh = object_to_mesh(
b_data, b_ob_info, b_depsgraph, false, Mesh::SUBDIVISION_NONE);

View File

@@ -15,7 +15,6 @@ enum ComputeDevice {
COMPUTE_DEVICE_OPTIX = 3,
COMPUTE_DEVICE_HIP = 4,
COMPUTE_DEVICE_METAL = 5,
COMPUTE_DEVICE_ONEAPI = 6,
COMPUTE_DEVICE_NUM
};
@@ -77,9 +76,6 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
else if (compute_device == COMPUTE_DEVICE_METAL) {
mask |= DEVICE_MASK_METAL;
}
else if (compute_device == COMPUTE_DEVICE_ONEAPI) {
mask |= DEVICE_MASK_ONEAPI;
}
vector<DeviceInfo> devices = Device::available_devices(mask);
/* Match device preferences and available devices. */

View File

@@ -18,7 +18,11 @@ CCL_NAMESPACE_BEGIN
static Geometry::Type determine_geom_type(BObjectInfo &b_ob_info, bool use_particle_hair)
{
#ifdef WITH_NEW_CURVES_TYPE
if (b_ob_info.object_data.is_a(&RNA_Curves) || use_particle_hair) {
#else
if (use_particle_hair) {
#endif
return Geometry::HAIR;
}
@@ -213,7 +217,11 @@ void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph,
if (progress.get_cancel())
return;
#ifdef WITH_NEW_CURVES_TYPE
if (b_ob_info.object_data.is_a(&RNA_Curves) || use_particle_hair) {
#else
if (use_particle_hair) {
#endif
Hair *hair = static_cast<Hair *>(geom);
sync_hair_motion(b_depsgraph, b_ob_info, hair, motion_step);
}

View File

@@ -1,10 +1,8 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#include <optional>
#include "scene/attribute.h"
#include "scene/pointcloud.h"
#include "scene/attribute.h"
#include "scene/scene.h"
#include "blender/sync.h"
@@ -140,36 +138,6 @@ static void copy_attributes(PointCloud *pointcloud,
}
}
static std::optional<BL::FloatAttribute> find_radius_attribute(BL::PointCloud b_pointcloud)
{
for (BL::Attribute &b_attribute : b_pointcloud.attributes) {
if (b_attribute.name() != "radius") {
continue;
}
if (b_attribute.data_type() != BL::Attribute::data_type_FLOAT) {
continue;
}
return BL::FloatAttribute{b_attribute};
}
return std::nullopt;
}
static BL::FloatVectorAttribute find_position_attribute(BL::PointCloud b_pointcloud)
{
for (BL::Attribute &b_attribute : b_pointcloud.attributes) {
if (b_attribute.name() != "position") {
continue;
}
if (b_attribute.data_type() != BL::Attribute::data_type_FLOAT_VECTOR) {
continue;
}
return BL::FloatVectorAttribute{b_attribute};
}
/* The position attribute must exist. */
assert(false);
return BL::FloatVectorAttribute{b_pointcloud.attributes[0]};
}
static void export_pointcloud(Scene *scene,
PointCloud *pointcloud,
BL::PointCloud b_pointcloud,
@@ -188,18 +156,18 @@ static void export_pointcloud(Scene *scene,
const int num_points = b_pointcloud.points.length();
pointcloud->reserve(num_points);
BL::FloatVectorAttribute b_attr_position = find_position_attribute(b_pointcloud);
std::optional<BL::FloatAttribute> b_attr_radius = find_radius_attribute(b_pointcloud);
/* Export points. */
for (int i = 0; i < num_points; i++) {
const float3 co = get_float3(b_attr_position.data[i].vector());
const float radius = b_attr_radius ? b_attr_radius->data[i].value() : 0.0f;
BL::PointCloud::points_iterator b_point_iter;
for (b_pointcloud.points.begin(b_point_iter); b_point_iter != b_pointcloud.points.end();
++b_point_iter) {
BL::Point b_point = *b_point_iter;
const float3 co = get_float3(b_point.co());
const float radius = b_point.radius();
pointcloud->add_point(co, radius);
/* Random number per point. */
if (attr_random != NULL) {
attr_random->add(hash_uint2_to_float(i, 0));
attr_random->add(hash_uint2_to_float(b_point.index(), 0));
}
}
@@ -227,15 +195,14 @@ static void export_pointcloud_motion(PointCloud *pointcloud,
int num_motion_points = 0;
const array<float3> &pointcloud_points = pointcloud->get_points();
BL::FloatVectorAttribute b_attr_position = find_position_attribute(b_pointcloud);
std::optional<BL::FloatAttribute> b_attr_radius = find_radius_attribute(b_pointcloud);
BL::PointCloud::points_iterator b_point_iter;
for (b_pointcloud.points.begin(b_point_iter); b_point_iter != b_pointcloud.points.end();
++b_point_iter) {
BL::Point b_point = *b_point_iter;
for (int i = 0; i < num_points; i++) {
if (num_motion_points < num_points) {
const float3 co = get_float3(b_attr_position.data[i].vector());
const float radius = b_attr_radius ? b_attr_radius->data[i].value() : 0.0f;
float3 P = co;
P.w = radius;
float3 P = get_float3(b_point.co());
P.w = b_point.radius();
mP[num_motion_points] = P;
have_motion = have_motion || (P != pointcloud_points[num_motion_points]);
num_motion_points++;
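The radius handling above is an instance of a common optional-attribute pattern: look the attribute up by name and type, and fall back to a default when it is absent. A minimal sketch of that pattern, assuming a hypothetical find_float_attribute() helper over a plain std::map rather than the RNA wrappers used in the patch:
/* Sketch only: generic "optional attribute with default" lookup. */
#include <map>
#include <optional>
#include <string>

static std::optional<float> find_float_attribute(const std::map<std::string, float> &attrs,
                                                 const std::string &name)
{
  auto it = attrs.find(name);
  if (it == attrs.end()) {
    return std::nullopt;
  }
  return it->second;
}

/* Usage: default to 0.0f when the "radius" attribute does not exist. */
/* const float radius = find_float_attribute(attrs, "radius").value_or(0.0f); */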

View File

@@ -871,20 +871,18 @@ static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*
static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
{
vector<DeviceType> device_types = Device::available_types();
bool has_cuda = false, has_optix = false, has_hip = false, has_metal = false, has_oneapi = false;
bool has_cuda = false, has_optix = false, has_hip = false, has_metal = false;
foreach (DeviceType device_type, device_types) {
has_cuda |= (device_type == DEVICE_CUDA);
has_optix |= (device_type == DEVICE_OPTIX);
has_hip |= (device_type == DEVICE_HIP);
has_metal |= (device_type == DEVICE_METAL);
has_oneapi |= (device_type == DEVICE_ONEAPI);
}
PyObject *list = PyTuple_New(5);
PyObject *list = PyTuple_New(4);
PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda));
PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix));
PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip));
PyTuple_SET_ITEM(list, 3, PyBool_FromLong(has_metal));
PyTuple_SET_ITEM(list, 4, PyBool_FromLong(has_oneapi));
return list;
}
@@ -916,9 +914,6 @@ static PyObject *set_device_override_func(PyObject * /*self*/, PyObject *arg)
else if (override == "METAL") {
BlenderSession::device_override = DEVICE_MASK_METAL;
}
else if (override == "ONEAPI") {
BlenderSession::device_override = DEVICE_MASK_ONEAPI;
}
else {
printf("\nError: %s is not a valid Cycles device.\n", override.c_str());
Py_RETURN_FALSE;
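The hunk above has to keep the size passed to PyTuple_New() in sync with the number of PyTuple_SET_ITEM() calls by hand, which is exactly what the 4-versus-5 change touches. A hedged sketch, not part of the patch, of a helper that derives the tuple size from a flag list; it assumes only the standard CPython C API:
/* Sketch only: build a tuple of Python booleans whose length always matches the input. */
#include <Python.h>
#include <vector>

static PyObject *make_bool_tuple(const std::vector<bool> &flags)
{
  PyObject *tuple = PyTuple_New((Py_ssize_t)flags.size());
  for (size_t i = 0; i < flags.size(); i++) {
    /* PyTuple_SET_ITEM steals the reference returned by PyBool_FromLong. */
    PyTuple_SET_ITEM(tuple, i, PyBool_FromLong(flags[i] ? 1 : 0));
  }
  return tuple;
}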

View File

@@ -129,7 +129,7 @@ class BVHParams {
top_level = false;
bvh_layout = BVH_LAYOUT_BVH2;
use_compact_structure = false;
use_compact_structure = true;
use_unaligned_nodes = false;
num_motion_curve_steps = 0;

View File

@@ -91,8 +91,6 @@ if(CYCLES_STANDALONE_REPOSITORY)
_set_default(USD_ROOT_DIR "${_cycles_lib_dir}/usd")
_set_default(WEBP_ROOT_DIR "${_cycles_lib_dir}/webp")
_set_default(ZLIB_ROOT "${_cycles_lib_dir}/zlib")
_set_default(LEVEL_ZERO_ROOT_DIR "${_cycles_lib_dir}/level-zero")
_set_default(SYCL_ROOT_DIR "${_cycles_lib_dir}/dpcpp")
# Ignore system libraries
set(CMAKE_IGNORE_PATH "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES};${CMAKE_SYSTEM_INCLUDE_PATH};${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES};${CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES}")
@@ -649,22 +647,3 @@ if(WITH_CYCLES_DEVICE_METAL)
message(STATUS "Found Metal: ${METAL_LIBRARY}")
endif()
endif()
###########################################################################
# oneAPI
###########################################################################
if (WITH_CYCLES_DEVICE_ONEAPI)
find_package(SYCL)
find_package(LevelZero)
if (SYCL_FOUND AND LEVEL_ZERO_FOUND)
message(STATUS "Found oneAPI: ${SYCL_LIBRARY}")
message(STATUS "Found Level Zero: ${LEVEL_ZERO_LIBRARY}")
else()
message(STATUS "oneAPI or Level Zero not found, disabling oneAPI device from Cycles")
set(WITH_CYCLES_DEVICE_ONEAPI OFF)
endif()
endif()
unset(_cycles_lib_dir)

View File

@@ -82,15 +82,6 @@ set(SRC_HIP
hip/util.h
)
set(SRC_ONEAPI
oneapi/device_impl.cpp
oneapi/device_impl.h
oneapi/device.cpp
oneapi/device.h
oneapi/queue.cpp
oneapi/queue.h
)
set(SRC_DUMMY
dummy/device.cpp
dummy/device.h
@@ -143,7 +134,6 @@ set(SRC
${SRC_DUMMY}
${SRC_MULTI}
${SRC_OPTIX}
${SRC_ONEAPI}
${SRC_HEADERS}
)
@@ -191,9 +181,6 @@ if(WITH_CYCLES_DEVICE_METAL)
${SRC_METAL}
)
endif()
if (WITH_CYCLES_DEVICE_ONEAPI)
add_definitions(-DWITH_ONEAPI)
endif()
if(WITH_OPENIMAGEDENOISE)
list(APPEND LIB
@@ -206,11 +193,6 @@ include_directories(SYSTEM ${INC_SYS})
cycles_add_library(cycles_device "${LIB}" ${SRC})
if(WITH_CYCLES_DEVICE_ONEAPI)
# Needed to trigger a proper rebuild when cycles_kernel_oneapi changes, due to external project behaviour
add_dependencies(cycles_device cycles_kernel_oneapi)
endif()
source_group("cpu" FILES ${SRC_CPU})
source_group("cuda" FILES ${SRC_CUDA})
source_group("dummy" FILES ${SRC_DUMMY})
@@ -218,5 +200,4 @@ source_group("hip" FILES ${SRC_HIP})
source_group("multi" FILES ${SRC_MULTI})
source_group("metal" FILES ${SRC_METAL})
source_group("optix" FILES ${SRC_OPTIX})
source_group("oneapi" FILES ${SRC_ONEAPI})
source_group("common" FILES ${SRC_BASE} ${SRC_HEADERS})

View File

@@ -197,7 +197,7 @@ void CPUDevice::const_copy_to(const char *name, void *host, size_t size)
// Update scene handle (since it is different for each device on multi devices)
KernelData *const data = (KernelData *)host;
data->device_bvh = embree_scene;
data->bvh.scene = embree_scene;
}
#endif
kernel_const_copy(&kernel_globals, name, host, size);

View File

@@ -16,7 +16,6 @@
#include "device/hip/device.h"
#include "device/metal/device.h"
#include "device/multi/device.h"
#include "device/oneapi/device.h"
#include "device/optix/device.h"
#include "util/foreach.h"
@@ -40,7 +39,6 @@ vector<DeviceInfo> Device::optix_devices;
vector<DeviceInfo> Device::cpu_devices;
vector<DeviceInfo> Device::hip_devices;
vector<DeviceInfo> Device::metal_devices;
vector<DeviceInfo> Device::oneapi_devices;
uint Device::devices_initialized_mask = 0;
/* Device */
@@ -103,13 +101,6 @@ Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
device = device_metal_create(info, stats, profiler);
break;
#endif
#ifdef WITH_ONEAPI
case DEVICE_ONEAPI:
device = device_oneapi_create(info, stats, profiler);
break;
#endif
default:
break;
}
@@ -135,8 +126,6 @@ DeviceType Device::type_from_string(const char *name)
return DEVICE_HIP;
else if (strcmp(name, "METAL") == 0)
return DEVICE_METAL;
else if (strcmp(name, "ONEAPI") == 0)
return DEVICE_ONEAPI;
return DEVICE_NONE;
}
@@ -155,8 +144,6 @@ string Device::string_from_type(DeviceType type)
return "HIP";
else if (type == DEVICE_METAL)
return "METAL";
else if (type == DEVICE_ONEAPI)
return "ONEAPI";
return "";
}
@@ -176,9 +163,6 @@ vector<DeviceType> Device::available_types()
#endif
#ifdef WITH_METAL
types.push_back(DEVICE_METAL);
#endif
#ifdef WITH_ONEAPI
types.push_back(DEVICE_ONEAPI);
#endif
return types;
}
@@ -235,20 +219,6 @@ vector<DeviceInfo> Device::available_devices(uint mask)
}
#endif
#ifdef WITH_ONEAPI
if (mask & DEVICE_MASK_ONEAPI) {
if (!(devices_initialized_mask & DEVICE_MASK_ONEAPI)) {
if (device_oneapi_init()) {
device_oneapi_info(oneapi_devices);
}
devices_initialized_mask |= DEVICE_MASK_ONEAPI;
}
foreach (DeviceInfo &info, oneapi_devices) {
devices.push_back(info);
}
}
#endif
if (mask & DEVICE_MASK_CPU) {
if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
device_cpu_info(cpu_devices);
@@ -312,15 +282,6 @@ string Device::device_capabilities(uint mask)
}
#endif
#ifdef WITH_ONEAPI
if (mask & DEVICE_MASK_ONEAPI) {
if (device_oneapi_init()) {
capabilities += "\noneAPI device capabilities:\n";
capabilities += device_oneapi_capabilities();
}
}
#endif
#ifdef WITH_METAL
if (mask & DEVICE_MASK_METAL) {
if (device_metal_init()) {
@@ -419,7 +380,6 @@ void Device::free_memory()
cuda_devices.free_memory();
optix_devices.free_memory();
hip_devices.free_memory();
oneapi_devices.free_memory();
cpu_devices.free_memory();
metal_devices.free_memory();
}

View File

@@ -29,7 +29,6 @@ class DeviceQueue;
class Progress;
class CPUKernels;
class CPUKernelThreadGlobals;
class Scene;
/* Device Types */
@@ -41,7 +40,6 @@ enum DeviceType {
DEVICE_OPTIX,
DEVICE_HIP,
DEVICE_METAL,
DEVICE_ONEAPI,
DEVICE_DUMMY,
};
@@ -51,7 +49,6 @@ enum DeviceTypeMask {
DEVICE_MASK_OPTIX = (1 << DEVICE_OPTIX),
DEVICE_MASK_HIP = (1 << DEVICE_HIP),
DEVICE_MASK_METAL = (1 << DEVICE_METAL),
DEVICE_MASK_ONEAPI = (1 << DEVICE_ONEAPI),
DEVICE_MASK_ALL = ~0
};
@@ -187,11 +184,6 @@ class Device {
return 0;
}
/* Called after kernel texture setup, and prior to integrator state setup. */
virtual void optimize_for_scene(Scene * /*scene*/)
{
}
virtual bool is_resident(device_ptr /*key*/, Device *sub_device)
{
/* Memory is always resident if this is not a multi device, regardless of whether the pointer
@@ -281,7 +273,6 @@ class Device {
static vector<DeviceInfo> cpu_devices;
static vector<DeviceInfo> hip_devices;
static vector<DeviceInfo> metal_devices;
static vector<DeviceInfo> oneapi_devices;
static uint devices_initialized_mask;
};
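The DEVICE_MASK_* values above are derived from DeviceType bit positions, so callers can OR them together into a single backend filter. A small usage sketch, assuming the Device::available_devices(uint mask) API shown earlier in this diff and a DEVICE_MASK_CUDA entry alongside the masks listed here:
/* Sketch only: enumerate GPU backends while skipping the CPU device. */
static void print_gpu_devices()
{
  const uint gpu_mask = DEVICE_MASK_CUDA | DEVICE_MASK_OPTIX | DEVICE_MASK_HIP | DEVICE_MASK_METAL;
  for (const DeviceInfo &info : Device::available_devices(gpu_mask)) {
    printf("%s (%s)\n", info.description.c_str(), Device::string_from_type(info.type).c_str());
  }
}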

View File

@@ -51,7 +51,7 @@ static inline bool hipSupportsDevice(const int hipDevId)
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
return (major >= 9);
return (major > 10) || (major == 10 && minor >= 1);
}
CCL_NAMESPACE_END

View File

@@ -34,8 +34,7 @@ void device_metal_info(vector<DeviceInfo> &devices)
int device_index = 0;
for (id<MTLDevice> &device : usable_devices) {
/* Compute unique ID for persistent user preferences. */
string device_name = MetalInfo::get_device_name(device);
string device_name = [device.name UTF8String];
string id = string("METAL_") + device_name;
/* Hardware ID might not be unique, add device number in that case. */
@@ -49,6 +48,12 @@ void device_metal_info(vector<DeviceInfo> &devices)
info.type = DEVICE_METAL;
info.description = string_remove_trademark(string(device_name));
/* Ensure unique naming on Apple Silicon / SoC devices which return the same string for CPU and
* GPU */
if (info.description == system_cpu_brand_string()) {
info.description += " (GPU)";
}
info.num = device_index;
/* We don't know if it's used for display, but assume it is. */
info.display_device = true;
@@ -64,15 +69,14 @@ string device_metal_capabilities()
{
string result = "";
auto allDevices = MTLCopyAllDevices();
uint32_t num_devices = (uint32_t)allDevices.count;
uint32_t num_devices = allDevices.count;
if (num_devices == 0) {
return "No Metal devices found\n";
}
result += string_printf("Number of devices: %u\n", num_devices);
for (id<MTLDevice> device in allDevices) {
string device_name = MetalInfo::get_device_name(device);
result += string_printf("\t\tDevice: %s\n", device_name.c_str());
result += string_printf("\t\tDevice: %s\n", [device.name UTF8String]);
}
return result;

View File

@@ -42,6 +42,7 @@ class MetalDevice : public Device {
nil; /* encoder used for fetching device pointers from MTLAccelerationStructure */
/*---------------------------------------------------*/
string device_name;
MetalGPUVendor device_vendor;
uint kernel_features;
@@ -75,8 +76,7 @@ class MetalDevice : public Device {
std::vector<id<MTLTexture>> texture_slot_map;
bool use_metalrt = false;
MetalPipelineType kernel_specialization_level = PSO_GENERIC;
std::atomic_bool async_compile_and_load = false;
bool use_function_specialisation = false;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
@@ -92,7 +92,9 @@ class MetalDevice : public Device {
bool use_adaptive_compilation();
void make_source(MetalPipelineType pso_type, const uint kernel_features);
string get_source(const uint kernel_features);
string compile_kernel(const uint kernel_features, const char *name);
virtual bool load_kernels(const uint kernel_features) override;
@@ -110,9 +112,7 @@ class MetalDevice : public Device {
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
virtual void optimize_for_scene(Scene *scene) override;
bool compile_and_load(MetalPipelineType pso_type);
id<MTLLibrary> compile(string const &source);
/* ------------------------------------------------------------------ */
/* low-level memory management */

View File

@@ -6,12 +6,9 @@
# include "device/metal/device_impl.h"
# include "device/metal/device.h"
# include "scene/scene.h"
# include "util/debug.h"
# include "util/md5.h"
# include "util/path.h"
# include "util/time.h"
CCL_NAMESPACE_BEGIN
@@ -46,9 +43,10 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
auto usable_devices = MetalInfo::get_usable_devices();
assert(mtlDevId < usable_devices.size());
mtlDevice = usable_devices[mtlDevId];
device_vendor = MetalInfo::get_device_vendor(mtlDevice);
device_name = [mtlDevice.name UTF8String];
device_vendor = MetalInfo::get_vendor_from_device_name(device_name);
assert(device_vendor != METAL_GPU_UNKNOWN);
metal_printf("Creating new Cycles device for Metal: %s\n", info.description.c_str());
metal_printf("Creating new Cycles device for Metal: %s\n", device_name.c_str());
/* determine default storage mode based on whether UMA is supported */
@@ -80,10 +78,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
case METAL_GPU_APPLE: {
max_threads_per_threadgroup = 512;
use_metalrt = info.use_metalrt;
/* Specialize the intersection kernels on Apple GPUs by default as these can be built very
* quickly. */
kernel_specialization_level = PSO_SPECIALIZED_INTERSECT;
break;
}
}
@@ -96,13 +90,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
capture_enabled = true;
}
if (auto envstr = getenv("CYCLES_METAL_SPECIALIZATION_LEVEL")) {
kernel_specialization_level = (MetalPipelineType)atoi(envstr);
}
metal_printf("kernel_specialization_level = %s\n",
kernel_type_as_string(
(MetalPipelineType)min((int)kernel_specialization_level, (int)PSO_NUM - 1)));
MTLArgumentDescriptor *arg_desc_params = [[MTLArgumentDescriptor alloc] init];
arg_desc_params.dataType = MTLDataTypePointer;
arg_desc_params.access = MTLArgumentAccessReadOnly;
@@ -222,86 +209,61 @@ bool MetalDevice::use_adaptive_compilation()
return DebugFlags().metal.adaptive_compile;
}
void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_features)
string MetalDevice::get_source(const uint kernel_features)
{
string global_defines;
string build_options;
if (use_adaptive_compilation()) {
global_defines += "#define __KERNEL_FEATURES__ " + to_string(kernel_features) + "\n";
build_options += " -D__KERNEL_FEATURES__=" + to_string(kernel_features);
}
if (use_metalrt) {
global_defines += "#define __METALRT__\n";
build_options += "-D__METALRT__ ";
if (motion_blur) {
global_defines += "#define __METALRT_MOTION__\n";
build_options += "-D__METALRT_MOTION__ ";
}
}
# ifdef WITH_CYCLES_DEBUG
global_defines += "#define __KERNEL_DEBUG__\n";
build_options += "-D__KERNEL_DEBUG__ ";
# endif
switch (device_vendor) {
default:
break;
case METAL_GPU_INTEL:
global_defines += "#define __KERNEL_METAL_INTEL__\n";
build_options += "-D__KERNEL_METAL_INTEL__ ";
break;
case METAL_GPU_AMD:
global_defines += "#define __KERNEL_METAL_AMD__\n";
build_options += "-D__KERNEL_METAL_AMD__ ";
break;
case METAL_GPU_APPLE:
global_defines += "#define __KERNEL_METAL_APPLE__\n";
build_options += "-D__KERNEL_METAL_APPLE__ ";
break;
}
string &source = this->source[pso_type];
source = "\n#include \"kernel/device/metal/kernel.metal\"\n";
source = path_source_replace_includes(source, path_get("source"));
/* reformat -D defines list into compilable form */
vector<string> components;
string_replace(build_options, "-D", "");
string_split(components, build_options, " ");
/* Perform any required specialization on the source.
* With Metal function constants we can generate a single variant of the kernel source which can
* be repeatedly respecialized.
*/
string baked_constants;
/* Replace specific KernelData "dot" dereferences with a Metal function_constant identifier of
* the same character length. Build a string of all active constant values which is then hashed
* in order to identify the PSO.
*/
if (pso_type != PSO_GENERIC) {
const double starttime = time_dt();
# define KERNEL_STRUCT_BEGIN(name, parent) \
string_replace_same_length(source, "kernel_data." #parent ".", "kernel_data_" #parent "_");
/* Add constants to md5 so that 'get_best_pipeline' is able to return a suitable match. */
# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
baked_constants += string(#parent "." #name "=") + \
to_string(_type(launch_params.data.parent.name)) + "\n";
# include "kernel/data_template.h"
/* Opt in to all available specializations. This can be made more granular for the
* PSO_SPECIALIZED_INTERSECT case in order to minimize the number of specialization requests,
* but the overhead should be negligible as these are very quick to (re)build and aren't
* serialized to disk via MTLBinaryArchives.
*/
global_defines += "#define __KERNEL_USE_DATA_CONSTANTS__\n";
metal_printf("KernelData patching took %.1f ms\n", (time_dt() - starttime) * 1000.0);
string globalDefines;
for (const string &component : components) {
vector<string> assignments;
string_split(assignments, component, "=");
if (assignments.size() == 2)
globalDefines += string_printf(
"#define %s %s\n", assignments[0].c_str(), assignments[1].c_str());
else
globalDefines += string_printf("#define %s\n", assignments[0].c_str());
}
source = global_defines + source;
metal_printf("================\n%s================\n%s================\n",
global_defines.c_str(),
baked_constants.c_str());
string source = globalDefines + "\n#include \"kernel/device/metal/kernel.metal\"\n";
source = path_source_replace_includes(source, path_get("source"));
/* Generate an MD5 from the source and include any baked constants. This is used when caching
* PSOs. */
MD5Hash md5;
md5.append(baked_constants);
md5.append(source);
source_md5[pso_type] = md5.get_hex();
metal_printf("Global defines:\n%s\n", globalDefines.c_str());
return source;
}
bool MetalDevice::load_kernels(const uint _kernel_features)
@@ -317,22 +279,24 @@ bool MetalDevice::load_kernels(const uint _kernel_features)
* active, but may still need to be rendered without motion blur if that isn't active as well. */
motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
bool result = compile_and_load(PSO_GENERIC);
source[PSO_GENERIC] = get_source(kernel_features);
mtlLibrary[PSO_GENERIC] = compile(source[PSO_GENERIC]);
MD5Hash md5;
md5.append(source[PSO_GENERIC]);
source_md5[PSO_GENERIC] = md5.get_hex();
metal_printf("Front-end compilation finished (generic)\n");
bool result = MetalDeviceKernels::load(this, false);
reserve_local_memory(kernel_features);
return result;
}
bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
id<MTLLibrary> MetalDevice::compile(string const &source)
{
make_source(pso_type, kernel_features);
if (!MetalDeviceKernels::should_load_kernels(this, pso_type)) {
/* We already have a full set of matching pipelines which are cached or queued. */
metal_printf("%s kernels already requested\n", kernel_type_as_string(pso_type));
return true;
}
MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
options.fastMathEnabled = YES;
@@ -340,30 +304,19 @@ bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
options.languageVersion = MTLLanguageVersion2_4;
}
if (getenv("CYCLES_METAL_PROFILING") || getenv("CYCLES_METAL_DEBUG")) {
path_write_text(path_cache_get(string_printf("%s.metal", kernel_type_as_string(pso_type))),
source[pso_type]);
}
const double starttime = time_dt();
NSError *error = NULL;
mtlLibrary[pso_type] = [mtlDevice newLibraryWithSource:@(source[pso_type].c_str())
options:options
error:&error];
id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str())
options:options
error:&error];
if (!mtlLibrary[pso_type]) {
if (!mtlLibrary) {
NSString *err = [error localizedDescription];
set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
}
metal_printf("Front-end compilation finished in %.1f seconds (%s)\n",
time_dt() - starttime,
kernel_type_as_string(pso_type));
[options release];
return MetalDeviceKernels::load(this, pso_type);
return mtlLibrary;
}
void MetalDevice::reserve_local_memory(const uint kernel_features)
@@ -670,63 +623,11 @@ device_ptr MetalDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, siz
return 0;
}
void MetalDevice::optimize_for_scene(Scene *scene)
{
MetalPipelineType specialization_level = kernel_specialization_level;
if (specialization_level < PSO_SPECIALIZED_INTERSECT) {
return;
}
/* PSO_SPECIALIZED_INTERSECT kernels are fast to specialize, so we always load them
* synchronously. */
compile_and_load(PSO_SPECIALIZED_INTERSECT);
if (specialization_level < PSO_SPECIALIZED_SHADE) {
return;
}
if (!scene->params.background) {
/* Don't load PSO_SPECIALIZED_SHADE kernels during viewport rendering as they are slower to
* build. */
return;
}
/* PSO_SPECIALIZED_SHADE kernels are slower to specialize, so we load them asynchronously, and
* only if there isn't an existing load in flight.
*/
auto specialize_shade_fn = ^() {
compile_and_load(PSO_SPECIALIZED_SHADE);
async_compile_and_load = false;
};
bool async_specialize_shade = true;
/* Block if a per-kernel profiling is enabled (ensure steady rendering rate). */
if (getenv("CYCLES_METAL_PROFILING") != nullptr) {
async_specialize_shade = false;
}
if (async_specialize_shade) {
if (!async_compile_and_load) {
async_compile_and_load = true;
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
specialize_shade_fn);
}
else {
metal_printf(
"Async PSO_SPECIALIZED_SHADE load request already in progress - dropping request\n");
}
}
else {
specialize_shade_fn();
}
}
void MetalDevice::const_copy_to(const char *name, void *host, size_t size)
{
if (strcmp(name, "data") == 0) {
assert(size == sizeof(KernelData));
memcpy((uint8_t *)&launch_params.data, host, sizeof(KernelData));
memcpy((uint8_t *)&launch_params + offsetof(KernelParamsMetal, data), host, size);
return;
}
@@ -747,7 +648,7 @@ void MetalDevice::const_copy_to(const char *name, void *host, size_t size)
/* Update data storage pointers in launch parameters. */
if (strcmp(name, "integrator_state") == 0) {
/* IntegratorStateGPU is contiguous pointers */
const size_t pointer_block_size = offsetof(IntegratorStateGPU, sort_partition_divisor);
const size_t pointer_block_size = sizeof(IntegratorStateGPU);
update_launch_pointers(
offsetof(KernelParamsMetal, integrator_state), host, size, pointer_block_size);
}
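The const_copy_to() and pointer_block_size changes above rely on IntegratorStateGPU beginning with a contiguous run of device pointers, so the pointer block ends at the first plain-old-data member. A hedged sketch of that layout convention; ExampleStateGPU and device_ptr_t are illustrative stand-ins, not the real types:
/* Sketch only: the size of the leading pointer block is the offset of the first non-pointer. */
#include <cstddef>

typedef unsigned long long device_ptr_t; /* stand-in for device_ptr */

struct ExampleStateGPU {
  device_ptr_t path;          /* contiguous device pointers come first ... */
  device_ptr_t shadow_path;
  int sort_partition_divisor; /* ... followed by plain-old-data members. */
};

const size_t pointer_block_size = offsetof(ExampleStateGPU, sort_partition_divisor);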

View File

@@ -31,7 +31,7 @@ enum {
enum { METALRT_TABLE_DEFAULT, METALRT_TABLE_SHADOW, METALRT_TABLE_LOCAL, METALRT_TABLE_NUM };
/* Pipeline State Object types */
enum MetalPipelineType {
enum {
/* A kernel that can be used with all scenes, supporting all features.
* It is slow to compile, but only needs to be compiled once and is then
* cached for future render sessions. This allows a render to get underway
@@ -39,33 +39,28 @@ enum MetalPipelineType {
*/
PSO_GENERIC,
/* An intersection kernel that is very quick to specialize and results in faster intersection
* kernel performance. It uses Metal function constants to replace several KernelData variables
* with fixed constants.
/* A kernel that is relatively quick to compile, but is specialized for the
* scene being rendered. It only contains the required functionality and bakes in
* constant values, which means it needs to be recompiled whenever a dependent
* setting is changed. The render performance of this kernel is significantly
* faster though, which justifies the extra compile time.
*/
PSO_SPECIALIZED_INTERSECT,
/* A shading kernel that is slow to specialize, but results in faster shading kernel performance
* when rendered. It uses Metal function constants to replace several KernelData variables with fixed
* constants and short-circuit all unused SVM node case handlers.
*/
PSO_SPECIALIZED_SHADE,
/* METAL_WIP: This isn't used and will require more changes to enable. */
PSO_SPECIALISED,
PSO_NUM
};
const char *kernel_type_as_string(MetalPipelineType pso_type);
const char *kernel_type_as_string(int kernel_type);
struct MetalKernelPipeline {
void compile();
id<MTLLibrary> mtlLibrary = nil;
MetalPipelineType pso_type;
bool scene_specialized;
string source_md5;
size_t usage_count = 0;
KernelData kernel_data_;
bool use_metalrt;
bool metalrt_hair;
bool metalrt_hair_thick;
@@ -80,8 +75,6 @@ struct MetalKernelPipeline {
id<MTLComputePipelineState> pipeline = nil;
int num_threads_per_block = 0;
bool should_use_binary_archive() const;
string error_str;
API_AVAILABLE(macos(11.0))
@@ -92,8 +85,7 @@ struct MetalKernelPipeline {
/* Cache of Metal kernels for each DeviceKernel. */
namespace MetalDeviceKernels {
bool should_load_kernels(MetalDevice *device, MetalPipelineType pso_type);
bool load(MetalDevice *device, MetalPipelineType pso_type);
bool load(MetalDevice *device, bool scene_specialized);
const MetalKernelPipeline *get_best_pipeline(const MetalDevice *device, DeviceKernel kernel);
} /* namespace MetalDeviceKernels */

View File

@@ -5,7 +5,6 @@
# include "device/metal/kernel.h"
# include "device/metal/device_impl.h"
# include "kernel/device/metal/function_constants.h"
# include "util/md5.h"
# include "util/path.h"
# include "util/tbb.h"
@@ -17,15 +16,13 @@ CCL_NAMESPACE_BEGIN
/* limit to 2 MTLCompiler instances */
int max_mtlcompiler_threads = 2;
const char *kernel_type_as_string(MetalPipelineType pso_type)
const char *kernel_type_as_string(int kernel_type)
{
switch (pso_type) {
switch (kernel_type) {
case PSO_GENERIC:
return "PSO_GENERIC";
case PSO_SPECIALIZED_INTERSECT:
return "PSO_SPECIALIZED_INTERSECT";
case PSO_SPECIALIZED_SHADE:
return "PSO_SPECIALIZED_SHADE";
case PSO_SPECIALISED:
return "PSO_SPECIALISED";
default:
assert(0);
}
@@ -53,11 +50,7 @@ struct ShaderCache {
/* Non-blocking request for a kernel, optionally specialized to the scene being rendered by
* device. */
void load_kernel(DeviceKernel kernel, MetalDevice *device, MetalPipelineType pso_type);
bool should_load_kernel(DeviceKernel device_kernel,
MetalDevice *device,
MetalPipelineType pso_type);
void load_kernel(DeviceKernel kernel, MetalDevice *device, bool scene_specialized);
void wait_for_all();
@@ -146,53 +139,9 @@ void ShaderCache::compile_thread_func(int thread_index)
}
}
bool ShaderCache::should_load_kernel(DeviceKernel device_kernel,
MetalDevice *device,
MetalPipelineType pso_type)
{
if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
/* Skip megakernel. */
return false;
}
if (device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
if ((device->kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0) {
/* Skip shade_surface_raytrace kernel if the scene doesn't require it. */
return false;
}
}
if (pso_type != PSO_GENERIC) {
/* Only specialize kernels where it can make an impact. */
if (device_kernel < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
return false;
}
/* Only specialize shading / intersection kernels as requested. */
bool is_shade_kernel = (device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
bool is_shade_pso = (pso_type == PSO_SPECIALIZED_SHADE);
if (is_shade_pso != is_shade_kernel) {
return false;
}
}
{
/* check whether the kernel has already been requested / cached */
thread_scoped_lock lock(cache_mutex);
for (auto &pipeline : pipelines[device_kernel]) {
if (pipeline->source_md5 == device->source_md5[pso_type]) {
return false;
}
}
}
return true;
}
void ShaderCache::load_kernel(DeviceKernel device_kernel,
MetalDevice *device,
MetalPipelineType pso_type)
bool scene_specialized)
{
{
/* create compiler threads on first run */
@@ -205,21 +154,52 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
}
}
if (!should_load_kernel(device_kernel, device, pso_type)) {
if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
/* skip megakernel */
return;
}
if (scene_specialized) {
/* Only specialize kernels where it can make an impact. */
if (device_kernel < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
return;
}
}
{
/* check whether the kernel has already been requested / cached */
thread_scoped_lock lock(cache_mutex);
for (auto &pipeline : pipelines[device_kernel]) {
if (scene_specialized) {
if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) {
/* we already requested a pipeline that is specialized for this kernel data */
metal_printf("Specialized kernel already requested (%s)\n",
device_kernel_as_string(device_kernel));
return;
}
}
else {
if (pipeline->source_md5 == device->source_md5[PSO_GENERIC]) {
/* we already requested a generic pipeline for this kernel */
metal_printf("Generic kernel already requested (%s)\n",
device_kernel_as_string(device_kernel));
return;
}
}
}
}
incomplete_requests++;
PipelineRequest request;
request.pipeline = new MetalKernelPipeline;
memcpy(&request.pipeline->kernel_data_,
&device->launch_params.data,
sizeof(request.pipeline->kernel_data_));
request.pipeline->pso_type = pso_type;
request.pipeline->scene_specialized = scene_specialized;
request.pipeline->mtlDevice = mtlDevice;
request.pipeline->source_md5 = device->source_md5[pso_type];
request.pipeline->mtlLibrary = device->mtlLibrary[pso_type];
request.pipeline->source_md5 =
device->source_md5[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC];
request.pipeline->mtlLibrary =
device->mtlLibrary[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC];
request.pipeline->device_kernel = device_kernel;
request.pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup;
@@ -234,24 +214,7 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
{
thread_scoped_lock lock(cache_mutex);
auto &collection = pipelines[device_kernel];
/* Cache up to 3 kernel variants with the same pso_type, purging oldest first. */
int max_entries_of_same_pso_type = 3;
for (int i = (int)collection.size() - 1; i >= 0; i--) {
if (collection[i]->pso_type == pso_type) {
max_entries_of_same_pso_type -= 1;
if (max_entries_of_same_pso_type == 0) {
metal_printf("Purging oldest %s:%s kernel from ShaderCache\n",
kernel_type_as_string(pso_type),
device_kernel_as_string(device_kernel));
collection.erase(collection.begin() + i);
break;
}
}
}
collection.push_back(unique_ptr<MetalKernelPipeline>(request.pipeline));
pipelines[device_kernel].push_back(unique_ptr<MetalKernelPipeline>(request.pipeline));
request_queue.push_back(request);
}
cond_var.notify_one();
@@ -285,9 +248,8 @@ MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const M
continue;
}
if (pipeline->pso_type != PSO_GENERIC) {
if (pipeline->source_md5 == device->source_md5[PSO_SPECIALIZED_INTERSECT] ||
pipeline->source_md5 == device->source_md5[PSO_SPECIALIZED_SHADE]) {
if (pipeline->scene_specialized) {
if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) {
best_pipeline = pipeline.get();
}
}
@@ -296,65 +258,13 @@ MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const M
}
}
if (best_pipeline->usage_count == 0 && best_pipeline->pso_type != PSO_GENERIC) {
metal_printf("Swapping in %s version of %s\n",
kernel_type_as_string(best_pipeline->pso_type),
device_kernel_as_string(kernel));
}
best_pipeline->usage_count += 1;
return best_pipeline;
}
bool MetalKernelPipeline::should_use_binary_archive() const
{
if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
if (atoi(str) != 0) {
/* Don't archive if we have opted out by env var. */
return false;
}
}
if (pso_type == PSO_GENERIC) {
/* Archive the generic kernels. */
return true;
}
if (device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND &&
device_kernel <= DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW) {
/* Archive all shade kernels - they take a long time to compile. */
return true;
}
/* The remaining kernels are all fast to compile. They may get cached by the system shader cache,
* but will be quick to regenerate if not. */
return false;
}
static MTLFunctionConstantValues *GetConstantValues(KernelData const *data = nullptr)
{
MTLFunctionConstantValues *constant_values = [MTLFunctionConstantValues new];
MTLDataType MTLDataType_int = MTLDataTypeInt;
MTLDataType MTLDataType_float = MTLDataTypeFloat;
MTLDataType MTLDataType_float4 = MTLDataTypeFloat4;
KernelData zero_data = {0};
if (!data) {
data = &zero_data;
}
# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
[constant_values setConstantValue:&data->parent.name \
type:MTLDataType_##_type \
atIndex:KernelData_##parent##_##name];
# include "kernel/data_template.h"
return constant_values;
}
void MetalKernelPipeline::compile()
{
int pso_type = scene_specialized ? PSO_SPECIALISED : PSO_GENERIC;
const std::string function_name = std::string("cycles_metal_") +
device_kernel_as_string(device_kernel);
@@ -371,17 +281,6 @@ void MetalKernelPipeline::compile()
if (@available(macOS 11.0, *)) {
MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
func_desc.name = entryPoint;
if (pso_type == PSO_SPECIALIZED_SHADE) {
func_desc.constantValues = GetConstantValues(&kernel_data_);
}
else if (pso_type == PSO_SPECIALIZED_INTERSECT) {
func_desc.constantValues = GetConstantValues(&kernel_data_);
}
else {
func_desc.constantValues = GetConstantValues();
}
function = [mtlLibrary newFunctionWithDescriptor:func_desc error:&error];
}
@@ -528,7 +427,10 @@ void MetalKernelPipeline::compile()
MTLPipelineOption pipelineOptions = MTLPipelineOptionNone;
bool use_binary_archive = should_use_binary_archive();
bool use_binary_archive = true;
if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
use_binary_archive = (atoi(str) == 0);
}
id<MTLBinaryArchive> archive = nil;
string metalbin_path;
@@ -706,32 +608,19 @@ void MetalKernelPipeline::compile()
}
}
bool MetalDeviceKernels::load(MetalDevice *device, MetalPipelineType pso_type)
bool MetalDeviceKernels::load(MetalDevice *device, bool scene_specialized)
{
const double starttime = time_dt();
auto shader_cache = get_shader_cache(device->mtlDevice);
for (int i = 0; i < DEVICE_KERNEL_NUM; i++) {
shader_cache->load_kernel((DeviceKernel)i, device, pso_type);
shader_cache->load_kernel((DeviceKernel)i, device, scene_specialized);
}
shader_cache->wait_for_all();
metal_printf("Back-end compilation finished in %.1f seconds (%s)\n",
time_dt() - starttime,
kernel_type_as_string(pso_type));
if (!scene_specialized || getenv("CYCLES_METAL_PROFILING")) {
shader_cache->wait_for_all();
}
return true;
}
bool MetalDeviceKernels::should_load_kernels(MetalDevice *device, MetalPipelineType pso_type)
{
auto shader_cache = get_shader_cache(device->mtlDevice);
for (int i = 0; i < DEVICE_KERNEL_NUM; i++) {
if (shader_cache->should_load_kernel((DeviceKernel)i, device, pso_type)) {
return true;
}
}
return false;
}
const MetalKernelPipeline *MetalDeviceKernels::get_best_pipeline(const MetalDevice *device,
DeviceKernel kernel)
{
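A note on the cache keying used throughout this file: pipelines are matched by an MD5 of the generated source plus any baked constants, so identical specializations resolve to the same ShaderCache entry. A minimal sketch using the MD5Hash calls already visible in the hunks above; cached_md5 is a hypothetical stored key:
/* Sketch only: derive a cache key from the generated source and compare with a stored one. */
static bool matches_cached_pipeline(const string &source,
                                    const string &baked_constants,
                                    const string &cached_md5)
{
  MD5Hash md5;
  md5.append(baked_constants); /* values baked into the specialization, if any */
  md5.append(source);          /* generated Metal source */
  return md5.get_hex() == cached_md5; /* reuse the cached pipeline on a match */
}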

View File

@@ -24,7 +24,6 @@ class MetalDeviceQueue : public DeviceQueue {
virtual int num_concurrent_states(const size_t) const override;
virtual int num_concurrent_busy_states() const override;
virtual int num_sort_partition_elements() const override;
virtual void init_execution() override;

View File

@@ -293,11 +293,6 @@ int MetalDeviceQueue::num_concurrent_busy_states() const
return result;
}
int MetalDeviceQueue::num_sort_partition_elements() const
{
return MetalInfo::optimal_sort_partition_elements(metal_device_->mtlDevice);
}
void MetalDeviceQueue::init_execution()
{
/* Synchronize all textures and memory copies before executing task. */
@@ -364,7 +359,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
/* Prepare any non-pointer (i.e. plain-old-data) KernelParamsMetal data */
/* The plain-old-data is contiguous, continuing to the end of KernelParamsMetal */
size_t plain_old_launch_data_offset = offsetof(KernelParamsMetal, integrator_state) +
offsetof(IntegratorStateGPU, sort_partition_divisor);
sizeof(IntegratorStateGPU);
size_t plain_old_launch_data_size = sizeof(KernelParamsMetal) - plain_old_launch_data_offset;
memcpy(init_arg_buffer + globals_offsets + plain_old_launch_data_offset,
(uint8_t *)&metal_device_->launch_params + plain_old_launch_data_offset,
@@ -421,7 +416,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
/* this relies on IntegratorStateGPU layout being contiguous device_ptrs */
const size_t pointer_block_end = offsetof(KernelParamsMetal, integrator_state) +
offsetof(IntegratorStateGPU, sort_partition_divisor);
sizeof(IntegratorStateGPU);
for (size_t offset = 0; offset < pointer_block_end; offset += sizeof(device_ptr)) {
int pointer_index = int(offset / sizeof(device_ptr));
MetalDevice::MetalMem *mmem = *(
@@ -555,7 +550,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
/* Enhanced command buffer errors are only available in 11.0+ */
if (@available(macos 11.0, *)) {
if (command_buffer.status == MTLCommandBufferStatusError && command_buffer.error != nil) {
metal_device_->set_error(string("CommandBuffer Failed: ") + [kernel_name UTF8String]);
printf("CommandBuffer Failed: %s\n", [kernel_name UTF8String]);
NSArray<id<MTLCommandBufferEncoderInfo>> *encoderInfos = [command_buffer.error.userInfo
valueForKey:MTLCommandBufferEncoderInfoErrorKey];
if (encoderInfos != nil) {
@@ -569,7 +564,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
}
}
else if (command_buffer.error) {
metal_device_->set_error(string("CommandBuffer Failed: ") + [kernel_name UTF8String]);
printf("CommandBuffer Failed: %s\n", [kernel_name UTF8String]);
}
}
}];

View File

@@ -25,20 +25,10 @@ enum MetalGPUVendor {
METAL_GPU_INTEL = 3,
};
enum AppleGPUArchitecture {
APPLE_UNKNOWN,
APPLE_M1,
APPLE_M2,
};
/* Contains static Metal helper functions. */
struct MetalInfo {
static vector<id<MTLDevice>> const &get_usable_devices();
static int get_apple_gpu_core_count(id<MTLDevice> device);
static MetalGPUVendor get_device_vendor(id<MTLDevice> device);
static AppleGPUArchitecture get_apple_gpu_architecture(id<MTLDevice> device);
static int optimal_sort_partition_elements(id<MTLDevice> device);
static string get_device_name(id<MTLDevice> device);
static MetalGPUVendor get_vendor_from_device_name(string const &device_name);
};
/* Pool of MTLBuffers whose lifetime is linked to a single MTLCommandBuffer */

View File

@@ -10,83 +10,26 @@
# include "util/string.h"
# include "util/time.h"
# include <IOKit/IOKitLib.h>
# include <pwd.h>
# include <sys/shm.h>
# include <time.h>
CCL_NAMESPACE_BEGIN
string MetalInfo::get_device_name(id<MTLDevice> device)
MetalGPUVendor MetalInfo::get_vendor_from_device_name(string const &device_name)
{
string device_name = [device.name UTF8String];
if (get_device_vendor(device) == METAL_GPU_APPLE) {
/* Append the GPU core count so we can distinguish between GPU variants in benchmarks. */
int gpu_core_count = get_apple_gpu_core_count(device);
device_name += string_printf(gpu_core_count ? " (GPU - %d cores)" : " (GPU)", gpu_core_count);
}
return device_name;
}
int MetalInfo::get_apple_gpu_core_count(id<MTLDevice> device)
{
int core_count = 0;
if (@available(macos 12.0, *)) {
io_service_t gpu_service = IOServiceGetMatchingService(
kIOMainPortDefault, IORegistryEntryIDMatching(device.registryID));
if (CFNumberRef numberRef = (CFNumberRef)IORegistryEntryCreateCFProperty(
gpu_service, CFSTR("gpu-core-count"), 0, 0)) {
if (CFGetTypeID(numberRef) == CFNumberGetTypeID()) {
CFNumberGetValue(numberRef, kCFNumberSInt32Type, &core_count);
}
CFRelease(numberRef);
}
}
return core_count;
}
AppleGPUArchitecture MetalInfo::get_apple_gpu_architecture(id<MTLDevice> device)
{
const char *device_name = [device.name UTF8String];
if (strstr(device_name, "M1")) {
return APPLE_M1;
}
else if (strstr(device_name, "M2")) {
return APPLE_M2;
}
return APPLE_UNKNOWN;
}
MetalGPUVendor MetalInfo::get_device_vendor(id<MTLDevice> device)
{
const char *device_name = [device.name UTF8String];
if (strstr(device_name, "Intel")) {
if (device_name.find("Intel") != string::npos) {
return METAL_GPU_INTEL;
}
else if (strstr(device_name, "AMD")) {
else if (device_name.find("AMD") != string::npos) {
return METAL_GPU_AMD;
}
else if (strstr(device_name, "Apple")) {
else if (device_name.find("Apple") != string::npos) {
return METAL_GPU_APPLE;
}
return METAL_GPU_UNKNOWN;
}
int MetalInfo::optimal_sort_partition_elements(id<MTLDevice> device)
{
if (auto str = getenv("CYCLES_METAL_SORT_PARTITION_ELEMENTS")) {
return atoi(str);
}
/* On M1 and M2 GPUs, we see better cache utilization if we partition the active indices before
* sorting each partition by material. Partitioning into chunks of 65536 elements results in an
* overall render time speedup of up to 15%. */
if (get_device_vendor(device) == METAL_GPU_APPLE) {
return 65536;
}
return 0;
}
vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
{
static vector<id<MTLDevice>> usable_devices;
@@ -98,8 +41,9 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
metal_printf("Usable Metal devices:\n");
for (id<MTLDevice> device in MTLCopyAllDevices()) {
string device_name = get_device_name(device);
MetalGPUVendor vendor = get_device_vendor(device);
const char *device_name = [device.name UTF8String];
MetalGPUVendor vendor = get_vendor_from_device_name(device_name);
bool usable = false;
if (@available(macos 12.2, *)) {
@@ -111,12 +55,12 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
}
if (usable) {
metal_printf("- %s\n", device_name.c_str());
metal_printf("- %s\n", device_name);
[device retain];
usable_devices.push_back(device);
}
else {
metal_printf(" (skipping \"%s\")\n", device_name.c_str());
metal_printf(" (skipping \"%s\")\n", device_name);
}
}
if (usable_devices.empty()) {
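optimal_sort_partition_elements() above encodes the idea of sorting fixed-size partitions of the active index array by material, rather than sorting the whole array at once. A hedged CPU-side sketch of that idea only; the real sorting happens in the GPU integrator, not in this helper:
/* Sketch only: sort each chunk of `partition_size` indices by material key. */
#include <algorithm>
#include <vector>

static void partitioned_sort(std::vector<int> &indices,
                             const std::vector<int> &material_key,
                             size_t partition_size)
{
  for (size_t begin = 0; begin < indices.size(); begin += partition_size) {
    const size_t end = std::min(indices.size(), begin + partition_size);
    std::sort(indices.begin() + begin,
              indices.begin() + end,
              [&](int a, int b) { return material_key[a] < material_key[b]; });
  }
}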

View File

@@ -1,185 +0,0 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2021-2022 Intel Corporation */
#include "device/oneapi/device.h"
#include "util/log.h"
#ifdef WITH_ONEAPI
# include "device/device.h"
# include "device/oneapi/device_impl.h"
# include "util/path.h"
# include "util/string.h"
# ifdef __linux__
# include <dlfcn.h>
# endif
#endif /* WITH_ONEAPI */
CCL_NAMESPACE_BEGIN
#ifdef WITH_ONEAPI
static OneAPIDLLInterface oneapi_dll;
#endif
#ifdef _WIN32
# define LOAD_ONEAPI_SHARED_LIBRARY(path) (void *)(LoadLibrary(path))
# define FREE_SHARED_LIBRARY(handle) FreeLibrary((HMODULE)handle)
# define GET_SHARED_LIBRARY_SYMBOL(handle, name) GetProcAddress((HMODULE)handle, name)
#elif __linux__
# define LOAD_ONEAPI_SHARED_LIBRARY(path) dlopen(path, RTLD_NOW)
# define FREE_SHARED_LIBRARY(handle) dlclose(handle)
# define GET_SHARED_LIBRARY_SYMBOL(handle, name) dlsym(handle, name)
#endif
bool device_oneapi_init()
{
#if !defined(WITH_ONEAPI)
return false;
#else
string lib_path = path_get("lib");
# ifdef _WIN32
lib_path = path_join(lib_path, "cycles_kernel_oneapi.dll");
# else
lib_path = path_join(lib_path, "cycles_kernel_oneapi.so");
# endif
void *lib_handle = LOAD_ONEAPI_SHARED_LIBRARY(lib_path.c_str());
/* This shouldn't happen, but it still makes sense to have a branch for this. */
if (lib_handle == NULL) {
LOG(ERROR) << "oneAPI kernel shared library could not be loaded. This should not "
"happen; since it did, oneAPI rendering will be disabled";
return false;
}
# define DLL_INTERFACE_CALL(function, return_type, ...) \
(oneapi_dll.function) = reinterpret_cast<decltype(oneapi_dll.function)>( \
GET_SHARED_LIBRARY_SYMBOL(lib_handle, #function)); \
if (oneapi_dll.function == NULL) { \
LOG(ERROR) << "oneAPI shared library function \"" << #function \
<< "\" could not be loaded from the kernel shared library - disabling the " \
"oneAPI implementation because of this"; \
FREE_SHARED_LIBRARY(lib_handle); \
return false; \
}
# include "kernel/device/oneapi/dll_interface_template.h"
# undef DLL_INTERFACE_CALL
VLOG_INFO << "oneAPI kernel shared library has been loaded successfully";
/* The oneAPI kernel shared library is needed for the entire lifetime of Blender,
* so it is intentionally not unloaded here:
* FREE_SHARED_LIBRARY(lib_handle); */
/* NOTE(@nsirgien): we need to enable JIT cache from here and
* right now this cache policy is controlled by env. variables. */
/* NOTE(hallade): we also disable use of the copy engine, which
* improves stability as of intel/LLVM SYCL-nightly/20220529.
* All these environment variables can be set beforehand by end-users and
* will in that case -not- be overwritten. */
# ifdef _WIN32
if (getenv("SYCL_CACHE_PERSISTENT") == nullptr) {
_putenv_s("SYCL_CACHE_PERSISTENT", "1");
}
if (getenv("SYCL_CACHE_THRESHOLD") == nullptr) {
_putenv_s("SYCL_CACHE_THRESHOLD", "0");
}
if (getenv("SYCL_DEVICE_FILTER") == nullptr) {
_putenv_s("SYCL_DEVICE_FILTER", "host,level_zero");
}
if (getenv("SYCL_ENABLE_PCI") == nullptr) {
_putenv_s("SYCL_ENABLE_PCI", "1");
}
if (getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE") == nullptr) {
_putenv_s("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0");
}
# elif __linux__
setenv("SYCL_CACHE_PERSISTENT", "1", false);
setenv("SYCL_CACHE_THRESHOLD", "0", false);
setenv("SYCL_DEVICE_FILTER", "host,level_zero", false);
setenv("SYCL_ENABLE_PCI", "1", false);
setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false);
# endif
return true;
#endif
}
#if defined(_WIN32) || defined(__linux__)
# undef LOAD_SYCL_SHARED_LIBRARY
# undef LOAD_ONEAPI_SHARED_LIBRARY
# undef FREE_SHARED_LIBRARY
# undef GET_SHARED_LIBRARY_SYMBOL
#endif
Device *device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
{
#ifdef WITH_ONEAPI
return new OneapiDevice(info, oneapi_dll, stats, profiler);
#else
(void)info;
(void)stats;
(void)profiler;
LOG(FATAL) << "Requested to create oneAPI device while not enabled for this build.";
return nullptr;
#endif
}
#ifdef WITH_ONEAPI
static void device_iterator_cb(const char *id, const char *name, int num, void *user_ptr)
{
vector<DeviceInfo> *devices = (vector<DeviceInfo> *)user_ptr;
DeviceInfo info;
info.type = DEVICE_ONEAPI;
info.description = name;
info.num = num;
/* NOTE(@nsirgien): Should be unique at least on proper oneapi installation. */
info.id = id;
info.has_nanovdb = true;
info.denoisers = 0;
info.has_gpu_queue = true;
/* NOTE(@nsirgien): oneAPI is currently focused on single-device usage. This may change in the
* future, but right now peer access from one device to another is not supported. */
info.has_peer_memory = false;
/* NOTE(@nsirgien): Seems not possible to know from SYCL/oneAPI or Level0. */
info.display_device = false;
devices->push_back(info);
VLOG_INFO << "Added device \"" << name << "\" with id \"" << info.id << "\".";
}
#endif
void device_oneapi_info(vector<DeviceInfo> &devices)
{
#ifdef WITH_ONEAPI
(oneapi_dll.oneapi_iterate_devices)(device_iterator_cb, &devices);
#else /* WITH_ONEAPI */
(void)devices;
#endif /* WITH_ONEAPI */
}
string device_oneapi_capabilities()
{
string capabilities;
#ifdef WITH_ONEAPI
char *c_capabilities = (oneapi_dll.oneapi_device_capabilities)();
if (c_capabilities) {
capabilities = c_capabilities;
(oneapi_dll.oneapi_free)(c_capabilities);
}
#endif
return capabilities;
}
CCL_NAMESPACE_END
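The DLL_INTERFACE_CALL macro above resolves every entry point of the kernel shared library by name at runtime. A hedged sketch of resolving a single symbol on Linux; the oneapi_create_queue_t signature is an assumption inferred from the call sites in this diff:
/* Sketch only: resolve one symbol from an already dlopen()ed library. */
#include <dlfcn.h>

typedef bool (*oneapi_create_queue_t)(void *&queue, int device_index);

static oneapi_create_queue_t resolve_create_queue(void *lib_handle)
{
  /* Returns NULL when the symbol is missing, mirroring the macro's error path. */
  return reinterpret_cast<oneapi_create_queue_t>(dlsym(lib_handle, "oneapi_create_queue"));
}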

View File

@@ -1,24 +0,0 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#pragma once
#include "util/string.h"
#include "util/vector.h"
CCL_NAMESPACE_BEGIN
class Device;
class DeviceInfo;
class Profiler;
class Stats;
bool device_oneapi_init();
Device *device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
void device_oneapi_info(vector<DeviceInfo> &devices);
string device_oneapi_capabilities();
CCL_NAMESPACE_END

View File

@@ -1,426 +0,0 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2021-2022 Intel Corporation */
#ifdef WITH_ONEAPI
# include "device/oneapi/device_impl.h"
# include "util/debug.h"
# include "util/log.h"
# include "kernel/device/oneapi/kernel.h"
CCL_NAMESPACE_BEGIN
static void queue_error_cb(const char *message, void *user_ptr)
{
if (user_ptr) {
*reinterpret_cast<std::string *>(user_ptr) = message;
}
}
OneapiDevice::OneapiDevice(const DeviceInfo &info,
OneAPIDLLInterface &oneapi_dll_object,
Stats &stats,
Profiler &profiler)
: Device(info, stats, profiler),
device_queue_(nullptr),
texture_info_(this, "texture_info", MEM_GLOBAL),
kg_memory_(nullptr),
kg_memory_device_(nullptr),
kg_memory_size_(0),
oneapi_dll_(oneapi_dll_object)
{
need_texture_info_ = false;
oneapi_dll_.oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);
/* OneAPI calls should be initialized on this moment. */
assert(oneapi_dll_.oneapi_create_queue != nullptr);
bool is_finished_ok = oneapi_dll_.oneapi_create_queue(device_queue_, info.num);
if (is_finished_ok == false) {
set_error("oneAPI queue initialization error: got runtime exception \"" +
oneapi_error_string_ + "\"");
}
else {
VLOG_DEBUG << "oneAPI queue has been successfully created for the device \""
<< info.description << "\"";
assert(device_queue_);
}
size_t globals_segment_size;
is_finished_ok = oneapi_dll_.oneapi_kernel_globals_size(device_queue_, globals_segment_size);
if (is_finished_ok == false) {
set_error("oneAPI constant memory initialization got runtime exception \"" +
oneapi_error_string_ + "\"");
}
else {
VLOG_DEBUG << "Successfully created global/constant memory segment (kernel globals object)";
}
kg_memory_ = oneapi_dll_.oneapi_usm_aligned_alloc_host(device_queue_, globals_segment_size, 16);
oneapi_dll_.oneapi_usm_memset(device_queue_, kg_memory_, 0, globals_segment_size);
kg_memory_device_ = oneapi_dll_.oneapi_usm_alloc_device(device_queue_, globals_segment_size);
kg_memory_size_ = globals_segment_size;
}
OneapiDevice::~OneapiDevice()
{
texture_info_.free();
oneapi_dll_.oneapi_usm_free(device_queue_, kg_memory_);
oneapi_dll_.oneapi_usm_free(device_queue_, kg_memory_device_);
for (ConstMemMap::iterator mt = const_mem_map_.begin(); mt != const_mem_map_.end(); mt++)
delete mt->second;
if (device_queue_)
oneapi_dll_.oneapi_free_queue(device_queue_);
}
bool OneapiDevice::check_peer_access(Device * /*peer_device*/)
{
return false;
}
BVHLayoutMask OneapiDevice::get_bvh_layout_mask() const
{
return BVH_LAYOUT_BVH2;
}
bool OneapiDevice::load_kernels(const uint requested_features)
{
assert(device_queue_);
/* NOTE(@nsirgien): oneAPI can support compilation of kernel code for a certain feature set
* via specialization constants, but this hasn't been implemented yet. */
(void)requested_features;
bool is_finished_ok = oneapi_dll_.oneapi_run_test_kernel(device_queue_);
if (is_finished_ok == false) {
set_error("oneAPI kernel load: got runtime exception \"" + oneapi_error_string_ + "\"");
}
else {
VLOG_INFO << "Runtime compilation done for \"" << info.description << "\"";
assert(device_queue_);
}
return is_finished_ok;
}
void OneapiDevice::load_texture_info()
{
if (need_texture_info_) {
need_texture_info_ = false;
texture_info_.copy_to_device();
}
}
void OneapiDevice::generic_alloc(device_memory &mem)
{
size_t memory_size = mem.memory_size();
/* TODO(@nsirgien): In the future, if the scene doesn't fit into device memory, we can
* use USM host memory.
* Because of the expected performance impact, implementing this has had a low priority
* and it is not done yet. */
assert(device_queue_);
/* NOTE(@nsirgien): There are three types of Unified Shared Memory (USM) in oneAPI: host, device
* and shared. For a new project it may be more beneficial to use USM shared memory, because it
* provides an automatic migration mechanism that allows the same pointer to be used on the host
* and on the device without explicit memory transfer operations. For Blender/Cycles this type of
* memory is not a good fit with the current application architecture, because Cycles already uses
* separate pointers for host and device activity, and already performs all needed memory transfer
* operations. So the USM device memory type is used for the oneAPI device, as it fits the Cycles
* architecture better. */
void *device_pointer = oneapi_dll_.oneapi_usm_alloc_device(device_queue_, memory_size);
if (device_pointer == nullptr) {
size_t max_memory_on_device = oneapi_dll_.oneapi_get_memcapacity(device_queue_);
set_error("oneAPI kernel - device memory allocation error for " +
string_human_readable_size(mem.memory_size()) +
", possibly caused by lack of available memory space on the device: " +
string_human_readable_size(stats.mem_used) + " of " +
string_human_readable_size(max_memory_on_device) + " is already allocated");
return;
}
assert(device_pointer);
mem.device_pointer = reinterpret_cast<ccl::device_ptr>(device_pointer);
mem.device_size = memory_size;
stats.mem_alloc(memory_size);
}
void OneapiDevice::generic_copy_to(device_memory &mem)
{
size_t memory_size = mem.memory_size();
/* Copy operation from host shouldn't be requested if there is no memory allocated on host. */
assert(mem.host_pointer);
assert(device_queue_);
oneapi_dll_.oneapi_usm_memcpy(
device_queue_, (void *)mem.device_pointer, (void *)mem.host_pointer, memory_size);
}
/* TODO: Make sycl::queue part of OneapiQueue and avoid using pointers to sycl::queue. */
SyclQueue *OneapiDevice::sycl_queue()
{
return device_queue_;
}
string OneapiDevice::oneapi_error_message()
{
return string(oneapi_error_string_);
}
OneAPIDLLInterface OneapiDevice::oneapi_dll_object()
{
return oneapi_dll_;
}
void *OneapiDevice::kernel_globals_device_pointer()
{
return kg_memory_device_;
}
void OneapiDevice::generic_free(device_memory &mem)
{
assert(mem.device_pointer);
stats.mem_free(mem.device_size);
mem.device_size = 0;
assert(device_queue_);
oneapi_dll_.oneapi_usm_free(device_queue_, (void *)mem.device_pointer);
mem.device_pointer = 0;
}
void OneapiDevice::mem_alloc(device_memory &mem)
{
if (mem.type == MEM_TEXTURE) {
assert(!"mem_alloc not supported for textures.");
}
else if (mem.type == MEM_GLOBAL) {
assert(!"mem_alloc not supported for global memory.");
}
else {
if (mem.name) {
VLOG_DEBUG << "OneapiDevice::mem_alloc: \"" << mem.name << "\", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
}
generic_alloc(mem);
}
}
void OneapiDevice::mem_copy_to(device_memory &mem)
{
if (mem.name) {
VLOG_DEBUG << "OneapiDevice::mem_copy_to: \"" << mem.name << "\", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
}
if (mem.type == MEM_GLOBAL) {
global_free(mem);
global_alloc(mem);
}
else if (mem.type == MEM_TEXTURE) {
tex_free((device_texture &)mem);
tex_alloc((device_texture &)mem);
}
else {
if (!mem.device_pointer)
mem_alloc(mem);
generic_copy_to(mem);
}
}
void OneapiDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem)
{
if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) {
assert(!"mem_copy_from not supported for textures.");
}
else if (mem.host_pointer) {
const size_t size = (w > 0 || h > 0 || elem > 0) ? (elem * w * h) : mem.memory_size();
const size_t offset = elem * y * w;
if (mem.name) {
VLOG_DEBUG << "OneapiDevice::mem_copy_from: \"" << mem.name << "\" object of "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ") from offset " << offset
<< " data " << size << " bytes";
}
assert(device_queue_);
assert(size != 0);
assert(mem.device_pointer);
char *shifted_host = reinterpret_cast<char *>(mem.host_pointer) + offset;
char *shifted_device = reinterpret_cast<char *>(mem.device_pointer) + offset;
bool is_finished_ok = oneapi_dll_.oneapi_usm_memcpy(
device_queue_, shifted_host, shifted_device, size);
if (is_finished_ok == false) {
set_error("oneAPI memory operation error: got runtime exception \"" + oneapi_error_string_ +
"\"");
}
}
}
void OneapiDevice::mem_zero(device_memory &mem)
{
if (mem.name) {
VLOG_DEBUG << "OneapiDevice::mem_zero: \"" << mem.name << "\", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")\n";
}
if (!mem.device_pointer) {
mem_alloc(mem);
}
if (!mem.device_pointer) {
return;
}
assert(device_queue_);
bool is_finished_ok = oneapi_dll_.oneapi_usm_memset(
device_queue_, (void *)mem.device_pointer, 0, mem.memory_size());
if (is_finished_ok == false) {
set_error("oneAPI memory operation error: got runtime exception \"" + oneapi_error_string_ +
"\"");
}
}
void OneapiDevice::mem_free(device_memory &mem)
{
if (mem.name) {
VLOG_DEBUG << "OneapiDevice::mem_free: \"" << mem.name << "\", "
<< string_human_readable_number(mem.device_size) << " bytes. ("
<< string_human_readable_size(mem.device_size) << ")\n";
}
if (mem.type == MEM_GLOBAL) {
global_free(mem);
}
else if (mem.type == MEM_TEXTURE) {
tex_free((device_texture &)mem);
}
else {
generic_free(mem);
}
}
device_ptr OneapiDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
{
return reinterpret_cast<device_ptr>(reinterpret_cast<char *>(mem.device_pointer) +
mem.memory_elements_size(offset));
}
void OneapiDevice::const_copy_to(const char *name, void *host, size_t size)
{
assert(name);
VLOG_DEBUG << "OneapiDevice::const_copy_to \"" << name << "\" object "
<< string_human_readable_number(size) << " bytes. ("
<< string_human_readable_size(size) << ")";
ConstMemMap::iterator i = const_mem_map_.find(name);
device_vector<uchar> *data;
if (i == const_mem_map_.end()) {
data = new device_vector<uchar>(this, name, MEM_READ_ONLY);
data->alloc(size);
const_mem_map_.insert(ConstMemMap::value_type(name, data));
}
else {
data = i->second;
}
assert(data->memory_size() <= size);
memcpy(data->data(), host, size);
data->copy_to_device();
oneapi_dll_.oneapi_set_global_memory(
device_queue_, kg_memory_, name, (void *)data->device_pointer);
oneapi_dll_.oneapi_usm_memcpy(device_queue_, kg_memory_device_, kg_memory_, kg_memory_size_);
}
void OneapiDevice::global_alloc(device_memory &mem)
{
assert(mem.name);
size_t size = mem.memory_size();
VLOG_DEBUG << "OneapiDevice::global_alloc \"" << mem.name << "\" object "
<< string_human_readable_number(size) << " bytes. ("
<< string_human_readable_size(size) << ")";
generic_alloc(mem);
generic_copy_to(mem);
oneapi_dll_.oneapi_set_global_memory(
device_queue_, kg_memory_, mem.name, (void *)mem.device_pointer);
oneapi_dll_.oneapi_usm_memcpy(device_queue_, kg_memory_device_, kg_memory_, kg_memory_size_);
}
void OneapiDevice::global_free(device_memory &mem)
{
if (mem.device_pointer) {
generic_free(mem);
}
}
void OneapiDevice::tex_alloc(device_texture &mem)
{
generic_alloc(mem);
generic_copy_to(mem);
/* Resize if needed. Also, in case of a resize, allocate in advance for future allocations. */
const uint slot = mem.slot;
if (slot >= texture_info_.size()) {
texture_info_.resize(slot + 128);
}
texture_info_[slot] = mem.info;
need_texture_info_ = true;
texture_info_[slot].data = (uint64_t)mem.device_pointer;
}
void OneapiDevice::tex_free(device_texture &mem)
{
/* There is no texture memory in SYCL. */
if (mem.device_pointer) {
generic_free(mem);
}
}
unique_ptr<DeviceQueue> OneapiDevice::gpu_queue_create()
{
return make_unique<OneapiDeviceQueue>(this);
}
bool OneapiDevice::should_use_graphics_interop()
{
/* NOTE(@nsirgien): oneAPI doesn't yet support direct writing into graphics API objects, so
* return false. */
return false;
}
void *OneapiDevice::usm_aligned_alloc_host(size_t memory_size, size_t alignment)
{
assert(device_queue_);
return oneapi_dll_.oneapi_usm_aligned_alloc_host(device_queue_, memory_size, alignment);
}
void OneapiDevice::usm_free(void *usm_ptr)
{
assert(device_queue_);
return oneapi_dll_.oneapi_usm_free(device_queue_, usm_ptr);
}
CCL_NAMESPACE_END
#endif

View File

@@ -1,100 +0,0 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2021-2022 Intel Corporation */
#ifdef WITH_ONEAPI
# include "device/device.h"
# include "device/oneapi/device.h"
# include "device/oneapi/queue.h"
# include "util/map.h"
CCL_NAMESPACE_BEGIN
class DeviceQueue;
class OneapiDevice : public Device {
private:
SyclQueue *device_queue_;
using ConstMemMap = map<string, device_vector<uchar> *>;
ConstMemMap const_mem_map_;
device_vector<TextureInfo> texture_info_;
bool need_texture_info_;
void *kg_memory_;
void *kg_memory_device_;
size_t kg_memory_size_ = (size_t)0;
OneAPIDLLInterface oneapi_dll_;
std::string oneapi_error_string_;
public:
virtual BVHLayoutMask get_bvh_layout_mask() const override;
OneapiDevice(const DeviceInfo &info,
OneAPIDLLInterface &oneapi_dll_object,
Stats &stats,
Profiler &profiler);
virtual ~OneapiDevice();
bool check_peer_access(Device *peer_device) override;
bool load_kernels(const uint requested_features) override;
void load_texture_info();
void generic_alloc(device_memory &mem);
void generic_copy_to(device_memory &mem);
void generic_free(device_memory &mem);
SyclQueue *sycl_queue();
string oneapi_error_message();
OneAPIDLLInterface oneapi_dll_object();
void *kernel_globals_device_pointer();
void mem_alloc(device_memory &mem) override;
void mem_copy_to(device_memory &mem) override;
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
void mem_copy_from(device_memory &mem)
{
mem_copy_from(mem, 0, 0, 0, 0);
}
void mem_zero(device_memory &mem) override;
void mem_free(device_memory &mem) override;
device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) override;
virtual void const_copy_to(const char *name, void *host, size_t size) override;
void global_alloc(device_memory &mem);
void global_free(device_memory &mem);
void tex_alloc(device_texture &mem);
void tex_free(device_texture &mem);
/* Graphics resources interoperability. */
virtual bool should_use_graphics_interop() override;
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
/* NOTE(@nsirgien): These methods exist to avoid some compilation problems on Windows with host
 * side compilation (MSVC). */
void *usm_aligned_alloc_host(size_t memory_size, size_t alignment);
void usm_free(void *usm_ptr);
};
CCL_NAMESPACE_END
#endif

View File

@@ -1,17 +0,0 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#pragma once
/* Include kernel header to get access to SYCL-specific types, like SyclQueue and
* OneAPIDeviceIteratorCallback. */
#include "kernel/device/oneapi/kernel.h"
#ifdef WITH_ONEAPI
struct OneAPIDLLInterface {
# define DLL_INTERFACE_CALL(function, return_type, ...) \
return_type (*function)(__VA_ARGS__) = nullptr;
# include "kernel/device/oneapi/dll_interface_template.h"
# undef DLL_INTERFACE_CALL
};
#endif

View File

@@ -1,165 +0,0 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2021-2022 Intel Corporation */
#ifdef WITH_ONEAPI
# include "device/oneapi/queue.h"
# include "device/oneapi/device_impl.h"
# include "util/log.h"
# include "util/time.h"
# include <iomanip>
# include <vector>
# include "kernel/device/oneapi/kernel.h"
CCL_NAMESPACE_BEGIN
struct KernelExecutionInfo {
double elapsed_summary = 0.0;
int enqueue_count = 0;
};
/* OneapiDeviceQueue */
OneapiDeviceQueue::OneapiDeviceQueue(OneapiDevice *device)
: DeviceQueue(device),
oneapi_device_(device),
oneapi_dll_(device->oneapi_dll_object()),
kernel_context_(nullptr)
{
}
OneapiDeviceQueue::~OneapiDeviceQueue()
{
delete kernel_context_;
}
int OneapiDeviceQueue::num_concurrent_states(const size_t state_size) const
{
int num_states;
/* TODO: implement and use get_num_multiprocessors and get_max_num_threads_per_multiprocessor. */
const size_t compute_units = oneapi_dll_.oneapi_get_compute_units_amount(
oneapi_device_->sycl_queue());
if (compute_units >= 128) {
/* dGPU path: it makes sense to allocate more states because dedicated GPU memory will be used. */
int base = 1024 * 1024;
/* Linear dependency (with a coefficient less than 1) on the number of compute units. */
num_states = (base * (compute_units / 128)) * 3 / 4;
/* Limit the number of integrator states to one quarter of device memory, because
 * other allocations will need some space as well.
 * TODO: base this calculation on how many states the GPU is actually capable of
 * running, with some headroom to improve occupancy. If the textures don't fit, offload
 * them into unified memory. */
size_t states_memory_size = num_states * state_size;
size_t device_memory_amount =
(oneapi_dll_.oneapi_get_memcapacity)(oneapi_device_->sycl_queue());
if (states_memory_size >= device_memory_amount / 4) {
num_states = device_memory_amount / 4 / state_size;
}
}
else {
/* iGPU path - no real need to allocate a lot of integrator states because GPU memory is
 * shared with the host. */
num_states = 1024 * 512;
}
VLOG_DEVICE_STATS << "GPU queue concurrent states: " << num_states << ", using up to "
<< string_human_readable_size(num_states * state_size);
return num_states;
}
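
To get a rough sense of what the heuristic above produces, here is a minimal standalone sketch (an illustration only, not part of this diff or of the Cycles API) with hypothetical numbers: a dGPU with 512 compute units, 16 GiB of device memory, and a 640-byte integrator state.

#include <cstddef>
#include <cstdio>

/* Illustrative re-implementation of the state-count heuristic above. */
static size_t concurrent_states(size_t compute_units, size_t device_memory, size_t state_size)
{
  size_t num_states;
  if (compute_units >= 128) {
    const size_t base = 1024 * 1024;
    /* Scale linearly with the number of compute units. */
    num_states = (base * (compute_units / 128)) * 3 / 4;
    /* Cap the states at one quarter of device memory. */
    if (num_states * state_size >= device_memory / 4) {
      num_states = device_memory / 4 / state_size;
    }
  }
  else {
    /* iGPU: memory is shared with the host, so keep the allocation small. */
    num_states = 1024 * 512;
  }
  return num_states;
}

int main()
{
  /* 512 CUs -> (1 Mi * 4) * 3/4 = 3,145,728 states; at 640 B each that is ~1.9 GiB,
   * which stays under the 4 GiB cap for a 16 GiB device. */
  std::printf("%zu\n", concurrent_states(512, size_t(16) << 30, 640));
  return 0;
}
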
int OneapiDeviceQueue::num_concurrent_busy_states() const
{
const size_t compute_units = oneapi_dll_.oneapi_get_compute_units_amount(
oneapi_device_->sycl_queue());
if (compute_units >= 128) {
return 1024 * 1024;
}
else {
return 1024 * 512;
}
}
void OneapiDeviceQueue::init_execution()
{
oneapi_device_->load_texture_info();
SyclQueue *device_queue = oneapi_device_->sycl_queue();
void *kg_dptr = (void *)oneapi_device_->kernel_globals_device_pointer();
assert(device_queue);
assert(kg_dptr);
kernel_context_ = new KernelContext{device_queue, kg_dptr};
debug_init_execution();
}
bool OneapiDeviceQueue::enqueue(DeviceKernel kernel,
const int signed_kernel_work_size,
DeviceKernelArguments const &_args)
{
if (oneapi_device_->have_error()) {
return false;
}
void **args = const_cast<void **>(_args.values);
debug_enqueue(kernel, signed_kernel_work_size);
assert(signed_kernel_work_size >= 0);
size_t kernel_work_size = (size_t)signed_kernel_work_size;
size_t kernel_local_size = oneapi_dll_.oneapi_kernel_preferred_local_size(
kernel_context_->queue, (::DeviceKernel)kernel, kernel_work_size);
size_t uniformed_kernel_work_size = round_up(kernel_work_size, kernel_local_size);
assert(kernel_context_);
/* Call the oneAPI kernel DLL to launch the requested kernel. */
bool is_finished_ok = oneapi_dll_.oneapi_enqueue_kernel(
kernel_context_, kernel, uniformed_kernel_work_size, args);
if (is_finished_ok == false) {
oneapi_device_->set_error("oneAPI kernel \"" + std::string(device_kernel_as_string(kernel)) +
"\" execution error: got runtime exception \"" +
oneapi_device_->oneapi_error_message() + "\"");
}
return is_finished_ok;
}
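
The enqueued work size is padded up to a whole number of work-groups before the launch. Below is a minimal sketch of that rounding with hypothetical sizes (the real local size comes from oneapi_kernel_preferred_local_size above); the kernel is then expected to bounds-check indices past the requested work size.

#include <cassert>
#include <cstddef>

/* Same idea as the round_up() call above: pad the global work size to a multiple
 * of the preferred work-group size. */
static size_t round_up(size_t x, size_t multiple)
{
  assert(multiple != 0);
  return ((x + multiple - 1) / multiple) * multiple;
}

int main()
{
  const size_t work_size = 100000; /* requested items (hypothetical) */
  const size_t local_size = 1024;  /* preferred work-group size (hypothetical) */
  const size_t padded = round_up(work_size, local_size); /* 100352 */
  assert(padded % local_size == 0 && padded >= work_size);
  return 0;
}
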
bool OneapiDeviceQueue::synchronize()
{
if (oneapi_device_->have_error()) {
return false;
}
bool is_finished_ok = oneapi_dll_.oneapi_queue_synchronize(oneapi_device_->sycl_queue());
if (is_finished_ok == false)
oneapi_device_->set_error("oneAPI unknown kernel execution error: got runtime exception \"" +
oneapi_device_->oneapi_error_message() + "\"");
debug_synchronize();
return !(oneapi_device_->have_error());
}
void OneapiDeviceQueue::zero_to_device(device_memory &mem)
{
oneapi_device_->mem_zero(mem);
}
void OneapiDeviceQueue::copy_to_device(device_memory &mem)
{
oneapi_device_->mem_copy_to(mem);
}
void OneapiDeviceQueue::copy_from_device(device_memory &mem)
{
oneapi_device_->mem_copy_from(mem);
}
CCL_NAMESPACE_END
#endif /* WITH_ONEAPI */

View File

@@ -1,51 +0,0 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2021-2022 Intel Corporation */
#pragma once
#ifdef WITH_ONEAPI
# include "device/kernel.h"
# include "device/memory.h"
# include "device/queue.h"
# include "device/oneapi/device.h"
# include "device/oneapi/dll_interface.h"
CCL_NAMESPACE_BEGIN
class OneapiDevice;
class device_memory;
/* Base class for OneAPI queues. */
class OneapiDeviceQueue : public DeviceQueue {
public:
explicit OneapiDeviceQueue(OneapiDevice *device);
~OneapiDeviceQueue();
virtual int num_concurrent_states(const size_t state_size) const override;
virtual int num_concurrent_busy_states() const override;
virtual void init_execution() override;
virtual bool enqueue(DeviceKernel kernel,
const int kernel_work_size,
DeviceKernelArguments const &args) override;
virtual bool synchronize() override;
virtual void zero_to_device(device_memory &mem) override;
virtual void copy_to_device(device_memory &mem) override;
virtual void copy_from_device(device_memory &mem) override;
protected:
OneapiDevice *oneapi_device_;
OneAPIDLLInterface oneapi_dll_;
KernelContext *kernel_context_;
bool with_kernel_statistics_;
};
CCL_NAMESPACE_END
#endif /* WITH_ONEAPI */

View File

@@ -2047,7 +2047,7 @@ void OptiXDevice::const_copy_to(const char *name, void *host, size_t size)
/* Update traversable handle (since it is different for each device on multi devices). */
KernelData *const data = (KernelData *)host;
*(OptixTraversableHandle *)&data->device_bvh = tlas_handle;
*(OptixTraversableHandle *)&data->bvh.scene = tlas_handle;
update_launch_params(offsetof(KernelParamsOptiX, data), host, size);
return;

View File

@@ -105,13 +105,6 @@ class DeviceQueue {
* value. */
virtual int num_concurrent_busy_states() const = 0;
/* Number of elements in a partition of sorted shaders, that improves memory locality of
* integrator state fetch at the cost of decreased coherence for shader kernel execution. */
virtual int num_sort_partition_elements() const
{
return 65536;
}
/* Initialize execution of kernels on this queue.
*
* Will, for example, load all data required by the kernels from Device to global or path state.

View File

@@ -373,7 +373,7 @@ void PathTrace::path_trace(RenderWork &render_work)
work_balance_infos_[i].time_spent += work_time;
work_balance_infos_[i].occupancy = statistics.occupancy;
VLOG_INFO << "Rendered " << num_samples << " samples in " << work_time << " seconds ("
VLOG_WORK << "Rendered " << num_samples << " samples in " << work_time << " seconds ("
<< work_time / num_samples
<< " seconds per sample), occupancy: " << statistics.occupancy;
});
@@ -1103,8 +1103,6 @@ static const char *device_type_for_description(const DeviceType type)
return "OptiX";
case DEVICE_HIP:
return "HIP";
case DEVICE_ONEAPI:
return "oneAPI";
case DEVICE_DUMMY:
return "Dummy";
case DEVICE_MULTI:

View File

@@ -181,45 +181,27 @@ void PathTraceWorkGPU::alloc_integrator_queue()
void PathTraceWorkGPU::alloc_integrator_sorting()
{
/* Compute sort partitions, to balance between memory locality and coherence.
* Sort partitioning becomes less effective when more shaders are in the wavefront. In lieu of a
* more sophisticated heuristic we simply disable sort partitioning if the shader count is high.
*/
num_sort_partitions_ = 1;
if (device_scene_->data.max_shaders < 300) {
const int num_elements = queue_->num_sort_partition_elements();
if (num_elements) {
num_sort_partitions_ = max(max_num_paths_ / num_elements, 1);
}
}
integrator_state_gpu_.sort_partition_divisor = (int)divide_up(max_num_paths_,
num_sort_partitions_);
/* Allocate arrays for shader sorting. */
const int sort_buckets = device_scene_->data.max_shaders * num_sort_partitions_;
if (integrator_shader_sort_counter_.size() < sort_buckets) {
integrator_shader_sort_counter_.alloc(sort_buckets);
const int max_shaders = device_scene_->data.max_shaders;
if (integrator_shader_sort_counter_.size() < max_shaders) {
integrator_shader_sort_counter_.alloc(max_shaders);
integrator_shader_sort_counter_.zero_to_device();
integrator_shader_raytrace_sort_counter_.alloc(max_shaders);
integrator_shader_raytrace_sort_counter_.zero_to_device();
integrator_shader_mnee_sort_counter_.alloc(max_shaders);
integrator_shader_mnee_sort_counter_.zero_to_device();
integrator_shader_sort_prefix_sum_.alloc(max_shaders);
integrator_shader_sort_prefix_sum_.zero_to_device();
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] =
(int *)integrator_shader_sort_counter_.device_pointer;
if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
integrator_shader_raytrace_sort_counter_.alloc(sort_buckets);
integrator_shader_raytrace_sort_counter_.zero_to_device();
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
(int *)integrator_shader_raytrace_sort_counter_.device_pointer;
}
if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
integrator_shader_mnee_sort_counter_.alloc(sort_buckets);
integrator_shader_mnee_sort_counter_.zero_to_device();
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
(int *)integrator_shader_mnee_sort_counter_.device_pointer;
}
integrator_shader_sort_prefix_sum_.alloc(sort_buckets);
integrator_shader_sort_prefix_sum_.zero_to_device();
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
(int *)integrator_shader_raytrace_sort_counter_.device_pointer;
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
(int *)integrator_shader_mnee_sort_counter_.device_pointer;
}
}
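
For reference, here is a minimal standalone sketch (an illustration, not the Cycles code) of the partition arithmetic removed in this hunk, using hypothetical counts: 1,048,576 paths, 64 shaders, and 65,536 elements per partition.

#include <algorithm>
#include <cstdio>

static int divide_up(int x, int y)
{
  return (x + y - 1) / y;
}

int main()
{
  const int max_num_paths = 1 << 20; /* 1,048,576 integrator states (hypothetical) */
  const int max_shaders = 64;        /* below the 300-shader cutoff */
  const int num_elements = 65536;    /* num_sort_partition_elements() */

  int num_sort_partitions = 1;
  if (max_shaders < 300) {
    num_sort_partitions = std::max(max_num_paths / num_elements, 1); /* 16 */
  }
  const int sort_partition_divisor = divide_up(max_num_paths, num_sort_partitions); /* 65536 */
  const int sort_buckets = max_shaders * num_sort_partitions; /* 1024 counters to allocate */

  std::printf("%d partitions, divisor %d, %d sort buckets\n",
              num_sort_partitions, sort_partition_divisor, sort_buckets);
  return 0;
}
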
@@ -351,12 +333,8 @@ void PathTraceWorkGPU::enqueue_reset()
queue_->enqueue(DEVICE_KERNEL_INTEGRATOR_RESET, max_num_paths_, args);
queue_->zero_to_device(integrator_queue_counter_);
queue_->zero_to_device(integrator_shader_sort_counter_);
if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
queue_->zero_to_device(integrator_shader_raytrace_sort_counter_);
}
if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
queue_->zero_to_device(integrator_shader_mnee_sort_counter_);
}
queue_->zero_to_device(integrator_shader_raytrace_sort_counter_);
queue_->zero_to_device(integrator_shader_mnee_sort_counter_);
/* Tile enqueueing needs to know the number of active paths, which is based on this counter.
 * Zero the counter on the host side because `zero_to_device()` does not do it. */
@@ -508,9 +486,9 @@ void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel kernel,
/* Compute prefix sum of number of active paths with each shader. */
{
const int work_size = 1;
int sort_buckets = device_scene_->data.max_shaders * num_sort_partitions_;
int max_shaders = device_scene_->data.max_shaders;
DeviceKernelArguments args(&d_counter, &d_prefix_sum, &sort_buckets);
DeviceKernelArguments args(&d_counter, &d_prefix_sum, &max_shaders);
queue_->enqueue(DEVICE_KERNEL_PREFIX_SUM, work_size, args);
}

View File

@@ -156,9 +156,6 @@ class PathTraceWorkGPU : public PathTraceWork {
bool interop_use_checked_ = false;
bool interop_use_ = false;
/* Number of partitions to sort state indices into prior to material sort. */
int num_sort_partitions_;
/* Maximum number of concurrent integrator states. */
int max_num_paths_;

View File

@@ -37,10 +37,6 @@ set(SRC_KERNEL_DEVICE_OPTIX
device/optix/kernel_shader_raytrace.cu
)
set(SRC_KERNEL_DEVICE_ONEAPI
device/oneapi/kernel.cpp
)
set(SRC_KERNEL_DEVICE_CPU_HEADERS
device/cpu/compat.h
device/cpu/image.h
@@ -79,20 +75,9 @@ set(SRC_KERNEL_DEVICE_METAL_HEADERS
device/metal/compat.h
device/metal/context_begin.h
device/metal/context_end.h
device/metal/function_constants.h
device/metal/globals.h
)
set(SRC_KERNEL_DEVICE_ONEAPI_HEADERS
device/oneapi/compat.h
device/oneapi/context_begin.h
device/oneapi/context_end.h
device/oneapi/globals.h
device/oneapi/image.h
device/oneapi/kernel.h
device/oneapi/kernel_templates.h
)
set(SRC_KERNEL_CLOSURE_HEADERS
closure/alloc.h
closure/bsdf.h
@@ -155,7 +140,6 @@ set(SRC_KERNEL_SVM_HEADERS
svm/math_util.h
svm/mix.h
svm/musgrave.h
svm/node_types_template.h
svm/noise.h
svm/noisetex.h
svm/normal.h
@@ -284,7 +268,6 @@ set(SRC_KERNEL_UTIL_HEADERS
set(SRC_KERNEL_TYPES_HEADERS
data_arrays.h
data_template.h
tables.h
types.h
)
@@ -704,209 +687,6 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
cycles_set_solution_folder(cycles_kernel_optix)
endif()
if(WITH_CYCLES_DEVICE_ONEAPI)
if(WIN32)
set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi.dll)
else()
set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi.so)
endif()
set(cycles_oneapi_kernel_sources
${SRC_KERNEL_DEVICE_ONEAPI}
${SRC_KERNEL_HEADERS}
${SRC_KERNEL_DEVICE_GPU_HEADERS}
${SRC_KERNEL_DEVICE_ONEAPI_HEADERS}
${SRC_UTIL_HEADERS}
)
# SYCL_CPP_FLAGS is a variable that the user can set to pass extra compiler options
set(sycl_compiler_flags
${CMAKE_CURRENT_SOURCE_DIR}/${SRC_KERNEL_DEVICE_ONEAPI}
-fsycl
-fsycl-unnamed-lambda
-fdelayed-template-parsing
-mllvm -inlinedefault-threshold=300
-mllvm -inlinehint-threshold=400
-shared
-DWITH_ONEAPI
-ffast-math
-DNDEBUG
-O2
-o ${cycles_kernel_oneapi_lib}
-I${CMAKE_CURRENT_SOURCE_DIR}/..
${SYCL_CPP_FLAGS}
)
if (WITH_CYCLES_ONEAPI_SYCL_HOST_ENABLED)
list(APPEND sycl_compiler_flags -DWITH_ONEAPI_SYCL_HOST_ENABLED)
endif()
# Set defaults for spir64 and spir64_gen options
if (NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64)
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-large-register-file -ze-opt-regular-grf-kernel integrator_intersect'")
endif()
if (NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen)
SET (CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}" CACHE STRING "Extra build options for spir64_gen target")
endif()
# Enable zebin (a graphics binary format with improved compatibility) on Windows only, while support on Linux isn't available yet
if(WIN32)
string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "--format zebin ")
endif()
string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "-device ${CYCLES_ONEAPI_SPIR64_GEN_DEVICES} ")
if (WITH_CYCLES_ONEAPI_BINARIES)
# Iterate over all targets and their options
list (JOIN CYCLES_ONEAPI_SYCL_TARGETS "," targets_string)
list (APPEND sycl_compiler_flags -fsycl-targets=${targets_string})
foreach(target ${CYCLES_ONEAPI_SYCL_TARGETS})
if(DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_${target})
list (APPEND sycl_compiler_flags -Xsycl-target-backend=${target} "${CYCLES_ONEAPI_SYCL_OPTIONS_${target}}")
endif()
endforeach()
else()
# If AOT is disabled, build for spir64
list(APPEND sycl_compiler_flags
-fsycl-targets=spir64
-Xsycl-target-backend=spir64 "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}")
endif()
if(WITH_NANOVDB)
list(APPEND sycl_compiler_flags
-DWITH_NANOVDB
-I"${NANOVDB_INCLUDE_DIR}")
endif()
if(WITH_CYCLES_DEBUG)
list(APPEND sycl_compiler_flags -DWITH_CYCLES_DEBUG)
endif()
get_filename_component(sycl_compiler_root ${SYCL_COMPILER} DIRECTORY)
get_filename_component(sycl_compiler_compiler_name ${SYCL_COMPILER} NAME_WE)
if(NOT OCLOC_INSTALL_DIR)
get_filename_component(OCLOC_INSTALL_DIR "${sycl_compiler_root}/../lib/ocloc" ABSOLUTE)
endif()
if(WITH_CYCLES_ONEAPI_BINARIES AND NOT EXISTS ${OCLOC_INSTALL_DIR})
message(FATAL_ERROR "WITH_CYCLES_ONEAPI_BINARIES requires ocloc but the ${OCLOC_INSTALL_DIR} directory doesn't exist."
" A different ocloc directory can be set using the OCLOC_INSTALL_DIR CMake variable.")
endif()
if(UNIX AND NOT APPLE)
if(NOT WITH_CXX11_ABI)
check_library_exists(sycl
_ZN2cl4sycl7handler22verifyUsedKernelBundleERKSs ${sycl_compiler_root}/../lib SYCL_NO_CXX11_ABI)
if(SYCL_NO_CXX11_ABI)
list(APPEND sycl_compiler_flags -D_GLIBCXX_USE_CXX11_ABI=0)
endif()
endif()
endif()
if(WIN32)
list(APPEND sycl_compiler_flags
-fms-extensions
-fms-compatibility
-D_WINDLL
-D_MBCS
-DWIN32
-D_WINDOWS
-D_CRT_NONSTDC_NO_DEPRECATE
-D_CRT_SECURE_NO_DEPRECATE
-DONEAPI_EXPORT)
if(sycl_compiler_compiler_name MATCHES "dpcpp")
# The oneAPI distribution calls the compiler "dpcpp" and comes with a script that sets environment variables.
add_custom_command(
OUTPUT ${cycles_kernel_oneapi_lib}
COMMAND "${sycl_compiler_root}/../../env/vars.bat"
COMMAND ${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags}
DEPENDS ${cycles_oneapi_kernel_sources})
else()
# The open source SYCL compiler just goes by clang++ and does not have such a script.
# Set the variables manually.
string(REPLACE /Redist/ /Tools/ MSVC_TOOLS_DIR ${MSVC_REDIST_DIR})
if(NOT CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION) # case for Ninja on Windows
get_filename_component(cmake_mt_dir ${CMAKE_MT} DIRECTORY)
string(REPLACE /bin/ /Lib/ WINDOWS_KIT_DIR ${cmake_mt_dir})
get_filename_component(WINDOWS_KIT_DIR "${WINDOWS_KIT_DIR}/../" ABSOLUTE)
else()
set(WINDOWS_KIT_DIR ${WINDOWS_KITS_DIR}/Lib/${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION})
endif()
list(APPEND sycl_compiler_flags
-L "${MSVC_TOOLS_DIR}/lib/x64"
-L "${WINDOWS_KIT_DIR}/um/x64"
-L "${WINDOWS_KIT_DIR}/ucrt/x64")
add_custom_command(
OUTPUT ${cycles_kernel_oneapi_lib}
COMMAND ${CMAKE_COMMAND} -E env
"LIB=${sycl_compiler_root}/../lib" # for compiler to find sycl.lib
"PATH=${OCLOC_INSTALL_DIR};${sycl_compiler_root}"
${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags}
DEPENDS ${cycles_oneapi_kernel_sources})
endif()
else()
list(APPEND sycl_compiler_flags -fPIC)
# Avoid __FAST_MATH__ being defined when building on CentOS 7 until the compilation crash
# it triggers at either the AoT or JIT stage gets fixed.
list(APPEND sycl_compiler_flags -fhonor-nans)
# add $ORIGIN to cycles_kernel_oneapi.so rpath so libsycl.so and
# libpi_level_zero.so can be placed next to it and get found.
list(APPEND sycl_compiler_flags -Wl,-rpath,'$$ORIGIN')
# The oneAPI distribution calls the compiler "dpcpp" and comes with a script that sets environment variables.
if(sycl_compiler_compiler_name MATCHES "dpcpp")
add_custom_command(
OUTPUT ${cycles_kernel_oneapi_lib}
COMMAND bash -c \"source ${sycl_compiler_root}/../../env/vars.sh&&${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags}\"
DEPENDS ${cycles_oneapi_kernel_sources})
else()
# The open source SYCL compiler just goes by clang++ and does not have such a script.
# Set the variables manually.
if(NOT IGC_INSTALL_DIR)
get_filename_component(IGC_INSTALL_DIR "${sycl_compiler_root}/../lib/igc" ABSOLUTE)
endif()
add_custom_command(
OUTPUT ${cycles_kernel_oneapi_lib}
COMMAND ${CMAKE_COMMAND} -E env
"LD_LIBRARY_PATH=${sycl_compiler_root}/../lib:${OCLOC_INSTALL_DIR}/lib:${IGC_INSTALL_DIR}/lib"
"PATH=${OCLOC_INSTALL_DIR}/bin:${sycl_compiler_root}:$ENV{PATH}" # env PATH is for compiler to find ld
${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags}
DEPENDS ${cycles_oneapi_kernel_sources})
endif()
endif()
# install dynamic libraries required at runtime
if(WIN32)
set(SYCL_RUNTIME_DEPENDENCIES
sycl.dll
pi_level_zero.dll
)
if(NOT WITH_BLENDER)
# For the Cycles standalone, put the libraries next to the Cycles application.
delayed_install("${sycl_compiler_root}" "${SYCL_RUNTIME_DEPENDENCIES}" ${CYCLES_INSTALL_PATH})
else()
# For Blender, put the libraries next to the Blender executable.
#
# Note that the installation path in the delayed_install is relative to the versioned folder,
# which means we need to go one level up.
delayed_install("${sycl_compiler_root}" "${SYCL_RUNTIME_DEPENDENCIES}" "../")
endif()
elseif(UNIX AND NOT APPLE)
file(GLOB SYCL_RUNTIME_DEPENDENCIES
${sycl_compiler_root}/../lib/libsycl.so
${sycl_compiler_root}/../lib/libsycl.so.[0-9]
${sycl_compiler_root}/../lib/libsycl.so.[0-9].[0-9].[0-9]-[0-9]
)
list(APPEND SYCL_RUNTIME_DEPENDENCIES ${sycl_compiler_root}/../lib/libpi_level_zero.so)
delayed_install("" "${SYCL_RUNTIME_DEPENDENCIES}" ${CYCLES_INSTALL_PATH}/lib)
endif()
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cycles_kernel_oneapi_lib}" ${CYCLES_INSTALL_PATH}/lib)
add_custom_target(cycles_kernel_oneapi ALL DEPENDS ${cycles_kernel_oneapi_lib})
endif()
# OSL module
if(WITH_CYCLES_OSL)
@@ -972,7 +752,6 @@ cycles_add_library(cycles_kernel "${LIB}"
${SRC_KERNEL_DEVICE_HIP_HEADERS}
${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
${SRC_KERNEL_DEVICE_METAL_HEADERS}
${SRC_KERNEL_DEVICE_ONEAPI_HEADERS}
)
source_group("bake" FILES ${SRC_KERNEL_BAKE_HEADERS})
@@ -985,7 +764,6 @@ source_group("device\\gpu" FILES ${SRC_KERNEL_DEVICE_GPU_HEADERS})
source_group("device\\hip" FILES ${SRC_KERNEL_DEVICE_HIP} ${SRC_KERNEL_DEVICE_HIP_HEADERS})
source_group("device\\optix" FILES ${SRC_KERNEL_DEVICE_OPTIX} ${SRC_KERNEL_DEVICE_OPTIX_HEADERS})
source_group("device\\metal" FILES ${SRC_KERNEL_DEVICE_METAL} ${SRC_KERNEL_DEVICE_METAL_HEADERS})
source_group("device\\oneapi" FILES ${SRC_KERNEL_DEVICE_ONEAPI} ${SRC_KERNEL_DEVICE_ONEAPI_HEADERS})
source_group("film" FILES ${SRC_KERNEL_FILM_HEADERS})
source_group("geom" FILES ${SRC_KERNEL_GEOM_HEADERS})
source_group("integrator" FILES ${SRC_KERNEL_INTEGRATOR_HEADERS})
@@ -1004,9 +782,6 @@ endif()
if(WITH_CYCLES_HIP)
add_dependencies(cycles_kernel cycles_kernel_hip)
endif()
if(WITH_CYCLES_DEVICE_ONEAPI)
add_dependencies(cycles_kernel cycles_kernel_oneapi)
endif()
# Install kernel source for runtime compilation

View File

@@ -29,14 +29,14 @@ ccl_device void kernel_displace_evaluate(KernelGlobals kg,
object_inverse_dir_transform(kg, &sd, &D);
#ifdef __KERNEL_DEBUG_NAN__
if (!isfinite_safe(D)) {
if (!isfinite3_safe(D)) {
kernel_assert(!"Cycles displacement with non-finite value detected");
}
#endif
/* Ensure finite displacement, preventing BVH from becoming degenerate and avoiding possible
* traversal issues caused by non-finite math. */
D = ensure_finite(D);
D = ensure_finite3(D);
/* Write output. */
output[offset * 3 + 0] += D.x;
@@ -68,13 +68,13 @@ ccl_device void kernel_background_evaluate(KernelGlobals kg,
float3 color = shader_background_eval(&sd);
#ifdef __KERNEL_DEBUG_NAN__
if (!isfinite_safe(color)) {
if (!isfinite3_safe(color)) {
kernel_assert(!"Cycles background with non-finite value detected");
}
#endif
/* Ensure finite color, avoiding possible numerical instabilities in the path tracing kernels. */
color = ensure_finite(color);
color = ensure_finite3(color);
/* Write output. */
output[offset * 3 + 0] += color.x;

View File

@@ -172,11 +172,11 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
ray_flags |= OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT;
}
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
ray->tmin,
ray->tmax,
0.0f,
ray->t,
ray->time,
ray_mask,
ray_flags,
@@ -203,28 +203,28 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
#elif defined(__METALRT__)
if (!scene_intersect_valid(ray)) {
isect->t = ray->tmax;
isect->t = ray->t;
isect->type = PRIMITIVE_NONE;
return false;
}
# if defined(__KERNEL_DEBUG__)
if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
isect->t = ray->tmax;
isect->t = ray->t;
isect->type = PRIMITIVE_NONE;
kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
return false;
}
if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
isect->t = ray->tmax;
isect->t = ray->t;
isect->type = PRIMITIVE_NONE;
kernel_assert(!"Invalid ift_default");
return false;
}
# endif
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
metalrt_intersector_type metalrt_intersect;
if (!kernel_data.bvh.have_curves) {
@@ -263,7 +263,7 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
# endif
if (intersection.type == intersection_type::none) {
isect->t = ray->tmax;
isect->t = ray->t;
isect->type = PRIMITIVE_NONE;
return false;
@@ -295,14 +295,14 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
}
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
isect->t = ray->tmax;
if (kernel_data.bvh.scene) {
isect->t = ray->t;
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
IntersectContext rtc_ctx(&ctx);
RTCRayHit ray_hit;
ctx.ray = ray;
kernel_embree_setup_rayhit(*ray, ray_hit, visibility);
rtcIntersect1(kernel_data.device_bvh, &rtc_ctx.context, &ray_hit);
rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit);
if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID &&
ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
@@ -357,11 +357,11 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
if (local_isect) {
local_isect->num_hits = 0; /* Initialize hit count to zero. */
}
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
ray->tmin,
ray->tmax,
0.0f,
ray->t,
ray->time,
0xFF,
/* Need to always call into __anyhit__kernel_optix_local_hit. */
@@ -405,7 +405,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
}
# endif
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
metalrt_intersector_type metalrt_intersect;
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
@@ -451,7 +451,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
}
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
if (kernel_data.bvh.scene) {
const bool has_bvh = !(kernel_data_fetch(object_flag, local_object) &
SD_OBJECT_TRANSFORM_APPLIED);
CCLIntersectContext ctx(
@@ -470,13 +470,13 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
/* If this object has its own BVH, use it. */
if (has_bvh) {
RTCGeometry geom = rtcGetGeometry(kernel_data.device_bvh, local_object * 2);
RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2);
if (geom) {
float3 P = ray->P;
float3 dir = ray->D;
float3 idir = ray->D;
Transform ob_itfm;
rtc_ray.tfar = ray->tmax *
rtc_ray.tfar = ray->t *
bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
/* bvh_instance_motion_push() returns the inverse transform but
* it's not needed here. */
@@ -496,7 +496,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
}
}
else {
rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
}
/* rtcOccluded1 sets tfar to -inf if a hit was found. */
@@ -539,11 +539,11 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
ray_mask = 0xFF;
}
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
ray->tmin,
ray->tmax,
0.0f,
ray->t,
ray->time,
ray_mask,
/* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */
@@ -582,7 +582,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
}
# endif
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
metalrt_intersector_type metalrt_intersect;
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
@@ -633,7 +633,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
}
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
if (kernel_data.bvh.scene) {
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
Intersection *isect_array = (Intersection *)state->shadow_isect;
ctx.isect_s = isect_array;
@@ -642,7 +642,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
IntersectContext rtc_ctx(&ctx);
RTCRay rtc_ray;
kernel_embree_setup_ray(*ray, rtc_ray, visibility);
rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
*num_recorded_hits = ctx.num_recorded_hits;
*throughput = ctx.throughput;
@@ -698,11 +698,11 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
ray_mask = 0xFF;
}
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
ray->tmin,
ray->tmax,
0.0f,
ray->t,
ray->time,
ray_mask,
/* Need to always call into __anyhit__kernel_optix_volume_test. */
@@ -744,7 +744,7 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
}
# endif
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
metalrt_intersector_type metalrt_intersect;
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
@@ -825,7 +825,7 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals kg,
}
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
if (kernel_data.bvh.scene) {
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
ctx.isect_s = isect;
ctx.max_hits = max_hits;
@@ -834,7 +834,7 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals kg,
IntersectContext rtc_ctx(&ctx);
RTCRay rtc_ray;
kernel_embree_setup_ray(*ray, rtc_ray, visibility);
rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
return ctx.num_hits;
}
# endif /* __EMBREE__ */

View File

@@ -83,8 +83,8 @@ ccl_device_inline void kernel_embree_setup_ray(const Ray &ray,
rtc_ray.dir_x = ray.D.x;
rtc_ray.dir_y = ray.D.y;
rtc_ray.dir_z = ray.D.z;
rtc_ray.tnear = ray.tmin;
rtc_ray.tfar = ray.tmax;
rtc_ray.tnear = 0.0f;
rtc_ray.tfar = ray.t;
rtc_ray.time = ray.time;
rtc_ray.mask = visibility;
}
@@ -107,7 +107,7 @@ ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg
const int oID = hit->instID[0] / 2;
if ((ray->self.object == oID) || (ray->self.light_object == oID)) {
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
const int pID = hit->primID +
(intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
status = intersection_skip_self_shadow(ray->self, oID, pID);
@@ -117,7 +117,7 @@ ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg
const int oID = hit->geomID / 2;
if ((ray->self.object == oID) || (ray->self.light_object == oID)) {
const int pID = hit->primID + (intptr_t)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.device_bvh, hit->geomID));
rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
status = intersection_skip_self_shadow(ray->self, oID, pID);
}
}
@@ -133,14 +133,14 @@ ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
isect->t = ray->tfar;
if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
isect->prim = hit->primID +
(intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
isect->object = hit->instID[0] / 2;
}
else {
isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.device_bvh, hit->geomID));
rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
isect->object = hit->geomID / 2;
}
@@ -166,7 +166,7 @@ ccl_device_inline void kernel_embree_convert_sss_hit(
isect->v = hit->u;
isect->t = ray->tfar;
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.device_bvh, object * 2));
rtcGetGeometry(kernel_data.bvh.scene, object * 2));
isect->prim = hit->primID +
(intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
isect->object = object;

View File

@@ -47,9 +47,8 @@ ccl_device_inline
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
float tmin = ray->tmin;
int object = OBJECT_NONE;
float isect_t = ray->tmax;
float isect_t = ray->t;
if (local_isect != NULL) {
local_isect->num_hits = 0;
@@ -60,13 +59,10 @@ ccl_device_inline
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
const float t_world_to_instance = bvh_instance_motion_push(
kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
isect_t *= bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
#else
const float t_world_to_instance = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir);
isect_t *= bvh_instance_push(kg, local_object, ray, &P, &dir, &idir);
#endif
isect_t *= t_world_to_instance;
tmin *= t_world_to_instance;
object = local_object;
}
@@ -85,7 +81,6 @@ ccl_device_inline
dir,
#endif
idir,
tmin,
isect_t,
node_addr,
PATH_RAY_ALL_VISIBILITY,
@@ -160,7 +155,6 @@ ccl_device_inline
local_object,
prim,
prim_addr,
tmin,
isect_t,
lcg_state,
max_hits)) {
@@ -197,7 +191,6 @@ ccl_device_inline
local_object,
prim,
prim_addr,
tmin,
isect_t,
lcg_state,
max_hits)) {

View File

@@ -18,8 +18,7 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals kg
ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
const float3 P,
const float3 idir,
const float tmin,
const float tmax,
const float t,
const int node_addr,
const uint visibility,
float dist[2])
@@ -40,8 +39,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
float c0hiy = (node1.z - P.y) * idir.y;
float c0loz = (node2.x - P.z) * idir.z;
float c0hiz = (node2.z - P.z) * idir.z;
float c0min = max4(tmin, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
float c0max = min4(tmax, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
float c1lox = (node0.y - P.x) * idir.x;
float c1hix = (node0.w - P.x) * idir.x;
@@ -49,8 +48,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
float c1hiy = (node1.w - P.y) * idir.y;
float c1loz = (node2.y - P.z) * idir.z;
float c1hiz = (node2.w - P.z) * idir.z;
float c1min = max4(tmin, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
float c1max = min4(tmax, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
dist[0] = c0min;
dist[1] = c1min;
@@ -67,8 +66,7 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg,
const float3 P,
const float3 dir,
const float tmin,
const float tmax,
const float t,
int node_addr,
int child,
float dist[2])
@@ -85,8 +83,8 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg,
const float far_x = max(lower_xyz.x, upper_xyz.x);
const float far_y = max(lower_xyz.y, upper_xyz.y);
const float far_z = max(lower_xyz.z, upper_xyz.z);
const float tnear = max4(tmin, near_x, near_y, near_z);
const float tfar = min4(tmax, far_x, far_y, far_z);
const float tnear = max4(0.0f, near_x, near_y, near_z);
const float tfar = min4(t, far_x, far_y, far_z);
*dist = tnear;
return tnear <= tfar;
}
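
The per-child test above is the standard ray/AABB slab test: clamp the entry distance to the ray start and the exit distance to the current maximum, and report a hit iff entry <= exit. A minimal scalar sketch under assumed types follows (an illustration, not the kernel code):

#include <algorithm>
#include <cassert>

struct Vec3 {
  float x, y, z;
};

/* Scalar ray/AABB slab test with a precomputed inverse direction. */
static bool ray_aabb(const Vec3 &P, const Vec3 &idir, const Vec3 &lo, const Vec3 &hi,
                     float tmin, float tmax)
{
  const float tx0 = (lo.x - P.x) * idir.x, tx1 = (hi.x - P.x) * idir.x;
  const float ty0 = (lo.y - P.y) * idir.y, ty1 = (hi.y - P.y) * idir.y;
  const float tz0 = (lo.z - P.z) * idir.z, tz1 = (hi.z - P.z) * idir.z;
  const float tnear = std::max({tmin, std::min(tx0, tx1), std::min(ty0, ty1), std::min(tz0, tz1)});
  const float tfar = std::min({tmax, std::max(tx0, tx1), std::max(ty0, ty1), std::max(tz0, tz1)});
  return tnear <= tfar;
}

int main()
{
  /* A ray from the origin along (1, 1, 1) enters the box [1, 2]^3 at t = 1 and leaves at t = 2. */
  const Vec3 P{0.0f, 0.0f, 0.0f}, idir{1.0f, 1.0f, 1.0f};
  assert(ray_aabb(P, idir, Vec3{1.0f, 1.0f, 1.0f}, Vec3{2.0f, 2.0f, 2.0f}, 0.0f, 10.0f));
  return 0;
}
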
@@ -95,8 +93,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg,
const float3 P,
const float3 dir,
const float3 idir,
const float tmin,
const float tmax,
const float t,
const int node_addr,
const uint visibility,
float dist[2])
@@ -105,7 +102,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg,
#ifdef __VISIBILITY_FLAG__
float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0);
#endif
if (bvh_unaligned_node_intersect_child(kg, P, dir, tmin, tmax, node_addr, 0, &dist[0])) {
if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
#ifdef __VISIBILITY_FLAG__
if ((__float_as_uint(cnodes.x) & visibility))
#endif
@@ -113,7 +110,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg,
mask |= 1;
}
}
if (bvh_unaligned_node_intersect_child(kg, P, dir, tmin, tmax, node_addr, 1, &dist[1])) {
if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
#ifdef __VISIBILITY_FLAG__
if ((__float_as_uint(cnodes.y) & visibility))
#endif
@@ -128,17 +125,16 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals kg,
const float3 P,
const float3 dir,
const float3 idir,
const float tmin,
const float tmax,
const float t,
const int node_addr,
const uint visibility,
float dist[2])
{
float4 node = kernel_data_fetch(bvh_nodes, node_addr);
if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return bvh_unaligned_node_intersect(kg, P, dir, idir, tmin, tmax, node_addr, visibility, dist);
return bvh_unaligned_node_intersect(kg, P, dir, idir, t, node_addr, visibility, dist);
}
else {
return bvh_aligned_node_intersect(kg, P, idir, tmin, tmax, node_addr, visibility, dist);
return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist);
}
}

View File

@@ -49,7 +49,6 @@ ccl_device_inline
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
float tmin = ray->tmin;
int object = OBJECT_NONE;
uint num_hits = 0;
@@ -60,12 +59,12 @@ ccl_device_inline
/* Max distance in world space. May be dynamically reduced when max number of
* recorded hits is exceeded and we no longer need to find hits beyond the max
* distance found. */
float t_max_world = ray->tmax;
float t_max_world = ray->t;
/* Current maximum distance to the intersection.
* Is calculated as a ray length, transformed to an object space when entering
* instance node. */
float t_max_current = ray->tmax;
float t_max_current = ray->t;
/* Conversion from world to local space for the current instance if any, 1.0
* otherwise. */
@@ -89,7 +88,6 @@ ccl_device_inline
dir,
#endif
idir,
tmin,
t_max_current,
node_addr,
visibility,
@@ -158,16 +156,8 @@ ccl_device_inline
switch (type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
hit = triangle_intersect(kg,
&isect,
P,
dir,
tmin,
t_max_current,
visibility,
prim_object,
prim,
prim_addr);
hit = triangle_intersect(
kg, &isect, P, dir, t_max_current, visibility, prim_object, prim, prim_addr);
break;
}
#if BVH_FEATURE(BVH_MOTION)
@@ -176,7 +166,6 @@ ccl_device_inline
&isect,
P,
dir,
tmin,
t_max_current,
ray->time,
visibility,
@@ -200,16 +189,8 @@ ccl_device_inline
}
const int curve_type = kernel_data_fetch(prim_type, prim_addr);
hit = curve_intersect(kg,
&isect,
P,
dir,
tmin,
t_max_current,
prim_object,
prim,
ray->time,
curve_type);
hit = curve_intersect(
kg, &isect, P, dir, t_max_current, prim_object, prim, ray->time, curve_type);
break;
}
@@ -226,16 +207,8 @@ ccl_device_inline
}
const int point_type = kernel_data_fetch(prim_type, prim_addr);
hit = point_intersect(kg,
&isect,
P,
dir,
tmin,
t_max_current,
prim_object,
prim,
ray->time,
point_type);
hit = point_intersect(
kg, &isect, P, dir, t_max_current, prim_object, prim, ray->time, point_type);
break;
}
#endif /* BVH_FEATURE(BVH_POINTCLOUD) */
@@ -329,7 +302,6 @@ ccl_device_inline
/* Convert intersection to object space. */
t_max_current *= t_world_to_instance;
tmin *= t_world_to_instance;
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
@@ -351,8 +323,7 @@ ccl_device_inline
#endif
/* Restore world space ray length. */
tmin = ray->tmin;
t_max_current = ray->tmax;
t_max_current = ray->t;
object = OBJECT_NONE;
t_world_to_instance = 1.0f;

View File

@@ -43,14 +43,13 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
float tmin = ray->tmin;
int object = OBJECT_NONE;
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
#endif
isect->t = ray->tmax;
isect->t = ray->t;
isect->u = 0.0f;
isect->v = 0.0f;
isect->prim = PRIM_NONE;
@@ -72,7 +71,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
dir,
#endif
idir,
tmin,
isect->t,
node_addr,
visibility,
@@ -135,16 +133,8 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
switch (type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
if (triangle_intersect(kg,
isect,
P,
dir,
tmin,
isect->t,
visibility,
prim_object,
prim,
prim_addr)) {
if (triangle_intersect(
kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr)) {
/* shadow ray early termination */
if (visibility & PATH_RAY_SHADOW_OPAQUE)
return true;
@@ -157,7 +147,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
isect,
P,
dir,
tmin,
isect->t,
ray->time,
visibility,
@@ -185,7 +174,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
const int curve_type = kernel_data_fetch(prim_type, prim_addr);
const bool hit = curve_intersect(
kg, isect, P, dir, tmin, isect->t, prim_object, prim, ray->time, curve_type);
kg, isect, P, dir, isect->t, prim_object, prim, ray->time, curve_type);
if (hit) {
/* shadow ray early termination */
if (visibility & PATH_RAY_SHADOW_OPAQUE)
@@ -206,7 +195,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
const int point_type = kernel_data_fetch(prim_type, prim_addr);
const bool hit = point_intersect(
kg, isect, P, dir, tmin, isect->t, prim_object, prim, ray->time, point_type);
kg, isect, P, dir, isect->t, prim_object, prim, ray->time, point_type);
if (hit) {
/* shadow ray early termination */
if (visibility & PATH_RAY_SHADOW_OPAQUE)
@@ -223,15 +212,11 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
object = kernel_data_fetch(prim_object, -prim_addr - 1);
#if BVH_FEATURE(BVH_MOTION)
const float t_world_to_instance = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, &ob_itfm);
isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm);
#else
const float t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir);
#endif
isect->t *= t_world_to_instance;
tmin *= t_world_to_instance;
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
@@ -250,7 +235,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
#else
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
#endif
tmin = ray->tmin;
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];

View File

@@ -5,19 +5,6 @@
CCL_NAMESPACE_BEGIN
/* Offset intersection distance by the smallest possible amount, to skip
* intersections at this distance. This works in cases where the ray start
* position is unchanged and only tmin is updated, since for self
* intersection we'll be comparing against the exact same distances. */
ccl_device_forceinline float intersection_t_offset(const float t)
{
/* This is a simplified version of nextafterf(t, FLT_MAX), only dealing with
* non-negative and finite t. */
kernel_assert(t >= 0.0f && isfinite_safe(t));
const uint32_t bits = (t == 0.0f) ? 1 : __float_as_uint(t) + 1;
return __uint_as_float(bits);
}
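
For reference, the bit increment above steps t to the next representable float. A minimal host-side sketch (an illustration using std::memcpy instead of the kernel's __float_as_uint/__uint_as_float helpers) checks the equivalence with std::nextafterf for non-negative finite inputs:

#include <cassert>
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <cstring>

/* Same trick as intersection_t_offset() above, written for the host. */
static float next_float_up(float t)
{
  assert(t >= 0.0f && std::isfinite(t));
  std::uint32_t bits;
  std::memcpy(&bits, &t, sizeof(bits));
  bits = (t == 0.0f) ? 1u : bits + 1u;
  float result;
  std::memcpy(&result, &bits, sizeof(result));
  return result;
}

int main()
{
  for (float t : {0.0f, 1.0f, 1024.5f}) {
    assert(next_float_up(t) == std::nextafterf(t, FLT_MAX));
  }
  return 0;
}
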
#if defined(__KERNEL_CPU__)
ccl_device int intersections_compare(const void *a, const void *b)
{

View File

@@ -46,14 +46,13 @@ ccl_device_inline
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
float tmin = ray->tmin;
int object = OBJECT_NONE;
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
#endif
isect->t = ray->tmax;
isect->t = ray->t;
isect->u = 0.0f;
isect->v = 0.0f;
isect->prim = PRIM_NONE;
@@ -74,7 +73,6 @@ ccl_device_inline
dir,
#endif
idir,
tmin,
isect->t,
node_addr,
visibility,
@@ -142,7 +140,7 @@ ccl_device_inline
continue;
}
triangle_intersect(
kg, isect, P, dir, tmin, isect->t, visibility, prim_object, prim, prim_addr);
kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr);
}
break;
}
@@ -167,7 +165,6 @@ ccl_device_inline
isect,
P,
dir,
tmin,
isect->t,
ray->time,
visibility,
@@ -189,15 +186,11 @@ ccl_device_inline
int object_flag = kernel_data_fetch(object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
#if BVH_FEATURE(BVH_MOTION)
const float t_world_to_instance = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, &ob_itfm);
isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm);
#else
const float t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir);
#endif
isect->t *= t_world_to_instance;
tmin *= t_world_to_instance;
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
@@ -224,8 +217,6 @@ ccl_device_inline
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
#endif
tmin = ray->tmin;
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;

View File

@@ -44,12 +44,12 @@ ccl_device_inline
int node_addr = kernel_data.bvh.root;
/* ray parameters in registers */
const float tmax = ray->t;
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
float tmin = ray->tmin;
int object = OBJECT_NONE;
float isect_t = ray->tmax;
float isect_t = tmax;
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
@@ -58,7 +58,7 @@ ccl_device_inline
int num_hits_in_instance = 0;
uint num_hits = 0;
isect_array->t = ray->tmax;
isect_array->t = tmax;
/* traversal loop */
do {
@@ -75,7 +75,6 @@ ccl_device_inline
dir,
#endif
idir,
tmin,
isect_t,
node_addr,
visibility,
@@ -142,16 +141,8 @@ ccl_device_inline
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
hit = triangle_intersect(kg,
isect_array,
P,
dir,
tmin,
isect_t,
visibility,
prim_object,
prim,
prim_addr);
hit = triangle_intersect(
kg, isect_array, P, dir, isect_t, visibility, prim_object, prim, prim_addr);
if (hit) {
/* Move on to next entry in intersections array. */
isect_array++;
@@ -198,7 +189,6 @@ ccl_device_inline
isect_array,
P,
dir,
tmin,
isect_t,
ray->time,
visibility,
@@ -242,15 +232,11 @@ ccl_device_inline
int object_flag = kernel_data_fetch(object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
#if BVH_FEATURE(BVH_MOTION)
const float t_world_to_instance = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, &ob_itfm);
isect_t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm);
#else
const float t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
isect_t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir);
#endif
isect_t *= t_world_to_instance;
tmin *= t_world_to_instance;
num_hits_in_instance = 0;
isect_array->t = isect_t;
@@ -294,8 +280,7 @@ ccl_device_inline
#endif
}
tmin = ray->tmin;
isect_t = ray->tmax;
isect_t = tmax;
isect_array->t = isect_t;
object = OBJECT_NONE;

View File

@@ -165,11 +165,9 @@ ccl_device void camera_sample_perspective(KernelGlobals kg,
float nearclip = kernel_data.cam.nearclip * z_inv;
ray->P += nearclip * ray->D;
ray->dP += nearclip * ray->dD;
ray->tmin = 0.0f;
ray->tmax = kernel_data.cam.cliplength * z_inv;
ray->t = kernel_data.cam.cliplength * z_inv;
#else
ray->tmin = 0.0f;
ray->tmax = FLT_MAX;
ray->t = FLT_MAX;
#endif
}
@@ -233,11 +231,9 @@ ccl_device void camera_sample_orthographic(KernelGlobals kg,
#ifdef __CAMERA_CLIPPING__
/* clipping */
ray->tmin = 0.0f;
ray->tmax = kernel_data.cam.cliplength;
ray->t = kernel_data.cam.cliplength;
#else
ray->tmin = 0.0f;
ray->tmax = FLT_MAX;
ray->t = FLT_MAX;
#endif
}
@@ -262,7 +258,7 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
/* Indicates the ray should not receive any light because it is outside of the lens. */
if (is_zero(D)) {
ray->tmax = 0.0f;
ray->t = 0.0f;
return;
}
@@ -353,11 +349,9 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
float nearclip = cam->nearclip;
ray->P += nearclip * ray->D;
ray->dP += nearclip * ray->dD;
ray->tmin = 0.0f;
ray->tmax = cam->cliplength;
ray->t = cam->cliplength;
#else
ray->tmin = 0.0f;
ray->tmax = FLT_MAX;
ray->t = FLT_MAX;
#endif
}
@@ -374,7 +368,7 @@ ccl_device_inline void camera_sample(KernelGlobals kg,
ccl_private Ray *ray)
{
/* pixel filter */
int filter_table_offset = kernel_data.tables.filter_table_offset;
int filter_table_offset = kernel_data.film.filter_table_offset;
float raster_x = x + lookup_table_read(kg, filter_u, filter_table_offset, FILTER_TABLE_SIZE);
float raster_y = y + lookup_table_read(kg, filter_v, filter_table_offset, FILTER_TABLE_SIZE);

View File

@@ -51,7 +51,7 @@ ccl_device_inline ccl_private ShaderClosure *bsdf_alloc(ccl_private ShaderData *
int size,
float3 weight)
{
kernel_assert(isfinite_safe(weight));
kernel_assert(isfinite3_safe(weight));
const float sample_weight = fabsf(average(weight));
@@ -77,7 +77,7 @@ ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd,
float3 weight,
void *data)
{
kernel_assert(isfinite_safe(weight));
kernel_assert(isfinite3_safe(weight));
const float sample_weight = fabsf(average(weight));

View File

@@ -439,7 +439,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
*eval *= shift_cos_in(dot(*omega_in, sc->N), frequency_multiplier);
}
if (label & LABEL_DIFFUSE) {
if (!isequal(sc->N, sd->N)) {
if (!isequal_float3(sc->N, sd->N)) {
*eval *= bump_shadowing_term((label & LABEL_TRANSMIT) ? -sd->N : sd->N, sc->N, *omega_in);
}
}
@@ -550,7 +550,7 @@ ccl_device_inline
break;
}
if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
if (!isequal(sc->N, sd->N)) {
if (!isequal_float3(sc->N, sd->N)) {
eval *= bump_shadowing_term(sd->N, sc->N, omega_in);
}
}
@@ -635,7 +635,7 @@ ccl_device_inline
break;
}
if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
if (!isequal(sc->N, sd->N)) {
if (!isequal_float3(sc->N, sd->N)) {
eval *= bump_shadowing_term(-sd->N, sc->N, omega_in);
}
}

View File

@@ -203,7 +203,7 @@ ccl_device int bsdf_principled_hair_setup(ccl_private ShaderData *sd,
float h = (sd->type & PRIMITIVE_CURVE_RIBBON) ? -sd->v : dot(cross(sd->Ng, X), Z);
kernel_assert(fabsf(h) < 1.0f + 1e-4f);
kernel_assert(isfinite_safe(Y));
kernel_assert(isfinite3_safe(Y));
kernel_assert(isfinite_safe(h));
bsdf->extra->geom = make_float4(Y.x, Y.y, Y.z, h);
@@ -272,7 +272,7 @@ ccl_device float3 bsdf_principled_hair_eval(KernelGlobals kg,
const float3 omega_in,
ccl_private float *pdf)
{
kernel_assert(isfinite_safe(sd->P) && isfinite_safe(sd->ray_length));
kernel_assert(isfinite3_safe(sd->P) && isfinite_safe(sd->ray_length));
ccl_private const PrincipledHairBSDF *bsdf = (ccl_private const PrincipledHairBSDF *)sc;
float3 Y = float4_to_float3(bsdf->extra->geom);
@@ -299,7 +299,7 @@ ccl_device float3 bsdf_principled_hair_eval(KernelGlobals kg,
float cos_gamma_t = cos_from_sin(sin_gamma_t);
float gamma_t = safe_asinf(sin_gamma_t);
float3 T = exp(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
float3 T = exp3(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
float4 Ap[4];
hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap);
@@ -319,25 +319,25 @@ ccl_device float3 bsdf_principled_hair_eval(KernelGlobals kg,
Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness);
Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t);
F = Ap[0] * Mp * Np;
kernel_assert(isfinite_safe(float4_to_float3(F)));
kernel_assert(isfinite3_safe(float4_to_float3(F)));
/* Transmission (TT). */
Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f * bsdf->v);
Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t);
F += Ap[1] * Mp * Np;
kernel_assert(isfinite_safe(float4_to_float3(F)));
kernel_assert(isfinite3_safe(float4_to_float3(F)));
/* Secondary specular (TRT). */
Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t);
F += Ap[2] * Mp * Np;
kernel_assert(isfinite_safe(float4_to_float3(F)));
kernel_assert(isfinite3_safe(float4_to_float3(F)));
/* Residual component (TRRT+). */
Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
Np = M_1_2PI_F;
F += Ap[3] * Mp * Np;
kernel_assert(isfinite_safe(float4_to_float3(F)));
kernel_assert(isfinite3_safe(float4_to_float3(F)));
*pdf = F.w;
return float4_to_float3(F);
@@ -385,7 +385,7 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg,
float cos_gamma_t = cos_from_sin(sin_gamma_t);
float gamma_t = safe_asinf(sin_gamma_t);
float3 T = exp(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
float3 T = exp3(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
float4 Ap[4];
hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap);
@@ -436,25 +436,25 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg,
Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness);
Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t);
F = Ap[0] * Mp * Np;
kernel_assert(isfinite_safe(float4_to_float3(F)));
kernel_assert(isfinite3_safe(float4_to_float3(F)));
/* Transmission (TT). */
Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f * bsdf->v);
Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t);
F += Ap[1] * Mp * Np;
kernel_assert(isfinite_safe(float4_to_float3(F)));
kernel_assert(isfinite3_safe(float4_to_float3(F)));
/* Secondary specular (TRT). */
Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t);
F += Ap[2] * Mp * Np;
kernel_assert(isfinite_safe(float4_to_float3(F)));
kernel_assert(isfinite3_safe(float4_to_float3(F)));
/* Residual component (TRRT+). */
Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
Np = M_1_2PI_F;
F += Ap[3] * Mp * Np;
kernel_assert(isfinite_safe(float4_to_float3(F)));
kernel_assert(isfinite3_safe(float4_to_float3(F)));
*eval = float4_to_float3(F);
*pdf = F.w;
@@ -492,13 +492,13 @@ ccl_device_inline float bsdf_principled_hair_albedo_roughness_scale(
ccl_device float3 bsdf_principled_hair_albedo(ccl_private const ShaderClosure *sc)
{
ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)sc;
return exp(-sqrt(bsdf->sigma) * bsdf_principled_hair_albedo_roughness_scale(bsdf->v));
return exp3(-sqrt(bsdf->sigma) * bsdf_principled_hair_albedo_roughness_scale(bsdf->v));
}
ccl_device_inline float3
bsdf_principled_hair_sigma_from_reflectance(const float3 color, const float azimuthal_roughness)
{
const float3 sigma = log(color) /
const float3 sigma = log3(color) /
bsdf_principled_hair_albedo_roughness_scale(azimuthal_roughness);
return sigma * sigma;
}

Some files were not shown because too many files have changed in this diff.