Compare commits
92 Commits
temp-copy-
...
tmp-vulkan
Author | SHA1 | Date | |
---|---|---|---|
b40787115b | |||
fc502a8710 | |||
d944b969f0 | |||
8e43757834 | |||
b65df10346 | |||
8bf8db8ca2 | |||
5bd7a2c416 | |||
8c3e8d0eb6 | |||
8bfdec76c0 | |||
9cc5075680 | |||
2ea8725339 | |||
a55bbedc06 | |||
5d52da2e65 | |||
25b341a4ae | |||
dfa5bc25ae | |||
ddd64ba03a | |||
0b3caae8a7 | |||
b1d5950471 | |||
99886f91a9 | |||
fa00caba88 | |||
f853163a54 | |||
87492f93a4 | |||
6152113b45 | |||
2741a4106a | |||
3420792ea8 | |||
3eaddb4da8 | |||
1a85a68a5e | |||
adba79a04f | |||
c73894ea5b | |||
48360afb10 | |||
0493a32803 | |||
79346fd72e | |||
44c875f59a | |||
a68be3b3c2 | |||
c124606d70 | |||
975b09e3e7 | |||
15ac620204 | |||
983f61e9c5 | |||
ed959cd912 | |||
193a17474e | |||
5b246fd4b3 | |||
f7d7a5aad7 | |||
8b68ee3a17 | |||
230b24159e | |||
a5a01cc0c3 | |||
e7b3a7028b | |||
fced6f19be | |||
402e19ddc8 | |||
af2bc8be40 | |||
72bce1be8e | |||
d390f01e48 | |||
0956a5134a | |||
9a08daae92 | |||
dd4405121c | |||
b996cc6440 | |||
0bcd24c0c4 | |||
12e88ee722 | |||
b908c3fa0a | |||
ef8e53c15b | |||
639829ea1a | |||
0ef5c14de0 | |||
26fd1c71e1 | |||
de062ffd10 | |||
fc0f409911 | |||
c72e6c25d7 | |||
816bd8bed6 | |||
47613a40f5 | |||
ea5a70973d | |||
70c0f652de | |||
e70dcde88b | |||
6d82ae3581 | |||
99fc68bf6a | |||
3efdb3f232 | |||
193cebd3c2 | |||
bdeca237ff | |||
a08d242acd | |||
5629ff5f69 | |||
eb23ecd1b3 | |||
a02da40732 | |||
bd0c1d8c53 | |||
73b585b0b2 | |||
b549b1f477 | |||
c155c10888 | |||
bc86701b58 | |||
6dc1d823ba | |||
392e744ed5 | |||
f56af0b899 | |||
78cbcccf34 | |||
b671b33871 | |||
be9dc493a4 | |||
8dec7e5219 | |||
b79e7f92bf |
@@ -12,8 +12,6 @@ Checks: >
|
||||
-readability-avoid-const-params-in-decls,
|
||||
-readability-simplify-boolean-expr,
|
||||
-readability-make-member-function-const,
|
||||
-readability-suspicious-call-argument,
|
||||
-readability-redundant-member-init,
|
||||
|
||||
-readability-misleading-indentation,
|
||||
|
||||
@@ -27,8 +25,6 @@ Checks: >
|
||||
-bugprone-branch-clone,
|
||||
-bugprone-macro-parentheses,
|
||||
-bugprone-reserved-identifier,
|
||||
-bugprone-easily-swappable-parameters,
|
||||
-bugprone-implicit-widening-of-multiplication-result,
|
||||
|
||||
-bugprone-sizeof-expression,
|
||||
-bugprone-integer-division,
|
||||
@@ -44,8 +40,7 @@ Checks: >
|
||||
-modernize-pass-by-value,
|
||||
# Cannot be enabled yet, because using raw string literals in tests breaks
|
||||
# the windows compiler currently.
|
||||
-modernize-raw-string-literal,
|
||||
-modernize-return-braced-init-list
|
||||
-modernize-raw-string-literal
|
||||
|
||||
CheckOptions:
|
||||
- key: modernize-use-default-member-init.UseAssignment
|
||||
|
@@ -187,13 +187,6 @@ mark_as_advanced(CPACK_OVERRIDE_PACKAGENAME)
|
||||
mark_as_advanced(BUILDINFO_OVERRIDE_DATE)
|
||||
mark_as_advanced(BUILDINFO_OVERRIDE_TIME)
|
||||
|
||||
if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.16")
|
||||
option(WITH_UNITY_BUILD "Enable unity build for modules that support it to improve compile times" ON)
|
||||
mark_as_advanced(WITH_UNITY_BUILD)
|
||||
else()
|
||||
set(WITH_UNITY_BUILD OFF)
|
||||
endif()
|
||||
|
||||
option(WITH_IK_ITASC "Enable ITASC IK solver (only disable for development & for incompatible C++ compilers)" ON)
|
||||
option(WITH_IK_SOLVER "Enable Legacy IK solver (only disable for development)" ON)
|
||||
option(WITH_FFTW3 "Enable FFTW3 support (Used for smoke, ocean sim, and audio effects)" ON)
|
||||
@@ -433,7 +426,6 @@ mark_as_advanced(WITH_CYCLES_DEBUG_NAN)
|
||||
mark_as_advanced(WITH_CYCLES_NATIVE_ONLY)
|
||||
|
||||
# NVIDIA CUDA & OptiX
|
||||
if(NOT APPLE)
|
||||
option(WITH_CYCLES_DEVICE_CUDA "Enable Cycles NVIDIA CUDA compute support" ON)
|
||||
option(WITH_CYCLES_DEVICE_OPTIX "Enable Cycles NVIDIA OptiX support" ON)
|
||||
mark_as_advanced(WITH_CYCLES_DEVICE_CUDA)
|
||||
@@ -447,26 +439,17 @@ if(NOT APPLE)
|
||||
mark_as_advanced(WITH_CYCLES_CUBIN_COMPILER)
|
||||
mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
|
||||
mark_as_advanced(WITH_CUDA_DYNLOAD)
|
||||
endif()
|
||||
|
||||
# AMD HIP
|
||||
if(NOT APPLE)
|
||||
if(WIN32)
|
||||
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
|
||||
else()
|
||||
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" OFF)
|
||||
endif()
|
||||
|
||||
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
|
||||
set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 CACHE STRING "AMD HIP architectures to build binaries for")
|
||||
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
|
||||
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
|
||||
endif()
|
||||
|
||||
# Apple Metal
|
||||
if(APPLE)
|
||||
option(WITH_CYCLES_DEVICE_METAL "Enable Cycles Apple Metal compute support" ON)
|
||||
endif()
|
||||
|
||||
# Draw Manager
|
||||
option(WITH_DRAW_DEBUG "Add extra debug capabilities to Draw Manager" OFF)
|
||||
@@ -511,10 +494,11 @@ if(WIN32)
|
||||
endif()
|
||||
|
||||
# This should be turned off when Blender enters beta/rc/release
|
||||
if("${BLENDER_VERSION_CYCLE}" STREQUAL "alpha")
|
||||
set(WITH_EXPERIMENTAL_FEATURES ON)
|
||||
else()
|
||||
if("${BLENDER_VERSION_CYCLE}" STREQUAL "release" OR
|
||||
"${BLENDER_VERSION_CYCLE}" STREQUAL "rc")
|
||||
set(WITH_EXPERIMENTAL_FEATURES OFF)
|
||||
else()
|
||||
set(WITH_EXPERIMENTAL_FEATURES ON)
|
||||
endif()
|
||||
|
||||
# Unit testing
|
||||
@@ -530,9 +514,13 @@ if(UNIX AND NOT APPLE)
|
||||
endif()
|
||||
|
||||
|
||||
# Vulkan
|
||||
option(WITH_VULKAN "Enable Vulkan backend (Experimental)" OFF)
|
||||
option(WITH_VULKAN_SHADER_COMPILATION "Temporary flag to enable vulkan shader compilation needed to continue development during the migration of GLSL to Vulkan." OFF)
|
||||
|
||||
# OpenGL
|
||||
|
||||
option(WITH_OPENGL "When off limits visibility of the opengl headers to just bf_gpu and gawain (temporary option for development purposes)" ON)
|
||||
option(WITH_OPENGL "When off limits visibility of the opengl headers to just bf_gpu (temporary option for development purposes)" ON)
|
||||
option(WITH_GLEW_ES "Switches to experimental copy of GLEW that has support for OpenGL ES. (temporary option for development purposes)" OFF)
|
||||
option(WITH_GL_EGL "Use the EGL OpenGL system library instead of the platform specific OpenGL system library (CGL, glX, or WGL)" OFF)
|
||||
option(WITH_GL_PROFILE_ES20 "Support using OpenGL ES 2.0. (through either EGL or the AGL/WGL/XGL 'es20' profile)" OFF)
|
||||
@@ -542,6 +530,7 @@ mark_as_advanced(
|
||||
WITH_GLEW_ES
|
||||
WITH_GL_EGL
|
||||
WITH_GL_PROFILE_ES20
|
||||
WITH_VULKAN_SHADER_COMPILATION
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
@@ -856,7 +845,7 @@ if(WITH_AUDASPACE)
|
||||
endif()
|
||||
|
||||
# Auto-enable CUDA dynload if toolkit is not found.
|
||||
if(WITH_CYCLES AND WITH_CYCLES_DEVICE_CUDA AND NOT WITH_CUDA_DYNLOAD)
|
||||
if(NOT WITH_CUDA_DYNLOAD)
|
||||
find_package(CUDA)
|
||||
if(NOT CUDA_FOUND)
|
||||
message(STATUS "CUDA toolkit not found, using dynamic runtime loading of libraries (WITH_CUDA_DYNLOAD) instead")
|
||||
@@ -1142,6 +1131,18 @@ if(WITH_OPENVDB)
|
||||
list(APPEND OPENVDB_LIBRARIES ${BOOST_LIBRARIES} ${TBB_LIBRARIES})
|
||||
endif()
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure Vulkan.
|
||||
|
||||
if(WITH_VULKAN)
|
||||
list(APPEND BLENDER_GL_LIBRARIES ${Vulkan_LIBRARY})
|
||||
|
||||
add_definitions(-DWITH_VULKAN)
|
||||
if(WITH_VULKAN_SHADER_COMPILATION)
|
||||
add_definitions(-DWITH_VULKAN_SHADER_COMPILATION)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure OpenGL.
|
||||
|
||||
|
@@ -2083,9 +2083,9 @@ compile_OIIO() {
|
||||
cmake_d="$cmake_d -D OPENEXR_VERSION=$OPENEXR_VERSION"
|
||||
|
||||
if [ "$_with_built_openexr" = true ]; then
|
||||
cmake_d="$cmake_d -D ILMBASE_ROOT=$INST/openexr"
|
||||
cmake_d="$cmake_d -D OPENEXR_ROOT=$INST/openexr"
|
||||
INFO "Ilmbase_ROOT=$INST/openexr"
|
||||
cmake_d="$cmake_d -D ILMBASE_HOME=$INST/openexr"
|
||||
cmake_d="$cmake_d -D OPENEXR_HOME=$INST/openexr"
|
||||
INFO "ILMBASE_HOME=$INST/openexr"
|
||||
fi
|
||||
|
||||
# ptex is only needed when nicholas bishop is ready
|
||||
@@ -2374,9 +2374,9 @@ compile_OSL() {
|
||||
#~ cmake_d="$cmake_d -D ILMBASE_VERSION=$ILMBASE_VERSION"
|
||||
|
||||
if [ "$_with_built_openexr" = true ]; then
|
||||
cmake_d="$cmake_d -D ILMBASE_ROOT=$INST/openexr"
|
||||
cmake_d="$cmake_d -D OPENEXR_ROOT=$INST/openexr"
|
||||
INFO "Ilmbase_ROOT=$INST/openexr"
|
||||
INFO "ILMBASE_HOME=$INST/openexr"
|
||||
cmake_d="$cmake_d -D OPENEXR_ROOT_DIR=$INST/openexr"
|
||||
cmake_d="$cmake_d -D ILMBASE_ROOT_DIR=$INST/openexr"
|
||||
# XXX Temp workaround... sigh, ILMBase really messed the things up by defining their custom names ON by default :(
|
||||
fi
|
||||
|
||||
@@ -5801,7 +5801,7 @@ print_info() {
|
||||
PRINT "If you're using CMake add this to your configuration flags:"
|
||||
|
||||
_buildargs="-U *SNDFILE* -U PYTHON* -U *BOOST* -U *Boost* -U *TBB*"
|
||||
_buildargs="$_buildargs -U *OPENCOLORIO* -U *OPENEXR* -U *OPENIMAGEIO* -U *LLVM* -U *CLANG* -U *CYCLES*"
|
||||
_buildargs="$_buildargs -U *OPENCOLORIO* -U *OPENEXR* -U *OPENIMAGEIO* -U *LLVM* -U *CYCLES*"
|
||||
_buildargs="$_buildargs -U *OPENSUBDIV* -U *OPENVDB* -U *BLOSC* -U *COLLADA* -U *FFMPEG* -U *ALEMBIC* -U *USD*"
|
||||
_buildargs="$_buildargs -U *EMBREE* -U *OPENIMAGEDENOISE* -U *OPENXR*"
|
||||
|
||||
|
@@ -197,38 +197,3 @@ index 67ec0d15f..6dc3e85a0 100644
|
||||
#else
|
||||
#error Unknown architecture.
|
||||
#endif
|
||||
|
||||
diff --git a/pxr/base/arch/demangle.cpp b/pxr/base/arch/demangle.cpp
|
||||
index 67ec0d15f..6dc3e85a0 100644
|
||||
--- a/pxr/base/arch/demangle.cpp
|
||||
+++ b/pxr/base/arch/demangle.cpp
|
||||
@@ -36,6 +36,7 @@
|
||||
#if (ARCH_COMPILER_GCC_MAJOR == 3 && ARCH_COMPILER_GCC_MINOR >= 1) || \
|
||||
ARCH_COMPILER_GCC_MAJOR > 3 || defined(ARCH_COMPILER_CLANG)
|
||||
#define _AT_LEAST_GCC_THREE_ONE_OR_CLANG
|
||||
+#include <cxxabi.h>
|
||||
#endif
|
||||
|
||||
PXR_NAMESPACE_OPEN_SCOPE
|
||||
@@ -138,7 +139,6 @@
|
||||
#endif
|
||||
|
||||
#if defined(_AT_LEAST_GCC_THREE_ONE_OR_CLANG)
|
||||
-#include <cxxabi.h>
|
||||
|
||||
/*
|
||||
* This routine doesn't work when you get to gcc3.4.
|
||||
|
||||
diff --git a/pxr/base/work/singularTask.h b/pxr/base/work/singularTask.h
|
||||
index 67ec0d15f..6dc3e85a0 100644
|
||||
--- a/pxr/base/work/singularTask.h
|
||||
+++ b/pxr/base/work/singularTask.h
|
||||
@@ -120,7 +120,7 @@
|
||||
// case we go again to ensure the task can do whatever it
|
||||
// was awakened to do. Once we successfully take the count
|
||||
// to zero, we stop.
|
||||
- size_t old = count;
|
||||
+ std::size_t old = count;
|
||||
do { _fn(); } while (
|
||||
!count.compare_exchange_strong(old, 0));
|
||||
});
|
||||
|
66
build_files/cmake/Modules/FindShaderC.cmake
Normal file
66
build_files/cmake/Modules/FindShaderC.cmake
Normal file
@@ -0,0 +1,66 @@
|
||||
# - Find SHADERC library
|
||||
# Find the native ShaderC includes and library
|
||||
# This module defines
|
||||
# SHADERC_INCLUDE_DIRS, where to find shaderc.hpp, set when
|
||||
# SHADERC_INCLUDE_DIR is found.
|
||||
# SHADERC_LIBRARIES, libraries to link against to use ShaderC.
|
||||
# SHADERC_ROOT_DIR, The base directory to search for ShaderC.
|
||||
# This can also be an environment variable.
|
||||
# SHADERC_FOUND, If false, do not try to use ShaderC.
|
||||
#
|
||||
# also defined, but not for general use are
|
||||
# SHADERC_LIBRARY, where to find the ShaderC library.
|
||||
|
||||
#=============================================================================
|
||||
# Copyright 2021 Blender Foundation.
|
||||
#
|
||||
# Distributed under the OSI-approved BSD 3-Clause License,
|
||||
# see accompanying file BSD-3-Clause-license.txt for details.
|
||||
#=============================================================================
|
||||
|
||||
# If SHADERC_ROOT_DIR was defined in the environment, use it.
|
||||
if(NOT SHADERC_ROOT_DIR AND NOT $ENV{SHADERC_ROOT_DIR} STREQUAL "")
|
||||
set(SHADERC_ROOT_DIR $ENV{SHADERC_ROOT_DIR})
|
||||
endif()
|
||||
|
||||
set(_shaderc_SEARCH_DIRS
|
||||
${SHADERC_ROOT_DIR}
|
||||
/opt/lib/haru
|
||||
)
|
||||
|
||||
find_path(SHADERC_INCLUDE_DIR
|
||||
NAMES
|
||||
shaderc.hpp
|
||||
HINTS
|
||||
${_shaderc_SEARCH_DIRS}
|
||||
PATH_SUFFIXES
|
||||
include/shaderc
|
||||
include
|
||||
)
|
||||
|
||||
find_library(SHADERC_LIBRARY
|
||||
NAMES
|
||||
shaderc_combined
|
||||
shaderc
|
||||
HINTS
|
||||
${_shaderc_SEARCH_DIRS}
|
||||
PATH_SUFFIXES
|
||||
lib64 lib
|
||||
)
|
||||
|
||||
# Handle the QUIETLY and REQUIRED arguments and set SHADERC_FOUND to TRUE if
|
||||
# all listed variables are TRUE.
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(ShaderC DEFAULT_MSG SHADERC_LIBRARY SHADERC_INCLUDE_DIR)
|
||||
|
||||
if(SHADERC_FOUND)
|
||||
set(SHADERC_LIBRARIES ${SHADERC_LIBRARY})
|
||||
set(SHADERC_INCLUDE_DIRS ${SHADERC_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
mark_as_advanced(
|
||||
SHADERC_INCLUDE_DIR
|
||||
SHADERC_LIBRARY
|
||||
)
|
||||
|
||||
unset(_shaderc_SEARCH_DIRS)
|
@@ -19,6 +19,9 @@ set(WITH_CODEC_SNDFILE OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_COMPOSITOR OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_COREAUDIO OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_DEVICE_OPTIX OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_EMBREE OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_OSL OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_DRACO OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_FFTW3 OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_FREESTYLE OFF CACHE BOOL "" FORCE)
|
||||
|
@@ -61,7 +61,6 @@ set(WITH_MEM_JEMALLOC ON CACHE BOOL "" FORCE)
|
||||
# platform dependent options
|
||||
if(APPLE)
|
||||
set(WITH_COREAUDIO ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_DEVICE_METAL ON CACHE BOOL "" FORCE)
|
||||
endif()
|
||||
if(NOT WIN32)
|
||||
set(WITH_JACK ON CACHE BOOL "" FORCE)
|
||||
|
@@ -257,6 +257,9 @@ if(WITH_BOOST)
|
||||
if(WITH_INTERNATIONAL)
|
||||
list(APPEND _boost_FIND_COMPONENTS locale)
|
||||
endif()
|
||||
if(WITH_CYCLES_NETWORK)
|
||||
list(APPEND _boost_FIND_COMPONENTS serialization)
|
||||
endif()
|
||||
if(WITH_OPENVDB)
|
||||
list(APPEND _boost_FIND_COMPONENTS iostreams)
|
||||
endif()
|
||||
@@ -336,7 +339,7 @@ if(WITH_LLVM)
|
||||
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES AND WITH_CYCLES_OSL)
|
||||
if(WITH_CYCLES_OSL)
|
||||
set(CYCLES_OSL ${LIBDIR}/osl)
|
||||
|
||||
find_library(OSL_LIB_EXEC NAMES oslexec PATHS ${CYCLES_OSL}/lib)
|
||||
@@ -356,7 +359,7 @@ if(WITH_CYCLES AND WITH_CYCLES_OSL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
|
||||
if(WITH_CYCLES_EMBREE)
|
||||
find_package(Embree 3.8.0 REQUIRED)
|
||||
# Increase stack size for Embree, only works for executables.
|
||||
if(NOT WITH_PYTHON_MODULE)
|
||||
|
@@ -102,6 +102,11 @@ find_package_wrapper(ZLIB REQUIRED)
|
||||
find_package_wrapper(Zstd REQUIRED)
|
||||
find_package_wrapper(Freetype REQUIRED)
|
||||
|
||||
if(WITH_VULKAN)
|
||||
find_package_wrapper(Vulkan REQUIRED)
|
||||
find_package(ShaderC REQUIRED)
|
||||
endif()
|
||||
|
||||
if(WITH_PYTHON)
|
||||
# No way to set py35, remove for now.
|
||||
# find_package(PythonLibs)
|
||||
@@ -241,7 +246,7 @@ if(WITH_INPUT_NDOF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES AND WITH_CYCLES_OSL)
|
||||
if(WITH_CYCLES_OSL)
|
||||
set(CYCLES_OSL ${LIBDIR}/osl CACHE PATH "Path to OpenShadingLanguage installation")
|
||||
if(EXISTS ${CYCLES_OSL} AND NOT OSL_ROOT)
|
||||
set(OSL_ROOT ${CYCLES_OSL})
|
||||
@@ -314,7 +319,7 @@ if(WITH_BOOST)
|
||||
endif()
|
||||
set(Boost_USE_MULTITHREADED ON)
|
||||
set(__boost_packages filesystem regex thread date_time)
|
||||
if(WITH_CYCLES AND WITH_CYCLES_OSL)
|
||||
if(WITH_CYCLES_OSL)
|
||||
if(NOT (${OSL_LIBRARY_VERSION_MAJOR} EQUAL "1" AND ${OSL_LIBRARY_VERSION_MINOR} LESS "6"))
|
||||
list(APPEND __boost_packages wave)
|
||||
else()
|
||||
@@ -323,6 +328,9 @@ if(WITH_BOOST)
|
||||
if(WITH_INTERNATIONAL)
|
||||
list(APPEND __boost_packages locale)
|
||||
endif()
|
||||
if(WITH_CYCLES_NETWORK)
|
||||
list(APPEND __boost_packages serialization)
|
||||
endif()
|
||||
if(WITH_OPENVDB)
|
||||
list(APPEND __boost_packages iostreams)
|
||||
endif()
|
||||
@@ -400,7 +408,7 @@ if(WITH_OPENCOLORIO)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
|
||||
if(WITH_CYCLES_EMBREE)
|
||||
find_package(Embree 3.8.0 REQUIRED)
|
||||
endif()
|
||||
|
||||
|
@@ -477,7 +477,7 @@ if(WITH_PYTHON)
|
||||
endif()
|
||||
|
||||
if(WITH_BOOST)
|
||||
if(WITH_CYCLES AND WITH_CYCLES_OSL)
|
||||
if(WITH_CYCLES_OSL)
|
||||
set(boost_extra_libs wave)
|
||||
endif()
|
||||
if(WITH_INTERNATIONAL)
|
||||
@@ -520,7 +520,7 @@ if(WITH_BOOST)
|
||||
debug ${BOOST_LIBPATH}/libboost_thread-${BOOST_DEBUG_POSTFIX}
|
||||
debug ${BOOST_LIBPATH}/libboost_chrono-${BOOST_DEBUG_POSTFIX}
|
||||
)
|
||||
if(WITH_CYCLES AND WITH_CYCLES_OSL)
|
||||
if(WITH_CYCLES_OSL)
|
||||
set(BOOST_LIBRARIES ${BOOST_LIBRARIES}
|
||||
optimized ${BOOST_LIBPATH}/libboost_wave-${BOOST_POSTFIX}
|
||||
debug ${BOOST_LIBPATH}/libboost_wave-${BOOST_DEBUG_POSTFIX})
|
||||
@@ -708,7 +708,7 @@ if(WITH_CODEC_SNDFILE)
|
||||
set(LIBSNDFILE_LIBRARIES ${LIBSNDFILE_LIBPATH}/libsndfile-1.lib)
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES AND WITH_CYCLES_OSL)
|
||||
if(WITH_CYCLES_OSL)
|
||||
set(CYCLES_OSL ${LIBDIR}/osl CACHE PATH "Path to OpenShadingLanguage installation")
|
||||
set(OSL_SHADER_DIR ${CYCLES_OSL}/shaders)
|
||||
# Shaders have moved around a bit between OSL versions, check multiple locations
|
||||
@@ -741,7 +741,7 @@ if(WITH_CYCLES AND WITH_CYCLES_OSL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
|
||||
if(WITH_CYCLES_EMBREE)
|
||||
windows_find_package(Embree)
|
||||
if(NOT EMBREE_FOUND)
|
||||
set(EMBREE_INCLUDE_DIRS ${LIBDIR}/embree/include)
|
||||
@@ -874,5 +874,32 @@ if(WITH_HARU)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_VULKAN)
|
||||
if(EXISTS ${LIBDIR}/vulkan)
|
||||
set(Vulkan_FOUND On)
|
||||
set(Vulkan_ROOT_DIR ${LIBDIR}/vulkan)
|
||||
set(Vulkan_INCLUDE_DIR ${Vulkan_ROOT_DIR}/include)
|
||||
set(Vulkan_INCLUDE_DIRS ${Vulkan_INCLUDE_DIR})
|
||||
set(Vulkan_LIBRARY ${Vulkan_ROOT_DIR}/lib/vulkan-1.lib)
|
||||
set(Vulkan_LIBRARIES ${Vulkan_LIBRARY})
|
||||
else()
|
||||
message(WARNING "vulkan was not found, disabling WITH_VULKAN")
|
||||
set(WITH_VULKAN OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_VULKAN)
|
||||
if(EXISTS ${LIBDIR}/shaderc)
|
||||
set(SHADERC_ROOT_DIR ${LIBDIR}/shaderc)
|
||||
set(SHADERC_INCLUDE_DIR ${SHADERC_ROOT_DIR}/include)
|
||||
set(SHADERC_INCLUDE_DIRS ${SHADERC_INCLUDE_DIR})
|
||||
set(SHADERC_LIBRARY optimized ${SHADERC_ROOT_DIR}/lib/shaderc_shared.lib debug ${SHADERC_ROOT_DIR}/lib/shaderc_shared_d.lib)
|
||||
set(SHADERC_LIBRARIES ${SHADERC_LIBRARY})
|
||||
else()
|
||||
message(WARNING "shaderc was not found, disabling WITH_VULKAN")
|
||||
set(WITH_VULKAN OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(ZSTD_INCLUDE_DIRS ${LIBDIR}/zstd/include)
|
||||
set(ZSTD_LIBRARIES ${LIBDIR}/zstd/lib/zstd_static.lib)
|
||||
|
@@ -6,90 +6,91 @@
|
||||
* as part of the normal development process.
|
||||
*/
|
||||
|
||||
/* TODO: other modules.
|
||||
* - `libmv`
|
||||
* - `cycles`
|
||||
* - `opencolorio`
|
||||
* - `opensubdiv`
|
||||
* - `openvdb`
|
||||
* - `quadriflow`
|
||||
/** \defgroup MEM Guarded memory (de)allocation
|
||||
* \ingroup intern
|
||||
*/
|
||||
|
||||
/** \defgroup intern_atomic Atomic Operations
|
||||
* \ingroup intern */
|
||||
/** \defgroup clog C-Logging (CLOG)
|
||||
* \ingroup intern
|
||||
*/
|
||||
|
||||
/** \defgroup intern_clog C-Logging (CLOG)
|
||||
* \ingroup intern */
|
||||
/** \defgroup ctr container
|
||||
* \ingroup intern
|
||||
*/
|
||||
|
||||
/** \defgroup intern_eigen Eigen
|
||||
* \ingroup intern */
|
||||
/** \defgroup iksolver iksolver
|
||||
* \ingroup intern
|
||||
*/
|
||||
|
||||
/** \defgroup intern_glew-mx GLEW with Multiple Rendering Context's
|
||||
* \ingroup intern */
|
||||
/** \defgroup itasc itasc
|
||||
* \ingroup intern
|
||||
*/
|
||||
|
||||
/** \defgroup intern_iksolver Inverse Kinematics (Solver)
|
||||
* \ingroup intern */
|
||||
/** \defgroup memutil memutil
|
||||
* \ingroup intern
|
||||
*/
|
||||
|
||||
/** \defgroup intern_itasc Inverse Kinematics (ITASC)
|
||||
* \ingroup intern */
|
||||
/** \defgroup mikktspace mikktspace
|
||||
* \ingroup intern
|
||||
*/
|
||||
|
||||
/** \defgroup intern_libc_compat libc Compatibility For Linux
|
||||
* \ingroup intern */
|
||||
/** \defgroup moto moto
|
||||
* \ingroup intern
|
||||
*/
|
||||
|
||||
/** \defgroup intern_locale Locale
|
||||
* \ingroup intern */
|
||||
/** \defgroup eigen eigen
|
||||
* \ingroup intern
|
||||
*/
|
||||
|
||||
/** \defgroup intern_mantaflow Manta-Flow Fluid Simulation
|
||||
* \ingroup intern */
|
||||
/** \defgroup smoke smoke
|
||||
* \ingroup intern
|
||||
*/
|
||||
|
||||
/** \defgroup intern_mem Guarded Memory (de)allocation
|
||||
* \ingroup intern */
|
||||
|
||||
/** \defgroup intern_memutil Memory Utilities (memutil)
|
||||
* \ingroup intern */
|
||||
|
||||
/** \defgroup intern_mikktspace MikktSpace
|
||||
* \ingroup intern */
|
||||
|
||||
/** \defgroup intern_numaapi NUMA (Non Uniform Memory Architecture)
|
||||
* \ingroup intern */
|
||||
|
||||
/** \defgroup intern_rigidbody Rigid-Body C-API
|
||||
* \ingroup intern */
|
||||
|
||||
/** \defgroup intern_sky_model Sky Model
|
||||
* \ingroup intern */
|
||||
|
||||
/** \defgroup intern_utf_conv UTF-8/16 Conversion (utfconv)
|
||||
* \ingroup intern */
|
||||
/** \defgroup string string
|
||||
* \ingroup intern
|
||||
*/
|
||||
|
||||
/** \defgroup audaspace Audaspace
|
||||
* \ingroup intern undoc
|
||||
* \todo add to doxygen */
|
||||
* \todo add to doxygen
|
||||
*/
|
||||
/** \defgroup audcoreaudio Audaspace CoreAudio
|
||||
* \ingroup audaspace */
|
||||
* \ingroup audaspace
|
||||
*/
|
||||
/** \defgroup audfx Audaspace FX
|
||||
* \ingroup audaspace */
|
||||
* \ingroup audaspace
|
||||
*/
|
||||
/** \defgroup audopenal Audaspace OpenAL
|
||||
* \ingroup audaspace */
|
||||
* \ingroup audaspace
|
||||
*/
|
||||
/** \defgroup audpulseaudio Audaspace PulseAudio
|
||||
* \ingroup audaspace */
|
||||
* \ingroup audaspace
|
||||
*/
|
||||
/** \defgroup audwasapi Audaspace WASAPI
|
||||
* \ingroup audaspace */
|
||||
* \ingroup audaspace
|
||||
*/
|
||||
/** \defgroup audpython Audaspace Python
|
||||
* \ingroup audaspace */
|
||||
* \ingroup audaspace
|
||||
*/
|
||||
/** \defgroup audsdl Audaspace SDL
|
||||
* \ingroup audaspace */
|
||||
* \ingroup audaspace
|
||||
*/
|
||||
/** \defgroup audsrc Audaspace SRC
|
||||
* \ingroup audaspace */
|
||||
*
|
||||
* \ingroup audaspace
|
||||
*/
|
||||
/** \defgroup audffmpeg Audaspace FFMpeg
|
||||
* \ingroup audaspace */
|
||||
* \ingroup audaspace
|
||||
*/
|
||||
/** \defgroup audfftw Audaspace FFTW
|
||||
* \ingroup audaspace */
|
||||
* \ingroup audaspace
|
||||
*/
|
||||
/** \defgroup audjack Audaspace Jack
|
||||
* \ingroup audaspace */
|
||||
* \ingroup audaspace
|
||||
*/
|
||||
/** \defgroup audsndfile Audaspace sndfile
|
||||
* \ingroup audaspace */
|
||||
* \ingroup audaspace
|
||||
*/
|
||||
|
||||
/** \defgroup GHOST GHOST API
|
||||
* \ingroup intern GUI
|
||||
|
@@ -5,8 +5,7 @@
|
||||
/** \defgroup bmesh BMesh
|
||||
* \ingroup blender
|
||||
*/
|
||||
/** \defgroup compositor Compositing
|
||||
* \ingroup blender */
|
||||
/** \defgroup compositor Compositing */
|
||||
|
||||
/** \defgroup python Python
|
||||
* \ingroup blender
|
||||
@@ -79,8 +78,7 @@
|
||||
* \ingroup blender
|
||||
*/
|
||||
|
||||
/** \defgroup data DNA, RNA and .blend access
|
||||
* \ingroup blender */
|
||||
/** \defgroup data DNA, RNA and .blend access*/
|
||||
|
||||
/** \defgroup gpu GPU
|
||||
* \ingroup blender
|
||||
@@ -103,12 +101,11 @@
|
||||
* merged in docs.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \defgroup gui GUI
|
||||
* \ingroup blender */
|
||||
/** \defgroup gui GUI */
|
||||
|
||||
/** \defgroup wm Window Manager
|
||||
* \ingroup gui */
|
||||
* \ingroup blender gui
|
||||
*/
|
||||
|
||||
/* ================================ */
|
||||
|
||||
@@ -282,8 +279,7 @@
|
||||
* \ingroup gui
|
||||
*/
|
||||
|
||||
/** \defgroup externformats External Formats
|
||||
* \ingroup blender */
|
||||
/** \defgroup externformats External Formats */
|
||||
|
||||
/** \defgroup collada COLLADA
|
||||
* \ingroup externformats
|
||||
@@ -312,7 +308,4 @@
|
||||
/* ================================ */
|
||||
|
||||
/** \defgroup undoc Undocumented
|
||||
*
|
||||
* \brief Modules and libraries that are still undocumented,
|
||||
* or lacking proper integration into the doxygen system, are marked in this group.
|
||||
*/
|
||||
* \brief Modules and libraries that are still undocumented, or lacking proper integration into the doxygen system, are marked in this group. */
|
||||
|
@@ -61,7 +61,7 @@ def blender_extract_info(blender_bin: str) -> Dict[str, str]:
|
||||
stdout=subprocess.PIPE,
|
||||
).stdout.decode(encoding="utf-8")
|
||||
|
||||
blender_version_output = subprocess.run(
|
||||
blender_version_ouput = subprocess.run(
|
||||
[blender_bin, "--version"],
|
||||
env=blender_env,
|
||||
check=True,
|
||||
@@ -73,7 +73,7 @@ def blender_extract_info(blender_bin: str) -> Dict[str, str]:
|
||||
# check for each lines prefix to ensure these aren't included.
|
||||
blender_version = ""
|
||||
blender_date = ""
|
||||
for l in blender_version_output.split("\n"):
|
||||
for l in blender_version_ouput.split("\n"):
|
||||
if l.startswith("Blender "):
|
||||
# Remove 'Blender' prefix.
|
||||
blender_version = l.split(" ", 1)[1].strip()
|
||||
|
@@ -106,6 +106,24 @@ including advanced features.
|
||||
floating-point values. These values are interpreted as a plane equation.
|
||||
|
||||
|
||||
.. function:: glColor (red, green, blue, alpha):
|
||||
|
||||
B{glColor3b, glColor3d, glColor3f, glColor3i, glColor3s, glColor3ub, glColor3ui, glColor3us,
|
||||
glColor4b, glColor4d, glColor4f, glColor4i, glColor4s, glColor4ub, glColor4ui, glColor4us,
|
||||
glColor3bv, glColor3dv, glColor3fv, glColor3iv, glColor3sv, glColor3ubv, glColor3uiv,
|
||||
glColor3usv, glColor4bv, glColor4dv, glColor4fv, glColor4iv, glColor4sv, glColor4ubv,
|
||||
glColor4uiv, glColor4usv}
|
||||
|
||||
Set a new color.
|
||||
|
||||
.. seealso:: `OpenGL Docs <https://khronos.org/registry/OpenGL-Refpages/gl4/html/glColor.xhtml>`__
|
||||
|
||||
:type red, green, blue, alpha: Depends on function prototype.
|
||||
:arg red, green, blue: Specify new red, green, and blue values for the current color.
|
||||
:arg alpha: Specifies a new alpha value for the current color. Included only in the
|
||||
four-argument glColor4 commands. (With '4' colors only)
|
||||
|
||||
|
||||
.. function:: glColorMask(red, green, blue, alpha):
|
||||
|
||||
Enable and disable writing of frame buffer color components
|
||||
|
@@ -1103,7 +1103,6 @@ context_type_map = {
|
||||
"selectable_objects": ("Object", True),
|
||||
"selected_asset_files": ("FileSelectEntry", True),
|
||||
"selected_bones": ("EditBone", True),
|
||||
"selected_editable_actions": ("Action", True),
|
||||
"selected_editable_bones": ("EditBone", True),
|
||||
"selected_editable_fcurves": ("FCurve", True),
|
||||
"selected_editable_keyframes": ("Keyframe", True),
|
||||
@@ -1119,13 +1118,12 @@ context_type_map = {
|
||||
"selected_pose_bones": ("PoseBone", True),
|
||||
"selected_pose_bones_from_active_object": ("PoseBone", True),
|
||||
"selected_sequences": ("Sequence", True),
|
||||
"selected_visible_actions": ("Action", True),
|
||||
"selected_visible_fcurves": ("FCurve", True),
|
||||
"sequences": ("Sequence", True),
|
||||
"soft_body": ("SoftBodyModifier", False),
|
||||
"speaker": ("Speaker", False),
|
||||
"texture": ("Texture", False),
|
||||
"texture_slot": ("TextureSlot", False),
|
||||
"texture_slot": ("MaterialTextureSlot", False),
|
||||
"texture_user": ("ID", False),
|
||||
"texture_user_property": ("Property", False),
|
||||
"ui_list": ("UIList", False),
|
||||
|
4
extern/CMakeLists.txt
vendored
4
extern/CMakeLists.txt
vendored
@@ -116,3 +116,7 @@ endif()
|
||||
if (WITH_COMPOSITOR)
|
||||
add_subdirectory(smaa_areatex)
|
||||
endif()
|
||||
|
||||
if(WITH_VULKAN)
|
||||
add_subdirectory(vulkan_memory_allocator)
|
||||
endif()
|
||||
|
12
extern/hipew/README
vendored
12
extern/hipew/README
vendored
@@ -1,12 +0,0 @@
|
||||
The HIP Extension Wrangler Library (HIPEW) is a cross-platform open-source
|
||||
C/C++ library to dynamically load the HIP library.
|
||||
|
||||
HIP (Heterogeneous-Compute Interface for Portability) is an API for C++
|
||||
programming on AMD GPUs.
|
||||
|
||||
It is maintained as part of the Blender project, but included in extern/
|
||||
for consistency with CUEW and CLEW libraries.
|
||||
|
||||
LICENSE
|
||||
|
||||
HIPEW is released under the Apache 2.0 license.
|
5
extern/hipew/README.blender
vendored
5
extern/hipew/README.blender
vendored
@@ -1,5 +0,0 @@
|
||||
Project: Blender
|
||||
URL: https://git.blender.org/blender.git
|
||||
License: Apache 2.0
|
||||
Upstream version: N/A
|
||||
Local modifications: None
|
1
extern/hipew/include/hipew.h
vendored
1
extern/hipew/include/hipew.h
vendored
@@ -1333,7 +1333,6 @@ enum {
|
||||
HIPEW_SUCCESS = 0,
|
||||
HIPEW_ERROR_OPEN_FAILED = -1,
|
||||
HIPEW_ERROR_ATEXIT_FAILED = -2,
|
||||
HIPEW_ERROR_OLD_DRIVER = -3,
|
||||
};
|
||||
|
||||
enum {
|
||||
|
38
extern/hipew/src/hipew.c
vendored
38
extern/hipew/src/hipew.c
vendored
@@ -214,36 +214,6 @@ static void hipewHipExit(void) {
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
static int hipewHasOldDriver(const char *hip_path) {
|
||||
DWORD verHandle = 0;
|
||||
DWORD verSize = GetFileVersionInfoSize(hip_path, &verHandle);
|
||||
int old_driver = 0;
|
||||
if (verSize != 0) {
|
||||
LPSTR verData = (LPSTR)malloc(verSize);
|
||||
if (GetFileVersionInfo(hip_path, verHandle, verSize, verData)) {
|
||||
LPBYTE lpBuffer = NULL;
|
||||
UINT size = 0;
|
||||
if (VerQueryValue(verData, "\\", (VOID FAR * FAR *)&lpBuffer, &size)) {
|
||||
if (size) {
|
||||
VS_FIXEDFILEINFO *verInfo = (VS_FIXEDFILEINFO *)lpBuffer;
|
||||
/* Magic value from
|
||||
* https://docs.microsoft.com/en-us/windows/win32/api/verrsrc/ns-verrsrc-vs_fixedfileinfo */
|
||||
if (verInfo->dwSignature == 0xfeef04bd) {
|
||||
unsigned int fileVersionLS0 = (verInfo->dwFileVersionLS >> 16) & 0xffff;
|
||||
unsigned int fileversionLS1 = (verInfo->dwFileVersionLS >> 0) & 0xffff;
|
||||
/* Corresponds to versions older than AMD Radeon Pro 21.Q4. */
|
||||
old_driver = ((fileVersionLS0 < 3354) || (fileVersionLS0 == 3354 && fileversionLS1 < 13));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
free(verData);
|
||||
}
|
||||
return old_driver;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int hipewHipInit(void) {
|
||||
/* Library paths. */
|
||||
#ifdef _WIN32
|
||||
@@ -271,14 +241,6 @@ static int hipewHipInit(void) {
|
||||
return result;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
/* Test for driver version. */
|
||||
if(hipewHasOldDriver(hip_paths[0])) {
|
||||
result = HIPEW_ERROR_OLD_DRIVER;
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Load library. */
|
||||
hip_lib = dynamic_library_open_find(hip_paths);
|
||||
|
||||
|
2
extern/nanosvg/README.blender
vendored
2
extern/nanosvg/README.blender
vendored
@@ -1,7 +1,7 @@
|
||||
Project: NanoSVG
|
||||
URL: https://github.com/memononen/nanosvg
|
||||
License: zlib
|
||||
Upstream version: 3cdd4a9d7886
|
||||
Upstream version:
|
||||
Local modifications: Added some functionality to manage grease pencil layers
|
||||
|
||||
Added a fix to SVG import arc and float errors (https://developer.blender.org/rB11dc674c78b49fc4e0b7c134c375b6c8b8eacbcc)
|
||||
|
42
extern/vulkan_memory_allocator/CMakeLists.txt
vendored
Normal file
42
extern/vulkan_memory_allocator/CMakeLists.txt
vendored
Normal file
@@ -0,0 +1,42 @@
|
||||
# ***** BEGIN GPL LICENSE BLOCK *****
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version 2
|
||||
# of the License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software Foundation,
|
||||
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
#
|
||||
# The Original Code is Copyright (C) 2012, Blender Foundation
|
||||
# All rights reserved.
|
||||
# ***** END GPL LICENSE BLOCK *****
|
||||
|
||||
|
||||
# Build script for the vendored Vulkan Memory Allocator library.
# The library is header-only; `vk_mem_alloc_impl.cc` is the single translation
# unit that defines `VMA_IMPLEMENTATION` and so emits the implementation.

# Project-local include directories.
set(INC
  .
)

# System include directories (Vulkan headers needed by `vk_mem_alloc.h`).
set(INC_SYS
  ${Vulkan_INCLUDE_DIRS}
)

set(SRC
  vk_mem_alloc_impl.cc

  vk_mem_alloc.h
)

# No additional link libraries. Defined explicitly so that the `${LIB}`
# reference below does not expand an undefined variable.
set(LIB
)

blender_add_lib(extern_vulkan_memory_allocator "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")

# Silence nullability-completeness warnings when compiling the vendored header.
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
  target_compile_options(extern_vulkan_memory_allocator
    PRIVATE "-Wno-nullability-completeness"
  )
endif()
|
19
extern/vulkan_memory_allocator/LICENSE.txt
vendored
Normal file
19
extern/vulkan_memory_allocator/LICENSE.txt
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
Copyright (c) 2017-2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
5
extern/vulkan_memory_allocator/README.blender
vendored
Normal file
5
extern/vulkan_memory_allocator/README.blender
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
Project: VulkanMemoryAllocator
|
||||
URL: https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator
|
||||
License: MIT
|
||||
Upstream version: 4b047fd
|
||||
Local modifications: None
|
134
extern/vulkan_memory_allocator/README.md
vendored
Normal file
134
extern/vulkan_memory_allocator/README.md
vendored
Normal file
@@ -0,0 +1,134 @@
|
||||
# Vulkan Memory Allocator
|
||||
|
||||
Easy to integrate Vulkan memory allocation library.
|
||||
|
||||
**Documentation:** See [Vulkan Memory Allocator](https://gpuopen-librariesandsdks.github.io/VulkanMemoryAllocator/html/) (generated from Doxygen-style comments in [src/vk_mem_alloc.h](src/vk_mem_alloc.h))
|
||||
|
||||
**License:** MIT. See [LICENSE.txt](LICENSE.txt)
|
||||
|
||||
**Changelog:** See [CHANGELOG.md](CHANGELOG.md)
|
||||
|
||||
**Product page:** [Vulkan Memory Allocator on GPUOpen](https://gpuopen.com/gaming-product/vulkan-memory-allocator/)
|
||||
|
||||
**Build status:**
|
||||
|
||||
- Windows: [AppVeyor build status](https://ci.appveyor.com/project/adam-sawicki-amd/vulkanmemoryallocator/branch/master)
|
||||
- Linux: [Travis CI build status](https://travis-ci.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator)
|
||||
|
||||
# Problem
|
||||
|
||||
Memory allocation and resource (buffer and image) creation in Vulkan is difficult (compared to older graphics APIs, like D3D11 or OpenGL) for several reasons:
|
||||
|
||||
- It requires a lot of boilerplate code, just like everything else in Vulkan, because it is a low-level and high-performance API.
|
||||
- There is an additional level of indirection: `VkDeviceMemory` is allocated separately from creating `VkBuffer`/`VkImage` and they must be bound together.
|
||||
- Driver must be queried for supported memory heaps and memory types. Different IHVs provide different types of it.
|
||||
- It is recommended practice to allocate bigger chunks of memory and assign parts of them to particular resources.
|
||||
|
||||
# Features
|
||||
|
||||
This library can help game developers to manage memory allocations and resource creation by offering some higher-level functions:
|
||||
|
||||
1. Functions that help to choose correct and optimal memory type based on intended usage of the memory.
|
||||
- Required or preferred traits of the memory are expressed using a higher-level description compared to Vulkan flags.
|
||||
2. Functions that allocate memory blocks, reserve and return parts of them (`VkDeviceMemory` + offset + size) to the user.
|
||||
- Library keeps track of allocated memory blocks, used and unused ranges inside them, finds best matching unused ranges for new allocations, respects all the rules of alignment and buffer/image granularity.
|
||||
3. Functions that can create an image/buffer, allocate memory for it and bind them together - all in one call.
|
||||
|
||||
Additional features:
|
||||
|
||||
- Well-documented - description of all functions and structures provided, along with chapters that contain general description and example code.
|
||||
- Thread-safety: Library is designed to be used in multithreaded code. Access to a single device memory block referred by different buffers and textures (binding, mapping) is synchronized internally.
|
||||
- Configuration: Fill optional members of CreateInfo structure to provide custom CPU memory allocator, pointers to Vulkan functions and other parameters.
|
||||
- Customization: Predefine appropriate macros to provide your own implementation of all external facilities used by the library, from assert, mutex, and atomic, to vector and linked list.
|
||||
- Support for memory mapping, reference-counted internally. Support for persistently mapped memory: Just allocate with appropriate flag and you get access to mapped pointer.
|
||||
- Support for non-coherent memory. Functions that flush/invalidate memory. `nonCoherentAtomSize` is respected automatically.
|
||||
- Support for resource aliasing (overlap).
|
||||
- Support for sparse binding and sparse residency: Convenience functions that allocate or free multiple memory pages at once.
|
||||
- Custom memory pools: Create a pool with desired parameters (e.g. fixed or limited maximum size) and allocate memory out of it.
|
||||
- Linear allocator: Create a pool with linear algorithm and use it for much faster allocations and deallocations in free-at-once, stack, double stack, or ring buffer fashion.
|
||||
- Support for Vulkan 1.0, 1.1, 1.2.
|
||||
- Support for extensions (and equivalent functionality included in new Vulkan versions):
|
||||
- VK_EXT_memory_budget: Used internally if available to query for current usage and budget. If not available, it falls back to an estimation based on memory heap sizes.
|
||||
- VK_KHR_dedicated_allocation: Just enable it and it will be used automatically by the library.
|
||||
- VK_AMD_device_coherent_memory
|
||||
- VK_KHR_buffer_device_address
|
||||
- Defragmentation of GPU and CPU memory: Let the library move data around to free some memory blocks and make your allocations better compacted.
|
||||
- Lost allocations: Allocate memory with appropriate flags and let the library remove allocations that are not used for many frames to make room for new ones.
|
||||
- Statistics: Obtain detailed statistics about the amount of memory used, unused, number of allocated blocks, number of allocations etc. - globally, per memory heap, and per memory type.
|
||||
- Debug annotations: Associate string with name or opaque pointer to your own data with every allocation.
|
||||
- JSON dump: Obtain a string in JSON format with detailed map of internal state, including list of allocations and gaps between them.
|
||||
- Convert this JSON dump into a picture to visualize your memory. See [tools/VmaDumpVis](tools/VmaDumpVis/README.md).
|
||||
- Debugging incorrect memory usage: Enable initialization of all allocated memory with a bit pattern to detect usage of uninitialized or freed memory. Enable validation of a magic number before and after every allocation to detect out-of-bounds memory corruption.
|
||||
- Record and replay sequence of calls to library functions to a file to check correctness, measure performance, and gather statistics.
|
||||
|
||||
# Prerequisites
|
||||
|
||||
- Self-contained C++ library in single header file. No external dependencies other than standard C and C++ library and of course Vulkan. STL containers are not used by default.
|
||||
- Public interface in C, in same convention as Vulkan API. Implementation in C++.
|
||||
- Error handling implemented by returning `VkResult` error codes - same way as in Vulkan.
|
||||
- Interface documented using Doxygen-style comments.
|
||||
- Platform-independent, but developed and tested on Windows using Visual Studio. Continuous integration setup for Windows and Linux. Used also on Android, MacOS, and other platforms.
|
||||
|
||||
# Example
|
||||
|
||||
Basic usage of this library is very simple. Advanced features are optional. After you have created a global `VmaAllocator` object, the complete code needed to create a buffer may look like this:
|
||||
|
||||
```cpp
|
||||
VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
|
||||
bufferInfo.size = 65536;
|
||||
bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
||||
|
||||
VmaAllocationCreateInfo allocInfo = {};
|
||||
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||
|
||||
VkBuffer buffer;
|
||||
VmaAllocation allocation;
|
||||
vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr);
|
||||
```
|
||||
|
||||
With this one function call:
|
||||
|
||||
1. `VkBuffer` is created.
|
||||
2. `VkDeviceMemory` block is allocated if needed.
|
||||
3. An unused region of the memory block is bound to this buffer.
|
||||
|
||||
`VmaAllocation` is an object that represents memory assigned to this buffer. It can be queried for parameters like Vulkan memory handle and offset.
|
||||
|
||||
# Binaries
|
||||
|
||||
The release comes with precompiled binary executables for "VulkanSample" application which contains test suite and "VmaReplay" tool. They are compiled using Visual Studio 2019, so they require appropriate libraries to work, including "MSVCP140.dll", "VCRUNTIME140.dll", "VCRUNTIME140_1.dll". If their launch fails with error message telling about those files missing, please download and install [Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019](https://support.microsoft.com/en-us/help/2977003/the-latest-supported-visual-c-downloads), "x64" version.
|
||||
|
||||
# Read more
|
||||
|
||||
See **[Documentation](https://gpuopen-librariesandsdks.github.io/VulkanMemoryAllocator/html/)**.
|
||||
|
||||
# Software using this library
|
||||
|
||||
- **[Detroit: Become Human](https://gpuopen.com/learn/porting-detroit-3/)**
|
||||
- **[Vulkan Samples](https://github.com/LunarG/VulkanSamples)** - official Khronos Vulkan samples. License: Apache-style.
|
||||
- **[Anvil](https://github.com/GPUOpen-LibrariesAndSDKs/Anvil)** - cross-platform framework for Vulkan. License: MIT.
|
||||
- **[Filament](https://github.com/google/filament)** - physically based rendering engine for Android, Windows, Linux and macOS, from Google. Apache License 2.0.
|
||||
- **[Atypical Games - proprietary game engine](https://developer.samsung.com/galaxy-gamedev/gamedev-blog/infinitejet.html)**
|
||||
- **[Flax Engine](https://flaxengine.com/)**
|
||||
- **[Lightweight Java Game Library (LWJGL)](https://www.lwjgl.org/)** - includes binding of the library for Java. License: BSD.
|
||||
- **[PowerVR SDK](https://github.com/powervr-graphics/Native_SDK)** - C++ cross-platform 3D graphics SDK, from Imagination. License: MIT.
|
||||
- **[Skia](https://github.com/google/skia)** - complete 2D graphic library for drawing Text, Geometries, and Images, from Google.
|
||||
- **[The Forge](https://github.com/ConfettiFX/The-Forge)** - cross-platform rendering framework. Apache License 2.0.
|
||||
- **[VK9](https://github.com/disks86/VK9)** - Direct3D 9 compatibility layer using Vulkan. Zlib license.
|
||||
- **[vkDOOM3](https://github.com/DustinHLand/vkDOOM3)** - Vulkan port of GPL DOOM 3 BFG Edition. License: GNU GPL.
|
||||
- **[vkQuake2](https://github.com/kondrak/vkQuake2)** - vanilla Quake 2 with Vulkan support. License: GNU GPL.
|
||||
- **[Vulkan Best Practice for Mobile Developers](https://github.com/ARM-software/vulkan_best_practice_for_mobile_developers)** from ARM. License: MIT.
|
||||
- **[RPCS3](https://github.com/RPCS3/rpcs3)** - PlayStation 3 emulator/debugger. License: GNU GPLv2.
|
||||
|
||||
[Many other projects on GitHub](https://github.com/search?q=AMD_VULKAN_MEMORY_ALLOCATOR_H&type=Code) and some game development studios that use Vulkan in their games.
|
||||
|
||||
# See also
|
||||
|
||||
- **[D3D12 Memory Allocator](https://github.com/GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator)** - equivalent library for Direct3D 12. License: MIT.
|
||||
- **[Awesome Vulkan](https://github.com/vinjn/awesome-vulkan)** - a curated list of awesome Vulkan libraries, debuggers and resources.
|
||||
- **[VulkanMemoryAllocator-Hpp](https://github.com/malte-v/VulkanMemoryAllocator-Hpp)** - C++ binding for this library. License: CC0-1.0.
|
||||
- **[PyVMA](https://github.com/realitix/pyvma)** - Python wrapper for this library. Author: Jean-Sébastien B. (@realitix). License: Apache 2.0.
|
||||
- **[vk-mem](https://github.com/gwihlidal/vk-mem-rs)** - Rust binding for this library. Author: Graham Wihlidal. License: Apache 2.0 or MIT.
|
||||
- **[Haskell bindings](https://hackage.haskell.org/package/VulkanMemoryAllocator)**, **[github](https://github.com/expipiplus1/vulkan/tree/master/VulkanMemoryAllocator)** - Haskell bindings for this library. Author: Joe Hermaszewski (@expipiplus1). License BSD-3-Clause.
|
||||
- **[vma_sample_sdl](https://github.com/rextimmy/vma_sample_sdl)** - SDL port of the sample app of this library (with the goal of running it on multiple platforms, including MacOS). Author: @rextimmy. License: MIT.
|
||||
- **[vulkan-malloc](https://github.com/dylanede/vulkan-malloc)** - Vulkan memory allocation library for Rust. Based on version 1 of this library. Author: Dylan Ede (@dylanede). License: MIT / Apache 2.0.
|
19560
extern/vulkan_memory_allocator/vk_mem_alloc.h
vendored
Normal file
19560
extern/vulkan_memory_allocator/vk_mem_alloc.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3
extern/vulkan_memory_allocator/vk_mem_alloc_impl.cc
vendored
Normal file
3
extern/vulkan_memory_allocator/vk_mem_alloc_impl.cc
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
#define VMA_IMPLEMENTATION
|
||||
|
||||
#include "vk_mem_alloc.h"
|
@@ -85,3 +85,7 @@ endif()
|
||||
if(UNIX AND NOT APPLE)
|
||||
add_subdirectory(libc_compat)
|
||||
endif()
|
||||
|
||||
if(WITH_VULKAN)
|
||||
add_subdirectory(shader_compiler)
|
||||
endif()
|
||||
|
@@ -45,7 +45,7 @@
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \ingroup intern_atomic
|
||||
* \ingroup Atomic
|
||||
*
|
||||
* \brief Provides wrapper around system-specific atomic primitives,
|
||||
* and some extensions (faked-atomic operations over float numbers).
|
||||
|
@@ -44,10 +44,6 @@
|
||||
* The Original Code is: adapted from jemalloc.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \ingroup intern_atomic
|
||||
*/
|
||||
|
||||
#ifndef __ATOMIC_OPS_EXT_H__
|
||||
#define __ATOMIC_OPS_EXT_H__
|
||||
|
||||
|
@@ -5,7 +5,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
|
||||
* Copyright (C) 2009-2013 Facebook, Inc. All rights reserved.
|
||||
*
|
||||
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* 1. Redistributions of source code must retain the above copyright notice(s),
|
||||
@@ -13,7 +13,7 @@
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice(s),
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
|
||||
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
@@ -26,10 +26,6 @@
|
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \ingroup intern_atomic
|
||||
*/
|
||||
|
||||
#ifndef __ATOMIC_OPS_MSVC_H__
|
||||
#define __ATOMIC_OPS_MSVC_H__
|
||||
|
||||
|
@@ -44,10 +44,6 @@
|
||||
* The Original Code is: adapted from jemalloc.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \ingroup intern_atomic
|
||||
*/
|
||||
|
||||
#ifndef __ATOMIC_OPS_UNIX_H__
|
||||
#define __ATOMIC_OPS_UNIX_H__
|
||||
|
||||
|
@@ -44,10 +44,6 @@
|
||||
* The Original Code is: adapted from jemalloc.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \ingroup intern_atomic
|
||||
*/
|
||||
|
||||
#ifndef __ATOMIC_OPS_UTILS_H__
|
||||
#define __ATOMIC_OPS_UTILS_H__
|
||||
|
||||
|
@@ -14,8 +14,11 @@
|
||||
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef __CLG_LOG_H__
|
||||
#define __CLG_LOG_H__
|
||||
|
||||
/** \file
|
||||
* \ingroup intern_clog
|
||||
* \ingroup clog
|
||||
*
|
||||
* C Logging Library (clog)
|
||||
* ========================
|
||||
@@ -65,9 +68,6 @@
|
||||
* - 4+: May be used for more details than 3, should be avoided but not prevented.
|
||||
*/
|
||||
|
||||
#ifndef __CLG_LOG_H__
|
||||
#define __CLG_LOG_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
@@ -15,7 +15,7 @@
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \ingroup intern_clog
|
||||
* \ingroup clog
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
@@ -388,7 +388,7 @@ static void clg_ctx_fatal_action(CLogContext *ctx)
|
||||
|
||||
static void clg_ctx_backtrace(CLogContext *ctx)
|
||||
{
|
||||
/* NOTE: we avoid writing to 'FILE', for back-trace we make an exception,
|
||||
/* Note: we avoid writing to 'FILE', for back-trace we make an exception,
|
||||
* if necessary we could have a version of the callback that writes to file
|
||||
* descriptor all at once. */
|
||||
ctx->callbacks.backtrace_fn(ctx->output_file);
|
||||
|
@@ -40,7 +40,6 @@ set(SRC
|
||||
object_cull.cpp
|
||||
output_driver.cpp
|
||||
particles.cpp
|
||||
pointcloud.cpp
|
||||
curves.cpp
|
||||
logging.cpp
|
||||
python.cpp
|
||||
@@ -88,7 +87,6 @@ endif()
|
||||
|
||||
set(ADDON_FILES
|
||||
addon/__init__.py
|
||||
addon/camera.py
|
||||
addon/engine.py
|
||||
addon/operators.py
|
||||
addon/osl.py
|
||||
@@ -103,11 +101,6 @@ add_definitions(${GL_DEFINITIONS})
|
||||
if(WITH_CYCLES_DEVICE_HIP)
|
||||
add_definitions(-DWITH_HIP)
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_DEVICE_METAL)
|
||||
add_definitions(-DWITH_METAL)
|
||||
endif()
|
||||
|
||||
if(WITH_MOD_FLUID)
|
||||
add_definitions(-DWITH_FLUID)
|
||||
endif()
|
||||
|
@@ -1,84 +0,0 @@
|
||||
#
|
||||
# Copyright 2011-2021 Blender Foundation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# <pep8 compliant>
|
||||
|
||||
# Fit to match default projective camera with focal_length 50 and sensor_width 36.
|
||||
default_fisheye_polynomial = [-1.1735143712967577e-05,
|
||||
-0.019988736953434998,
|
||||
-3.3525322965709175e-06,
|
||||
3.099275275886036e-06,
|
||||
-2.6064646454854524e-08]
|
||||
|
||||
# Utilities to generate lens polynomials to match built-in camera types, only here
|
||||
# for reference at the moment, not used by the code.
|
||||
def create_grid(sensor_height, sensor_width):
    """Return a 100x100 array of distances from the sensor center.

    The sensor is sampled on a regular 100x100 grid spanning
    ``sensor_width`` by ``sensor_height``. Each entry is the Euclidean
    distance of that sample from the center of the sensor.

    :param sensor_height: Sensor height, or ``None`` to derive it from
        ``sensor_width`` using a 16:9 aspect ratio.
    :param sensor_width: Sensor width.
    :return: 2D numpy array of radial distances.
    """
    import numpy as np

    # Fall back to the default 16:9 aspect ratio when no height is given.
    height = sensor_width / (16 / 9) if sensor_height is None else sensor_height

    samples = np.linspace(0, 1, 100)
    grid_u, grid_v = np.meshgrid(samples, samples)

    # Re-center the unit square on the origin and scale to sensor size.
    offset_u = (grid_u - 0.5) * sensor_width
    offset_v = (grid_v - 0.5) * height

    return np.sqrt(offset_u ** 2 + offset_v ** 2)
|
||||
|
||||
|
||||
def fisheye_lens_polynomial_from_projective(focal_length=50, sensor_width=36, sensor_height=None):
    """Fit a lens polynomial to a projective camera.

    A 4th degree polynomial is fitted to ``-arctan(r / focal_length)`` over
    the radial distances ``r`` produced by ``create_grid``.

    :return: List of the five fitted coefficients, lowest power first
        (``np.polyfit`` returns them highest power first, so they are reversed).
    """
    import numpy as np

    radii = create_grid(sensor_height, sensor_width)
    angles = -np.arctan(radii / focal_length)
    coefficients = np.polyfit(radii.flat, angles.flat, 4)
    return list(coefficients[::-1])
|
||||
|
||||
|
||||
def fisheye_lens_polynomial_from_projective_fov(fov, sensor_width=36, sensor_height=None):
    """Fit a lens polynomial to a projective camera given its field of view.

    The field of view is converted to a focal length with
    ``sensor_width / 2 / tan(fov / 2)`` and the fit is delegated to
    ``fisheye_lens_polynomial_from_projective``.
    """
    import numpy as np

    focal_length = sensor_width / 2 / np.tan(fov / 2)
    return fisheye_lens_polynomial_from_projective(
        focal_length,
        sensor_width,
        sensor_height,
    )
|
||||
|
||||
|
||||
def fisheye_lens_polynomial_from_equisolid(lens=10.5, sensor_width=36, sensor_height=None):
    """Fit a lens polynomial to an equisolid fisheye lens.

    Solves a least-squares problem for the coefficients of the powers 1..4 of
    the radial distance so that they approximate
    ``-2 * arcsin(r / (2 * lens))``. The constant term is fixed to zero.

    :return: List of five coefficients, lowest power first.
    """
    import numpy as np

    radii = create_grid(sensor_height, sensor_width)
    flat_radii = radii.reshape(-1)

    # One row per power of the radius (r, r^2, r^3, r^4).
    powers = np.stack([flat_radii ** exponent for exponent in range(1, 5)])
    target = (-2 * np.arcsin(radii / (2 * lens))).reshape(-1)

    solution = np.linalg.lstsq(powers.T, target.T, rcond=None)[0]

    # No constant term is fitted; prepend an explicit zero for it.
    return [0] + list(solution)
|
||||
|
||||
|
||||
def fisheye_lens_polynomial_from_equidistant(fov=180, sensor_width=36, sensor_height=None):
    """Return the lens polynomial of an equidistant fisheye lens.

    Only the linear coefficient is non-zero: ``-radians(fov) / sensor_width``.
    ``sensor_height`` is accepted but not used.

    :param fov: Field of view, converted with ``np.radians`` (i.e. given in degrees).
    :return: List of five coefficients, lowest power first.
    """
    import numpy as np

    linear_coefficient = -np.radians(fov) / sensor_width
    return [0, linear_coefficient, 0, 0, 0]
|
||||
|
||||
|
||||
def fisheye_lens_polynomial_from_distorted_projective_polynomial(k1, k2, k3, focal_length=50, sensor_width=36, sensor_height=None):
    """Fit a lens polynomial to a projective camera with polynomial distortion.

    The normalized radius ``r / focal_length`` is multiplied by the factor
    ``1 + k1*r^2 + k2*r^4 + k3*r^6`` (powers of the normalized radius), and a
    4th degree polynomial is fitted to the negated arctangent of the result.

    :return: List of the five fitted coefficients, lowest power first.
    """
    import numpy as np

    radii = create_grid(sensor_height, sensor_width)

    # Even powers of the normalized radius.
    norm_sq = (radii / focal_length) ** 2
    norm_pow4 = norm_sq * norm_sq
    norm_pow6 = norm_pow4 * norm_sq

    distortion = 1 + k1 * norm_sq + k2 * norm_pow4 + k3 * norm_pow6
    angles = -np.arctan(radii / focal_length * distortion)

    coefficients = np.polyfit(radii.flat, angles.flat, 4)
    return list(coefficients[::-1])
|
||||
|
||||
def fisheye_lens_polynomial_from_distorted_projective_divisions(k1, k2, focal_length=50, sensor_width=36, sensor_height=None):
    """Fit a lens polynomial to a projective camera with division distortion.

    The normalized radius ``r / focal_length`` is divided by the factor
    ``1 + k1*r^2 + k2*r^4`` (powers of the normalized radius), and a 4th
    degree polynomial is fitted to the negated arctangent of the result.

    :return: List of the five fitted coefficients, lowest power first.
    """
    import numpy as np

    radii = create_grid(sensor_height, sensor_width)

    # Even powers of the normalized radius.
    norm_sq = (radii / focal_length) ** 2
    norm_pow4 = norm_sq * norm_sq

    distortion = 1 + k1 * norm_sq + k2 * norm_pow4
    angles = -np.arctan(radii / focal_length / distortion)

    coefficients = np.polyfit(radii.flat, angles.flat, 4)
    return list(coefficients[::-1])
|
@@ -28,7 +28,7 @@ def _configure_argument_parser():
|
||||
action='store_true')
|
||||
parser.add_argument("--cycles-device",
|
||||
help="Set the device to use for Cycles, overriding user preferences and the scene setting."
|
||||
"Valid options are 'CPU', 'CUDA', 'OPTIX', 'HIP' or 'METAL'."
|
||||
"Valid options are 'CPU', 'CUDA', 'OPTIX', or 'HIP'"
|
||||
"Additionally, you can append '+CPU' to any GPU type for hybrid rendering.",
|
||||
default=None)
|
||||
return parser
|
||||
|
@@ -33,7 +33,6 @@ from math import pi
|
||||
# enums
|
||||
|
||||
from . import engine
|
||||
from . import camera
|
||||
|
||||
enum_devices = (
|
||||
('CPU', "CPU", "Use CPU for rendering"),
|
||||
@@ -73,8 +72,6 @@ enum_panorama_types = (
|
||||
('FISHEYE_EQUISOLID', "Fisheye Equisolid",
|
||||
"Similar to most fisheye modern lens, takes sensor dimensions into consideration"),
|
||||
('MIRRORBALL', "Mirror Ball", "Uses the mirror ball mapping"),
|
||||
('FISHEYE_LENS_POLYNOMIAL', "Fisheye Lens Polynomial",
|
||||
"Defines the lens projection as polynomial to allow real world camera lenses to be mimicked."),
|
||||
)
|
||||
|
||||
enum_curve_shape = (
|
||||
@@ -114,8 +111,7 @@ enum_device_type = (
|
||||
('CPU', "CPU", "CPU", 0),
|
||||
('CUDA', "CUDA", "CUDA", 1),
|
||||
('OPTIX', "OptiX", "OptiX", 3),
|
||||
('HIP', "HIP", "HIP", 4),
|
||||
('METAL', "Metal", "Metal", 5)
|
||||
("HIP", "HIP", "HIP", 4)
|
||||
)
|
||||
|
||||
enum_texture_limit = (
|
||||
@@ -433,7 +429,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
)
|
||||
|
||||
direct_light_sampling_type: EnumProperty(
|
||||
name="Direct Light Sampling",
|
||||
name="Direct Light Sampling Type",
|
||||
description="The type of strategy used for sampling direct light contributions",
|
||||
items=enum_direct_light_sampling_type,
|
||||
default='MULTIPLE_IMPORTANCE_SAMPLING',
|
||||
@@ -794,7 +790,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
)
|
||||
|
||||
use_auto_tile: BoolProperty(
|
||||
name="Use Tiling",
|
||||
name="Using Tiling",
|
||||
description="Render high resolution images in tiles to reduce memory usage, using the specified tile size. Tiles are cached to disk while rendering to save memory",
|
||||
default=True,
|
||||
)
|
||||
@@ -894,32 +890,6 @@ class CyclesCameraSettings(bpy.types.PropertyGroup):
|
||||
default=pi,
|
||||
)
|
||||
|
||||
fisheye_polynomial_k0: FloatProperty(
|
||||
name="Fisheye Polynomial K0",
|
||||
description="Coefficient K0 of the lens polinomial",
|
||||
default=camera.default_fisheye_polynomial[0], precision=6, step=0.1, subtype='ANGLE',
|
||||
)
|
||||
fisheye_polynomial_k1: FloatProperty(
|
||||
name="Fisheye Polynomial K1",
|
||||
description="Coefficient K1 of the lens polinomial",
|
||||
default=camera.default_fisheye_polynomial[1], precision=6, step=0.1, subtype='ANGLE',
|
||||
)
|
||||
fisheye_polynomial_k2: FloatProperty(
|
||||
name="Fisheye Polynomial K2",
|
||||
description="Coefficient K2 of the lens polinomial",
|
||||
default=camera.default_fisheye_polynomial[2], precision=6, step=0.1, subtype='ANGLE',
|
||||
)
|
||||
fisheye_polynomial_k3: FloatProperty(
|
||||
name="Fisheye Polynomial K3",
|
||||
description="Coefficient K3 of the lens polinomial",
|
||||
default=camera.default_fisheye_polynomial[3], precision=6, step=0.1, subtype='ANGLE',
|
||||
)
|
||||
fisheye_polynomial_k4: FloatProperty(
|
||||
name="Fisheye Polynomial K4",
|
||||
description="Coefficient K4 of the lens polinomial",
|
||||
default=camera.default_fisheye_polynomial[4], precision=6, step=0.1, subtype='ANGLE',
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def register(cls):
|
||||
bpy.types.Camera.cycles = PointerProperty(
|
||||
@@ -1342,7 +1312,8 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
|
||||
def get_device_types(self, context):
|
||||
import _cycles
|
||||
has_cuda, has_optix, has_hip, has_metal = _cycles.get_device_types()
|
||||
has_cuda, has_optix, has_hip = _cycles.get_device_types()
|
||||
|
||||
list = [('NONE', "None", "Don't use compute device", 0)]
|
||||
if has_cuda:
|
||||
list.append(('CUDA', "CUDA", "Use CUDA for GPU acceleration", 1))
|
||||
@@ -1350,8 +1321,6 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
list.append(('OPTIX', "OptiX", "Use OptiX for GPU acceleration", 3))
|
||||
if has_hip:
|
||||
list.append(('HIP', "HIP", "Use HIP for GPU acceleration", 4))
|
||||
if has_metal:
|
||||
list.append(('METAL', "Metal", "Use Metal for GPU acceleration", 5))
|
||||
|
||||
return list
|
||||
|
||||
@@ -1377,7 +1346,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
|
||||
def update_device_entries(self, device_list):
|
||||
for device in device_list:
|
||||
if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP', 'METAL'}:
|
||||
if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP'}:
|
||||
continue
|
||||
# Try to find existing Device entry
|
||||
entry = self.find_existing_device_entry(device)
|
||||
@@ -1421,7 +1390,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
import _cycles
|
||||
# Ensure `self.devices` is not re-allocated when the second call to
|
||||
# get_devices_for_type is made, freeing items from the first list.
|
||||
for device_type in ('CUDA', 'OPTIX', 'HIP', 'METAL'):
|
||||
for device_type in ('CUDA', 'OPTIX', 'HIP'):
|
||||
self.update_device_entries(_cycles.available_devices(device_type))
|
||||
|
||||
# Deprecated: use refresh_devices instead.
|
||||
@@ -1473,8 +1442,6 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
col.label(text="Requires discrete AMD GPU with RDNA architecture", icon='BLANK1')
|
||||
if sys.platform[:3] == "win":
|
||||
col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
|
||||
elif device_type == 'METAL':
|
||||
col.label(text="Requires Apple Silicon and macOS 12.0 or newer", icon='BLANK1')
|
||||
return
|
||||
|
||||
for device in devices:
|
||||
|
@@ -97,11 +97,6 @@ def use_cpu(context):
|
||||
return (get_device_type(context) == 'NONE' or cscene.device == 'CPU')
|
||||
|
||||
|
||||
def use_metal(context):
|
||||
cscene = context.scene.cycles
|
||||
|
||||
return (get_device_type(context) == 'METAL' and cscene.device == 'GPU')
|
||||
|
||||
def use_cuda(context):
|
||||
cscene = context.scene.cycles
|
||||
|
||||
@@ -1020,7 +1015,7 @@ class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel):
|
||||
def poll(cls, context):
|
||||
ob = context.object
|
||||
if CyclesButtonsPanel.poll(context) and ob:
|
||||
if ob.type in {'MESH', 'CURVE', 'CURVE', 'SURFACE', 'FONT', 'META', 'CAMERA', 'HAIR', 'POINTCLOUD'}:
|
||||
if ob.type in {'MESH', 'CURVE', 'CURVE', 'SURFACE', 'FONT', 'META', 'CAMERA'}:
|
||||
return True
|
||||
if ob.instance_type == 'COLLECTION' and ob.instance_collection:
|
||||
return True
|
||||
@@ -1824,38 +1819,37 @@ class CYCLES_RENDER_PT_debug(CyclesDebugButtonsPanel, Panel):
|
||||
|
||||
def draw(self, context):
|
||||
layout = self.layout
|
||||
layout.use_property_split = True
|
||||
layout.use_property_decorate = False # No animation.
|
||||
|
||||
scene = context.scene
|
||||
cscene = scene.cycles
|
||||
|
||||
col = layout.column(heading="CPU")
|
||||
col = layout.column()
|
||||
|
||||
col.label(text="CPU Flags:")
|
||||
row = col.row(align=True)
|
||||
row.prop(cscene, "debug_use_cpu_sse2", toggle=True)
|
||||
row.prop(cscene, "debug_use_cpu_sse3", toggle=True)
|
||||
row.prop(cscene, "debug_use_cpu_sse41", toggle=True)
|
||||
row.prop(cscene, "debug_use_cpu_avx", toggle=True)
|
||||
row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
|
||||
col.prop(cscene, "debug_bvh_layout", text="BVH")
|
||||
col.prop(cscene, "debug_bvh_layout")
|
||||
|
||||
col.separator()
|
||||
|
||||
col = layout.column(heading="CUDA")
|
||||
col = layout.column()
|
||||
col.label(text="CUDA Flags:")
|
||||
col.prop(cscene, "debug_use_cuda_adaptive_compile")
|
||||
col = layout.column(heading="OptiX")
|
||||
col.prop(cscene, "debug_use_optix_debug", text="Module Debug")
|
||||
|
||||
col.separator()
|
||||
|
||||
col.prop(cscene, "debug_bvh_type", text="Viewport BVH")
|
||||
col = layout.column()
|
||||
col.label(text="OptiX Flags:")
|
||||
col.prop(cscene, "debug_use_optix_debug")
|
||||
|
||||
col.separator()
|
||||
|
||||
import _cycles
|
||||
if _cycles.with_debug:
|
||||
col.prop(cscene, "direct_light_sampling_type")
|
||||
col = layout.column()
|
||||
col.prop(cscene, "debug_bvh_type")
|
||||
|
||||
|
||||
class CYCLES_RENDER_PT_simplify(CyclesButtonsPanel, Panel):
|
||||
|
@@ -69,12 +69,6 @@ struct BlenderCamera {
|
||||
float pole_merge_angle_from;
|
||||
float pole_merge_angle_to;
|
||||
|
||||
float fisheye_polynomial_k0;
|
||||
float fisheye_polynomial_k1;
|
||||
float fisheye_polynomial_k2;
|
||||
float fisheye_polynomial_k3;
|
||||
float fisheye_polynomial_k4;
|
||||
|
||||
enum { AUTO, HORIZONTAL, VERTICAL } sensor_fit;
|
||||
float sensor_width;
|
||||
float sensor_height;
|
||||
@@ -206,12 +200,6 @@ static void blender_camera_from_object(BlenderCamera *bcam,
|
||||
bcam->longitude_min = RNA_float_get(&ccamera, "longitude_min");
|
||||
bcam->longitude_max = RNA_float_get(&ccamera, "longitude_max");
|
||||
|
||||
bcam->fisheye_polynomial_k0 = RNA_float_get(&ccamera, "fisheye_polynomial_k0");
|
||||
bcam->fisheye_polynomial_k1 = RNA_float_get(&ccamera, "fisheye_polynomial_k1");
|
||||
bcam->fisheye_polynomial_k2 = RNA_float_get(&ccamera, "fisheye_polynomial_k2");
|
||||
bcam->fisheye_polynomial_k3 = RNA_float_get(&ccamera, "fisheye_polynomial_k3");
|
||||
bcam->fisheye_polynomial_k4 = RNA_float_get(&ccamera, "fisheye_polynomial_k4");
|
||||
|
||||
bcam->interocular_distance = b_camera.stereo().interocular_distance();
|
||||
if (b_camera.stereo().convergence_mode() == BL::CameraStereoData::convergence_mode_PARALLEL) {
|
||||
bcam->convergence_distance = FLT_MAX;
|
||||
@@ -434,8 +422,7 @@ static void blender_camera_sync(Camera *cam,
|
||||
cam->set_full_height(height);
|
||||
|
||||
/* panorama sensor */
|
||||
if (bcam->type == CAMERA_PANORAMA && (bcam->panorama_type == PANORAMA_FISHEYE_EQUISOLID ||
|
||||
bcam->panorama_type == PANORAMA_FISHEYE_LENS_POLYNOMIAL)) {
|
||||
if (bcam->type == CAMERA_PANORAMA && bcam->panorama_type == PANORAMA_FISHEYE_EQUISOLID) {
|
||||
float fit_xratio = (float)bcam->render_width * bcam->pixelaspect.x;
|
||||
float fit_yratio = (float)bcam->render_height * bcam->pixelaspect.y;
|
||||
bool horizontal_fit;
|
||||
@@ -478,12 +465,6 @@ static void blender_camera_sync(Camera *cam,
|
||||
cam->set_latitude_min(bcam->latitude_min);
|
||||
cam->set_latitude_max(bcam->latitude_max);
|
||||
|
||||
cam->set_fisheye_polynomial_k0(bcam->fisheye_polynomial_k0);
|
||||
cam->set_fisheye_polynomial_k1(bcam->fisheye_polynomial_k1);
|
||||
cam->set_fisheye_polynomial_k2(bcam->fisheye_polynomial_k2);
|
||||
cam->set_fisheye_polynomial_k3(bcam->fisheye_polynomial_k3);
|
||||
cam->set_fisheye_polynomial_k4(bcam->fisheye_polynomial_k4);
|
||||
|
||||
cam->set_longitude_min(bcam->longitude_min);
|
||||
cam->set_longitude_max(bcam->longitude_max);
|
||||
|
||||
|
@@ -819,14 +819,11 @@ void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, H
|
||||
new_hair.set_used_shaders(used_shaders);
|
||||
|
||||
if (view_layer.use_hair) {
|
||||
#ifdef WITH_HAIR_NODES
|
||||
if (b_ob_info.object_data.is_a(&RNA_Hair)) {
|
||||
/* Hair object. */
|
||||
sync_hair(&new_hair, b_ob_info, false);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
else {
|
||||
/* Particle hair. */
|
||||
bool need_undeformed = new_hair.need_attribute(scene, ATTR_STD_GENERATED);
|
||||
BL::Mesh b_mesh = object_to_mesh(
|
||||
@@ -873,15 +870,12 @@ void BlenderSync::sync_hair_motion(BL::Depsgraph b_depsgraph,
|
||||
|
||||
/* Export deformed coordinates. */
|
||||
if (ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview)) {
|
||||
#ifdef WITH_HAIR_NODES
|
||||
if (b_ob_info.object_data.is_a(&RNA_Hair)) {
|
||||
/* Hair object. */
|
||||
sync_hair(hair, b_ob_info, true, motion_step);
|
||||
return;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
else {
|
||||
/* Particle hair. */
|
||||
BL::Mesh b_mesh = object_to_mesh(
|
||||
b_data, b_ob_info, b_depsgraph, false, Mesh::SUBDIVISION_NONE);
|
||||
|
@@ -27,7 +27,6 @@ enum ComputeDevice {
|
||||
COMPUTE_DEVICE_CUDA = 1,
|
||||
COMPUTE_DEVICE_OPTIX = 3,
|
||||
COMPUTE_DEVICE_HIP = 4,
|
||||
COMPUTE_DEVICE_METAL = 5,
|
||||
|
||||
COMPUTE_DEVICE_NUM
|
||||
};
|
||||
@@ -86,9 +85,6 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
|
||||
else if (compute_device == COMPUTE_DEVICE_HIP) {
|
||||
mask |= DEVICE_MASK_HIP;
|
||||
}
|
||||
else if (compute_device == COMPUTE_DEVICE_METAL) {
|
||||
mask |= DEVICE_MASK_METAL;
|
||||
}
|
||||
vector<DeviceInfo> devices = Device::available_devices(mask);
|
||||
|
||||
/* Match device preferences and available devices. */
|
||||
|
@@ -19,7 +19,6 @@
|
||||
#include "scene/hair.h"
|
||||
#include "scene/mesh.h"
|
||||
#include "scene/object.h"
|
||||
#include "scene/pointcloud.h"
|
||||
#include "scene/volume.h"
|
||||
|
||||
#include "blender/sync.h"
|
||||
@@ -32,18 +31,10 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
static Geometry::Type determine_geom_type(BObjectInfo &b_ob_info, bool use_particle_hair)
|
||||
{
|
||||
#ifdef WITH_HAIR_NODES
|
||||
if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) {
|
||||
#else
|
||||
if (use_particle_hair) {
|
||||
#endif
|
||||
return Geometry::HAIR;
|
||||
}
|
||||
|
||||
if (b_ob_info.object_data.is_a(&RNA_PointCloud)) {
|
||||
return Geometry::POINTCLOUD;
|
||||
}
|
||||
|
||||
if (b_ob_info.object_data.is_a(&RNA_Volume) ||
|
||||
(b_ob_info.object_data == b_ob_info.real_object.data() &&
|
||||
object_fluid_gas_domain_find(b_ob_info.real_object))) {
|
||||
@@ -116,9 +107,6 @@ Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
|
||||
else if (geom_type == Geometry::VOLUME) {
|
||||
geom = scene->create_node<Volume>();
|
||||
}
|
||||
else if (geom_type == Geometry::POINTCLOUD) {
|
||||
geom = scene->create_node<PointCloud>();
|
||||
}
|
||||
else {
|
||||
geom = scene->create_node<Mesh>();
|
||||
}
|
||||
@@ -178,10 +166,6 @@ Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
|
||||
Volume *volume = static_cast<Volume *>(geom);
|
||||
sync_volume(b_ob_info, volume);
|
||||
}
|
||||
else if (geom_type == Geometry::POINTCLOUD) {
|
||||
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
|
||||
sync_pointcloud(pointcloud, b_ob_info);
|
||||
}
|
||||
else {
|
||||
Mesh *mesh = static_cast<Mesh *>(geom);
|
||||
sync_mesh(b_depsgraph, b_ob_info, mesh);
|
||||
@@ -231,11 +215,7 @@ void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph,
|
||||
if (progress.get_cancel())
|
||||
return;
|
||||
|
||||
#ifdef WITH_HAIR_NODES
|
||||
if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) {
|
||||
#else
|
||||
if (use_particle_hair) {
|
||||
#endif
|
||||
Hair *hair = static_cast<Hair *>(geom);
|
||||
sync_hair_motion(b_depsgraph, b_ob_info, hair, motion_step);
|
||||
}
|
||||
@@ -243,10 +223,6 @@ void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph,
|
||||
object_fluid_gas_domain_find(b_ob_info.real_object)) {
|
||||
/* No volume motion blur support yet. */
|
||||
}
|
||||
else if (b_ob_info.object_data.is_a(&RNA_PointCloud)) {
|
||||
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
|
||||
sync_pointcloud_motion(pointcloud, b_ob_info, motion_step);
|
||||
}
|
||||
else {
|
||||
Mesh *mesh = static_cast<Mesh *>(geom);
|
||||
sync_mesh_motion(b_depsgraph, b_ob_info, mesh, motion_step);
|
||||
|
@@ -24,14 +24,8 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Packed Images */
|
||||
|
||||
BlenderImageLoader::BlenderImageLoader(BL::Image b_image,
|
||||
const int frame,
|
||||
const bool is_preview_render)
|
||||
: b_image(b_image),
|
||||
frame(frame),
|
||||
/* Don't free cache for preview render to avoid race condition from T93560, to be fixed
|
||||
properly later as we are close to release. */
|
||||
free_cache(!is_preview_render && !b_image.has_data())
|
||||
BlenderImageLoader::BlenderImageLoader(BL::Image b_image, int frame)
|
||||
: b_image(b_image), frame(frame), free_cache(!b_image.has_data())
|
||||
{
|
||||
}
|
||||
|
||||
|
@@ -25,7 +25,7 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
class BlenderImageLoader : public ImageLoader {
|
||||
public:
|
||||
BlenderImageLoader(BL::Image b_image, const int frame, const bool is_preview_render);
|
||||
BlenderImageLoader(BL::Image b_image, int frame);
|
||||
|
||||
bool load_metadata(const ImageDeviceFeatures &features, ImageMetaData &metadata) override;
|
||||
bool load_pixels(const ImageMetaData &metadata,
|
||||
|
@@ -72,8 +72,7 @@ bool BlenderSync::object_is_geometry(BObjectInfo &b_ob_info)
|
||||
|
||||
BL::Object::type_enum type = b_ob_info.iter_object.type();
|
||||
|
||||
if (type == BL::Object::type_VOLUME || type == BL::Object::type_HAIR ||
|
||||
type == BL::Object::type_POINTCLOUD) {
|
||||
if (type == BL::Object::type_VOLUME || type == BL::Object::type_HAIR) {
|
||||
/* Will be exported attached to mesh. */
|
||||
return true;
|
||||
}
|
||||
@@ -207,7 +206,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* only interested in object that we can create geometry from */
|
||||
/* only interested in object that we can create meshes from */
|
||||
if (!object_is_geometry(b_ob_info)) {
|
||||
return NULL;
|
||||
}
|
||||
|
@@ -66,7 +66,7 @@ bool BlenderOutputDriver::read_render_tile(const Tile &tile)
|
||||
|
||||
bool BlenderOutputDriver::update_render_tile(const Tile &tile)
|
||||
{
|
||||
/* Use final write for preview renders, otherwise render result wouldn't be updated
|
||||
/* Use final write for preview renders, otherwise render result wouldn't be be updated
|
||||
* quickly on Blender side. For all other cases we use the display driver. */
|
||||
if (b_engine_.is_preview()) {
|
||||
write_render_tile(tile);
|
||||
|
@@ -1,253 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2013 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "scene/pointcloud.h"
|
||||
#include "scene/attribute.h"
|
||||
#include "scene/scene.h"
|
||||
|
||||
#include "blender/sync.h"
|
||||
#include "blender/util.h"
|
||||
|
||||
#include "util/foreach.h"
|
||||
#include "util/hash.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
template<typename TypeInCycles, typename GetValueAtIndex>
|
||||
static void fill_generic_attribute(BL::PointCloud &b_pointcloud,
|
||||
TypeInCycles *data,
|
||||
const GetValueAtIndex &get_value_at_index)
|
||||
{
|
||||
const int num_points = b_pointcloud.points.length();
|
||||
for (int i = 0; i < num_points; i++) {
|
||||
data[i] = get_value_at_index(i);
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy the generic attributes of a Blender point cloud onto the Cycles point cloud.
 *
 * An attribute whose name is already present on `pointcloud` is skipped. Float, boolean and
 * integer attributes are all stored as float values; float vector, float color and float2
 * attributes are stored as float3, float4 and float2 respectively. Any other data type is
 * ignored. */
static void copy_attributes(PointCloud *pointcloud, BL::PointCloud b_pointcloud)
{
  /* All copied attributes use the same element type. */
  const AttributeElement element = ATTR_ELEMENT_VERTEX;
  AttributeSet &attributes = pointcloud->attributes;

  for (BL::Attribute &b_attribute : b_pointcloud.attributes) {
    const ustring name{b_attribute.name().c_str()};

    /* Do not overwrite an attribute that already exists under this name. */
    if (attributes.find(name)) {
      continue;
    }

    switch (b_attribute.data_type()) {
      case BL::Attribute::data_type_FLOAT: {
        BL::FloatAttribute b_typed{b_attribute};
        float *dst = attributes.add(name, TypeFloat, element)->data_float();
        fill_generic_attribute(b_pointcloud, dst, [&](int i) { return b_typed.data[i].value(); });
        break;
      }
      case BL::Attribute::data_type_BOOLEAN: {
        BL::BoolAttribute b_typed{b_attribute};
        float *dst = attributes.add(name, TypeFloat, element)->data_float();
        fill_generic_attribute(
            b_pointcloud, dst, [&](int i) { return static_cast<float>(b_typed.data[i].value()); });
        break;
      }
      case BL::Attribute::data_type_INT: {
        BL::IntAttribute b_typed{b_attribute};
        float *dst = attributes.add(name, TypeFloat, element)->data_float();
        fill_generic_attribute(
            b_pointcloud, dst, [&](int i) { return static_cast<float>(b_typed.data[i].value()); });
        break;
      }
      case BL::Attribute::data_type_FLOAT_VECTOR: {
        BL::FloatVectorAttribute b_typed{b_attribute};
        float3 *dst = attributes.add(name, TypeVector, element)->data_float3();
        fill_generic_attribute(b_pointcloud, dst, [&](int i) {
          BL::Array<float, 3> components = b_typed.data[i].vector();
          return make_float3(components[0], components[1], components[2]);
        });
        break;
      }
      case BL::Attribute::data_type_FLOAT_COLOR: {
        BL::FloatColorAttribute b_typed{b_attribute};
        float4 *dst = attributes.add(name, TypeRGBA, element)->data_float4();
        fill_generic_attribute(b_pointcloud, dst, [&](int i) {
          BL::Array<float, 4> components = b_typed.data[i].color();
          return make_float4(components[0], components[1], components[2], components[3]);
        });
        break;
      }
      case BL::Attribute::data_type_FLOAT2: {
        BL::Float2Attribute b_typed{b_attribute};
        float2 *dst = attributes.add(name, TypeFloat2, element)->data_float2();
        fill_generic_attribute(b_pointcloud, dst, [&](int i) {
          BL::Array<float, 2> components = b_typed.data[i].vector();
          return make_float2(components[0], components[1]);
        });
        break;
      }
      default:
        /* Not supported. */
        break;
    }
  }
}
|
||||
|
||||
/* Export a Blender point cloud into `pointcloud`.
 *
 * Adds every point with its position and radius, fills the ATTR_STD_POINT_RANDOM attribute
 * (one hashed value per point) when `need_attribute()` reports it is required, and finally
 * copies the generic attributes. */
static void export_pointcloud(Scene *scene, PointCloud *pointcloud, BL::PointCloud b_pointcloud)
{
  /* TODO: optimize so we can straight memcpy arrays from Blender? */

  /* Only create the random attribute when it is requested. */
  const bool need_random = pointcloud->need_attribute(scene, ATTR_STD_POINT_RANDOM);
  Attribute *attr_random = NULL;
  if (need_random) {
    attr_random = pointcloud->attributes.add(ATTR_STD_POINT_RANDOM);
  }

  /* Reserve storage for all points up front. */
  const int point_count = b_pointcloud.points.length();
  pointcloud->reserve(point_count);

  /* Walk the Blender points and append each one. */
  BL::PointCloud::points_iterator b_point_iter;
  b_pointcloud.points.begin(b_point_iter);
  while (b_point_iter != b_pointcloud.points.end()) {
    BL::Point b_point = *b_point_iter;
    const float3 position = get_float3(b_point.co());
    const float point_radius = b_point.radius();
    pointcloud->add_point(position, point_radius);

    /* Per-point random number derived from the point index. */
    if (attr_random) {
      attr_random->add(hash_uint2_to_float(b_point.index(), 0));
    }

    ++b_point_iter;
  }

  /* Generic attributes come last. */
  copy_attributes(pointcloud, b_pointcloud);
}
|
||||
|
||||
/* Export the point positions and radii of `b_pointcloud` for one motion step.
 *
 * The data is written into the ATTR_STD_MOTION_VERTEX_POSITION attribute at offset
 * `motion_step * num_points`; the attribute is created when missing. If the attribute was
 * created by this call and either the point count differs or no point differs from the center
 * positions, it is removed again. Otherwise the preceding steps are filled from the center
 * positions. Generic attributes are copied at the end. */
static void export_pointcloud_motion(PointCloud *pointcloud,
                                     BL::PointCloud b_pointcloud,
                                     int motion_step)
{
  /* Look up the motion attribute, creating it on first use. */
  Attribute *attr_mP = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
  const bool new_attribute = (attr_mP == NULL);
  if (new_attribute) {
    attr_mP = pointcloud->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
  }

  /* Destination for this step and the center positions to compare against. */
  const int num_points = pointcloud->num_points();
  float3 *step_points = attr_mP->data_float3() + motion_step * num_points;
  const array<float3> &center_points = pointcloud->get_points();

  int num_exported = 0;
  bool have_motion = false;

  BL::PointCloud::points_iterator b_point_iter;
  for (b_pointcloud.points.begin(b_point_iter); b_point_iter != b_pointcloud.points.end();
       ++b_point_iter) {
    BL::Point b_point = *b_point_iter;

    /* Ignore any Blender points beyond what the Cycles point cloud holds. */
    if (num_exported >= num_points) {
      continue;
    }

    /* Position in xyz, radius stored in the w component. */
    float3 P = get_float3(b_point.co());
    P.w = b_point.radius();
    step_points[num_exported] = P;

    if (!have_motion && (P != center_points[num_exported])) {
      have_motion = true;
    }
    num_exported++;
  }

  /* In case of new attribute, we verify if there really was any motion. */
  if (new_attribute) {
    const bool usable = (num_exported == num_points) && have_motion;
    if (!usable) {
      pointcloud->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
    }
    else {
      /* Motion, fill up previous steps that we might have skipped because
       * they had no motion, but we need them anyway now. */
      for (int step = 0; step < motion_step; step++) {
        pointcloud->copy_center_to_motion_step(step);
      }
    }
  }

  /* Export attributes */
  copy_attributes(pointcloud, b_pointcloud);
}
|
||||
|
||||
/* Synchronize a Cycles point cloud with the Blender point cloud in `b_ob_info.object_data`.
 *
 * The Blender data is exported into a temporary PointCloud, whose socket values and attributes
 * are then transferred onto `pointcloud`. `pointcloud` must not be null: it is dereferenced
 * unconditionally. */
void BlenderSync::sync_pointcloud(PointCloud *pointcloud, BObjectInfo &b_ob_info)
{
  /* Remember the previous point count to decide below whether a rebuild is needed. */
  const size_t old_numpoints = pointcloud->num_points();

  array<Node *> used_shaders = pointcloud->get_used_shaders();

  PointCloud new_pointcloud;
  new_pointcloud.set_used_shaders(used_shaders);

  /* TODO: add option to filter out points in the view layer. */
  BL::PointCloud b_pointcloud(b_ob_info.object_data);
  export_pointcloud(scene, &new_pointcloud, b_pointcloud);

  /* update original sockets */
  for (const SocketType &socket : new_pointcloud.type->inputs) {
    /* Those sockets are updated in sync_object, so do not modify them. */
    if (socket.name == "use_motion_blur" || socket.name == "motion_steps" ||
        socket.name == "used_shaders") {
      continue;
    }
    pointcloud->set_value(socket, new_pointcloud, socket);
  }

  /* Replace the attributes with the freshly exported ones, moving them out of the temporary. */
  pointcloud->attributes.clear();
  foreach (Attribute &attr, new_pointcloud.attributes.attributes) {
    pointcloud->attributes.attributes.push_back(std::move(attr));
  }

  /* tag update */
  /* `pointcloud` was already dereferenced above, so no null check is needed here. */
  const bool rebuild = (old_numpoints != pointcloud->num_points());
  pointcloud->tag_update(scene, rebuild);
}
|
||||
|
||||
/* Synchronize one motion step of a point cloud.
 *
 * Does nothing when the point cloud has no points. When the object is reported as deform
 * modified, the Blender point data is exported for `motion_step`; otherwise the center
 * positions are copied into that step. */
void BlenderSync::sync_pointcloud_motion(PointCloud *pointcloud,
                                         BObjectInfo &b_ob_info,
                                         int motion_step)
{
  /* Skip if nothing exported. */
  if (pointcloud->num_points() == 0) {
    return;
  }

  const bool is_deformed = ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview);
  if (!is_deformed) {
    /* No deformation on this frame, copy coordinates if other frames did have it. */
    pointcloud->copy_center_to_motion_step(motion_step);
    return;
  }

  /* Export deformed coordinates from the PointCloud object. */
  BL::PointCloud b_pointcloud(b_ob_info.object_data);
  export_pointcloud_motion(pointcloud, b_pointcloud, motion_step);
}
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -906,18 +906,16 @@ static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*
|
||||
static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
|
||||
{
|
||||
vector<DeviceType> device_types = Device::available_types();
|
||||
bool has_cuda = false, has_optix = false, has_hip = false, has_metal = false;
|
||||
bool has_cuda = false, has_optix = false, has_hip = false;
|
||||
foreach (DeviceType device_type, device_types) {
|
||||
has_cuda |= (device_type == DEVICE_CUDA);
|
||||
has_optix |= (device_type == DEVICE_OPTIX);
|
||||
has_hip |= (device_type == DEVICE_HIP);
|
||||
has_metal |= (device_type == DEVICE_METAL);
|
||||
}
|
||||
PyObject *list = PyTuple_New(4);
|
||||
PyObject *list = PyTuple_New(3);
|
||||
PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda));
|
||||
PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix));
|
||||
PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip));
|
||||
PyTuple_SET_ITEM(list, 3, PyBool_FromLong(has_metal));
|
||||
return list;
|
||||
}
|
||||
|
||||
@@ -946,9 +944,6 @@ static PyObject *set_device_override_func(PyObject * /*self*/, PyObject *arg)
|
||||
else if (override == "HIP") {
|
||||
BlenderSession::device_override = DEVICE_MASK_HIP;
|
||||
}
|
||||
else if (override == "METAL") {
|
||||
BlenderSession::device_override = DEVICE_MASK_METAL;
|
||||
}
|
||||
else {
|
||||
printf("\nError: %s is not a valid Cycles device.\n", override.c_str());
|
||||
Py_RETURN_FALSE;
|
||||
@@ -1059,13 +1054,5 @@ void *CCL_python_module_init()
|
||||
Py_INCREF(Py_False);
|
||||
}
|
||||
|
||||
#ifdef WITH_CYCLES_DEBUG
|
||||
PyModule_AddObject(mod, "with_debug", Py_True);
|
||||
Py_INCREF(Py_True);
|
||||
#else /* WITH_CYCLES_DEBUG */
|
||||
PyModule_AddObject(mod, "with_debug", Py_False);
|
||||
Py_INCREF(Py_False);
|
||||
#endif /* WITH_CYCLES_DEBUG */
|
||||
|
||||
return (void *)mod;
|
||||
}
|
||||
|
@@ -396,13 +396,6 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
|
||||
/* set the current view */
|
||||
b_engine.active_view_set(b_rview_name.c_str());
|
||||
|
||||
/* Force update in this case, since the camera transform on each frame changes
|
||||
* in different views. This could be optimized by somehow storing the animated
|
||||
* camera transforms separate from the fixed stereo transform. */
|
||||
if ((scene->need_motion() != Scene::MOTION_NONE) && view_index > 0) {
|
||||
sync->tag_update();
|
||||
}
|
||||
|
||||
/* update scene */
|
||||
BL::Object b_camera_override(b_engine.camera_override());
|
||||
sync->sync_camera(b_render, b_camera_override, width, height, b_rview_name.c_str());
|
||||
@@ -636,7 +629,7 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
|
||||
integrator->set_use_emission((bake_filter & BL::BakeSettings::pass_filter_EMIT) != 0);
|
||||
}
|
||||
|
||||
/* Always use transparent background for baking. */
|
||||
/* Always use transpanent background for baking. */
|
||||
scene->background->set_transparent(true);
|
||||
|
||||
/* Load built-in images from Blender. */
|
||||
|
@@ -378,20 +378,11 @@ static ShaderNode *add_node(Scene *scene,
|
||||
}
|
||||
else if (b_node.is_a(&RNA_ShaderNodeMapRange)) {
|
||||
BL::ShaderNodeMapRange b_map_range_node(b_node);
|
||||
if (b_map_range_node.data_type() == BL::ShaderNodeMapRange::data_type_FLOAT_VECTOR) {
|
||||
VectorMapRangeNode *vector_map_range_node = graph->create_node<VectorMapRangeNode>();
|
||||
vector_map_range_node->set_use_clamp(b_map_range_node.clamp());
|
||||
vector_map_range_node->set_range_type(
|
||||
(NodeMapRangeType)b_map_range_node.interpolation_type());
|
||||
node = vector_map_range_node;
|
||||
}
|
||||
else {
|
||||
MapRangeNode *map_range_node = graph->create_node<MapRangeNode>();
|
||||
map_range_node->set_clamp(b_map_range_node.clamp());
|
||||
map_range_node->set_range_type((NodeMapRangeType)b_map_range_node.interpolation_type());
|
||||
node = map_range_node;
|
||||
}
|
||||
}
|
||||
else if (b_node.is_a(&RNA_ShaderNodeClamp)) {
|
||||
BL::ShaderNodeClamp b_clamp_node(b_node);
|
||||
ClampNode *clamp_node = graph->create_node<ClampNode>();
|
||||
@@ -771,12 +762,11 @@ static ShaderNode *add_node(Scene *scene,
|
||||
int scene_frame = b_scene.frame_current();
|
||||
int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame);
|
||||
image->handle = scene->image_manager->add_image(
|
||||
new BlenderImageLoader(b_image, image_frame, b_engine.is_preview()),
|
||||
image->image_params());
|
||||
new BlenderImageLoader(b_image, image_frame), image->image_params());
|
||||
}
|
||||
else {
|
||||
ustring filename = ustring(
|
||||
image_user_file_path(b_image_user, b_image, b_scene.frame_current()));
|
||||
image_user_file_path(b_image_user, b_image, b_scene.frame_current(), true));
|
||||
image->set_filename(filename);
|
||||
}
|
||||
}
|
||||
@@ -807,13 +797,12 @@ static ShaderNode *add_node(Scene *scene,
|
||||
if (is_builtin) {
|
||||
int scene_frame = b_scene.frame_current();
|
||||
int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame);
|
||||
env->handle = scene->image_manager->add_image(
|
||||
new BlenderImageLoader(b_image, image_frame, b_engine.is_preview()),
|
||||
env->handle = scene->image_manager->add_image(new BlenderImageLoader(b_image, image_frame),
|
||||
env->image_params());
|
||||
}
|
||||
else {
|
||||
env->set_filename(
|
||||
ustring(image_user_file_path(b_image_user, b_image, b_scene.frame_current())));
|
||||
ustring(image_user_file_path(b_image_user, b_image, b_scene.frame_current(), false)));
|
||||
}
|
||||
}
|
||||
node = env;
|
||||
|
@@ -95,11 +95,6 @@ void BlenderSync::reset(BL::BlendData &b_data, BL::Scene &b_scene)
|
||||
this->b_scene = b_scene;
|
||||
}
|
||||
|
||||
void BlenderSync::tag_update()
|
||||
{
|
||||
has_updates_ = true;
|
||||
}
|
||||
|
||||
/* Sync */
|
||||
|
||||
void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d)
|
||||
|
@@ -66,8 +66,6 @@ class BlenderSync {
|
||||
|
||||
void reset(BL::BlendData &b_data, BL::Scene &b_scene);
|
||||
|
||||
void tag_update();
|
||||
|
||||
/* sync */
|
||||
void sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d);
|
||||
void sync_data(BL::RenderSettings &b_render,
|
||||
@@ -169,16 +167,12 @@ class BlenderSync {
|
||||
Hair *hair, BL::Mesh &b_mesh, BObjectInfo &b_ob_info, bool motion, int motion_step = 0);
|
||||
bool object_has_particle_hair(BL::Object b_ob);
|
||||
|
||||
/* Point Cloud */
|
||||
void sync_pointcloud(PointCloud *pointcloud, BObjectInfo &b_ob_info);
|
||||
void sync_pointcloud_motion(PointCloud *pointcloud, BObjectInfo &b_ob_info, int motion_step = 0);
|
||||
|
||||
/* Camera */
|
||||
void sync_camera_motion(
|
||||
BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time);
|
||||
|
||||
/* Geometry */
|
||||
Geometry *sync_geometry(BL::Depsgraph &b_depsgraph,
|
||||
Geometry *sync_geometry(BL::Depsgraph &b_depsgrpah,
|
||||
BObjectInfo &b_ob_info,
|
||||
bool object_updated,
|
||||
bool use_particle_hair,
|
||||
@@ -273,6 +267,7 @@ class BlenderSync {
|
||||
|
||||
Progress &progress;
|
||||
|
||||
protected:
|
||||
/* Indicates that `sync_recalc()` detected changes in the scene.
|
||||
* If this flag is false then the data is considered to be up-to-date and will not be
|
||||
* synchronized at all. */
|
||||
|
@@ -33,7 +33,7 @@
|
||||
|
||||
extern "C" {
|
||||
void BKE_image_user_frame_calc(void *ima, void *iuser, int cfra);
|
||||
void BKE_image_user_file_path_ex(void *iuser, void *ima, char *path, bool resolve_udim);
|
||||
void BKE_image_user_file_path(void *iuser, void *ima, char *path);
|
||||
unsigned char *BKE_image_get_pixels_for_frame(void *image, int frame, int tile);
|
||||
float *BKE_image_get_float_pixels_for_frame(void *image, int frame, int tile);
|
||||
}
|
||||
@@ -290,14 +290,25 @@ static inline int render_resolution_y(BL::RenderSettings &b_render)
|
||||
return b_render.resolution_y() * b_render.resolution_percentage() / 100;
|
||||
}
|
||||
|
||||
static inline string image_user_file_path(BL::ImageUser &iuser, BL::Image &ima, int cfra)
|
||||
static inline string image_user_file_path(BL::ImageUser &iuser,
|
||||
BL::Image &ima,
|
||||
int cfra,
|
||||
bool load_tiled)
|
||||
{
|
||||
char filepath[1024];
|
||||
iuser.tile(0);
|
||||
BKE_image_user_frame_calc(ima.ptr.data, iuser.ptr.data, cfra);
|
||||
BKE_image_user_file_path_ex(iuser.ptr.data, ima.ptr.data, filepath, false);
|
||||
BKE_image_user_file_path(iuser.ptr.data, ima.ptr.data, filepath);
|
||||
|
||||
return string(filepath);
|
||||
string filepath_str = string(filepath);
|
||||
if (load_tiled && ima.source() == BL::Image::source_TILED) {
|
||||
string udim;
|
||||
if (!ima.tiles.empty()) {
|
||||
udim = to_string(ima.tiles[0].number());
|
||||
}
|
||||
string_replace(filepath_str, udim, "<UDIM>");
|
||||
}
|
||||
return filepath_str;
|
||||
}
|
||||
|
||||
static inline int image_user_frame_number(BL::ImageUser &iuser, BL::Image &ima, int cfra)
|
||||
|
@@ -33,17 +33,6 @@ set(SRC
|
||||
unaligned.cpp
|
||||
)
|
||||
|
||||
set(SRC_METAL
|
||||
metal.mm
|
||||
)
|
||||
|
||||
if(WITH_CYCLES_DEVICE_METAL)
|
||||
list(APPEND SRC
|
||||
${SRC_METAL}
|
||||
)
|
||||
add_definitions(-DWITH_METAL)
|
||||
endif()
|
||||
|
||||
set(SRC_HEADERS
|
||||
bvh.h
|
||||
bvh2.h
|
||||
@@ -57,7 +46,6 @@ set(SRC_HEADERS
|
||||
sort.h
|
||||
split.h
|
||||
unaligned.h
|
||||
metal.h
|
||||
)
|
||||
|
||||
set(LIB
|
||||
|
@@ -26,7 +26,6 @@
|
||||
#include "scene/hair.h"
|
||||
#include "scene/mesh.h"
|
||||
#include "scene/object.h"
|
||||
#include "scene/pointcloud.h"
|
||||
#include "scene/scene.h"
|
||||
|
||||
#include "util/algorithm.h"
|
||||
@@ -114,9 +113,9 @@ void BVHBuild::add_reference_triangles(BoundBox &root,
|
||||
else {
|
||||
/* Motion triangles, trace optimized case: we split triangle
|
||||
* primitives into separate nodes for each of the time steps.
|
||||
* This way we minimize overlap of neighbor triangle primitives.
|
||||
* This way we minimize overlap of neighbor curve primitives.
|
||||
*/
|
||||
const int num_bvh_steps = params.num_motion_triangle_steps * 2 + 1;
|
||||
const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
|
||||
const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
|
||||
const size_t num_verts = mesh->verts.size();
|
||||
const size_t num_steps = mesh->motion_steps;
|
||||
@@ -270,101 +269,6 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair
|
||||
}
|
||||
}
|
||||
|
||||
void BVHBuild::add_reference_points(BoundBox &root,
|
||||
BoundBox ¢er,
|
||||
PointCloud *pointcloud,
|
||||
int i)
|
||||
{
|
||||
const Attribute *point_attr_mP = NULL;
|
||||
if (pointcloud->has_motion_blur()) {
|
||||
point_attr_mP = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
}
|
||||
|
||||
const float3 *points_data = &pointcloud->points[0];
|
||||
const float *radius_data = &pointcloud->radius[0];
|
||||
const size_t num_points = pointcloud->num_points();
|
||||
const float3 *motion_data = (point_attr_mP) ? point_attr_mP->data_float3() : NULL;
|
||||
const size_t num_steps = pointcloud->get_motion_steps();
|
||||
|
||||
if (point_attr_mP == NULL) {
|
||||
/* Really simple logic for static points. */
|
||||
for (uint j = 0; j < num_points; j++) {
|
||||
const PointCloud::Point point = pointcloud->get_point(j);
|
||||
BoundBox bounds = BoundBox::empty;
|
||||
point.bounds_grow(points_data, radius_data, bounds);
|
||||
if (bounds.valid()) {
|
||||
references.push_back(BVHReference(bounds, j, i, PRIMITIVE_POINT));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (params.num_motion_point_steps == 0 || params.use_spatial_split) {
|
||||
/* Simple case of motion points: single node for the whole
|
||||
* shutter time. Lowest memory usage but less optimal
|
||||
* rendering.
|
||||
*/
|
||||
/* TODO(sergey): Support motion steps for spatially split BVH. */
|
||||
for (uint j = 0; j < num_points; j++) {
|
||||
const PointCloud::Point point = pointcloud->get_point(j);
|
||||
BoundBox bounds = BoundBox::empty;
|
||||
point.bounds_grow(points_data, radius_data, bounds);
|
||||
for (size_t step = 0; step < num_steps - 1; step++) {
|
||||
point.bounds_grow(motion_data + step * num_points, radius_data, bounds);
|
||||
}
|
||||
if (bounds.valid()) {
|
||||
references.push_back(BVHReference(bounds, j, i, PRIMITIVE_MOTION_POINT));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Motion points, trace optimized case: we split point
|
||||
* primitives into separate nodes for each of the time steps.
|
||||
* This way we minimize overlap of neighbor point primitives.
|
||||
*/
|
||||
const int num_bvh_steps = params.num_motion_point_steps * 2 + 1;
|
||||
const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
|
||||
|
||||
for (uint j = 0; j < num_points; j++) {
|
||||
const PointCloud::Point point = pointcloud->get_point(j);
|
||||
const size_t num_steps = pointcloud->get_motion_steps();
|
||||
const float3 *point_steps = point_attr_mP->data_float3();
|
||||
|
||||
/* Calculate bounding box of the previous time step.
|
||||
* Will be reused later to avoid duplicated work on
|
||||
* calculating BVH time step boundbox.
|
||||
*/
|
||||
float4 prev_key = point.motion_key(
|
||||
points_data, radius_data, point_steps, num_points, num_steps, 0.0f, j);
|
||||
BoundBox prev_bounds = BoundBox::empty;
|
||||
point.bounds_grow(prev_key, prev_bounds);
|
||||
/* Create all primitive time steps, */
|
||||
for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
|
||||
const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1;
|
||||
float4 curr_key = point.motion_key(
|
||||
points_data, radius_data, point_steps, num_points, num_steps, curr_time, j);
|
||||
BoundBox curr_bounds = BoundBox::empty;
|
||||
point.bounds_grow(curr_key, curr_bounds);
|
||||
BoundBox bounds = prev_bounds;
|
||||
bounds.grow(curr_bounds);
|
||||
if (bounds.valid()) {
|
||||
const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
|
||||
references.push_back(
|
||||
BVHReference(bounds, j, i, PRIMITIVE_MOTION_POINT, prev_time, curr_time));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
/* Current time boundbox becomes previous one for the
|
||||
* next time step.
|
||||
*/
|
||||
prev_bounds = curr_bounds;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BVHBuild::add_reference_geometry(BoundBox &root,
|
||||
BoundBox ¢er,
|
||||
Geometry *geom,
|
||||
@@ -378,10 +282,6 @@ void BVHBuild::add_reference_geometry(BoundBox &root,
|
||||
Hair *hair = static_cast<Hair *>(geom);
|
||||
add_reference_curves(root, center, hair, object_index);
|
||||
}
|
||||
else if (geom->geometry_type == Geometry::POINTCLOUD) {
|
||||
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
|
||||
add_reference_points(root, center, pointcloud, object_index);
|
||||
}
|
||||
}
|
||||
|
||||
void BVHBuild::add_reference_object(BoundBox &root, BoundBox ¢er, Object *ob, int i)
|
||||
@@ -411,10 +311,6 @@ static size_t count_primitives(Geometry *geom)
|
||||
Hair *hair = static_cast<Hair *>(geom);
|
||||
return count_curve_segments(hair);
|
||||
}
|
||||
else if (geom->geometry_type == Geometry::POINTCLOUD) {
|
||||
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
|
||||
return pointcloud->num_points();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -432,10 +328,9 @@ void BVHBuild::add_references(BVHRange &root)
|
||||
if (!ob->get_geometry()->is_instanced()) {
|
||||
num_alloc_references += count_primitives(ob->get_geometry());
|
||||
}
|
||||
else {
|
||||
else
|
||||
num_alloc_references++;
|
||||
}
|
||||
}
|
||||
else {
|
||||
num_alloc_references += count_primitives(ob->get_geometry());
|
||||
}
|
||||
@@ -499,7 +394,7 @@ BVHNode *BVHBuild::run()
|
||||
spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha;
|
||||
spatial_free_index = 0;
|
||||
|
||||
need_prim_time = params.use_motion_steps();
|
||||
need_prim_time = params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0;
|
||||
|
||||
/* init progress updates */
|
||||
double build_start_time;
|
||||
@@ -640,8 +535,7 @@ bool BVHBuild::range_within_max_leaf_size(const BVHRange &range,
|
||||
const vector<BVHReference> &references) const
|
||||
{
|
||||
size_t size = range.size();
|
||||
size_t max_leaf_size = max(max(params.max_triangle_leaf_size, params.max_curve_leaf_size),
|
||||
params.max_point_leaf_size);
|
||||
size_t max_leaf_size = max(params.max_triangle_leaf_size, params.max_curve_leaf_size);
|
||||
|
||||
if (size > max_leaf_size)
|
||||
return false;
|
||||
@@ -650,44 +544,32 @@ bool BVHBuild::range_within_max_leaf_size(const BVHRange &range,
|
||||
size_t num_motion_triangles = 0;
|
||||
size_t num_curves = 0;
|
||||
size_t num_motion_curves = 0;
|
||||
size_t num_points = 0;
|
||||
size_t num_motion_points = 0;
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
const BVHReference &ref = references[range.start() + i];
|
||||
|
||||
if (ref.prim_type() & PRIMITIVE_CURVE) {
|
||||
if (ref.prim_type() & PRIMITIVE_MOTION) {
|
||||
if (ref.prim_type() & PRIMITIVE_ALL_CURVE) {
|
||||
if (ref.prim_type() & PRIMITIVE_ALL_MOTION) {
|
||||
num_motion_curves++;
|
||||
}
|
||||
else {
|
||||
num_curves++;
|
||||
}
|
||||
}
|
||||
else if (ref.prim_type() & PRIMITIVE_TRIANGLE) {
|
||||
if (ref.prim_type() & PRIMITIVE_MOTION) {
|
||||
else if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) {
|
||||
if (ref.prim_type() & PRIMITIVE_ALL_MOTION) {
|
||||
num_motion_triangles++;
|
||||
}
|
||||
else {
|
||||
num_triangles++;
|
||||
}
|
||||
}
|
||||
else if (ref.prim_type() & PRIMITIVE_POINT) {
|
||||
if (ref.prim_type() & PRIMITIVE_MOTION) {
|
||||
num_motion_points++;
|
||||
}
|
||||
else {
|
||||
num_points++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (num_triangles <= params.max_triangle_leaf_size) &&
|
||||
(num_motion_triangles <= params.max_motion_triangle_leaf_size) &&
|
||||
(num_curves <= params.max_curve_leaf_size) &&
|
||||
(num_motion_curves <= params.max_motion_curve_leaf_size) &&
|
||||
(num_points <= params.max_point_leaf_size) &&
|
||||
(num_motion_points <= params.max_motion_point_leaf_size);
|
||||
(num_motion_curves <= params.max_motion_curve_leaf_size);
|
||||
}
|
||||
|
||||
/* multithreaded binning builder */
|
||||
@@ -973,7 +855,7 @@ BVHNode *BVHBuild::create_leaf_node(const BVHRange &range, const vector<BVHRefer
|
||||
for (int i = 0; i < range.size(); i++) {
|
||||
const BVHReference &ref = references[range.start() + i];
|
||||
if (ref.prim_index() != -1) {
|
||||
uint32_t type_index = PRIMITIVE_INDEX(ref.prim_type() & PRIMITIVE_ALL);
|
||||
uint32_t type_index = bitscan((uint32_t)(ref.prim_type() & PRIMITIVE_ALL));
|
||||
p_ref[type_index].push_back(ref);
|
||||
p_type[type_index].push_back(ref.prim_type());
|
||||
p_index[type_index].push_back(ref.prim_index());
|
||||
|
@@ -39,7 +39,6 @@ class Geometry;
|
||||
class Hair;
|
||||
class Mesh;
|
||||
class Object;
|
||||
class PointCloud;
|
||||
class Progress;
|
||||
|
||||
/* BVH Builder */
|
||||
@@ -69,7 +68,6 @@ class BVHBuild {
|
||||
/* Adding references. */
|
||||
void add_reference_triangles(BoundBox &root, BoundBox ¢er, Mesh *mesh, int i);
|
||||
void add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair, int i);
|
||||
void add_reference_points(BoundBox &root, BoundBox ¢er, PointCloud *pointcloud, int i);
|
||||
void add_reference_geometry(BoundBox &root, BoundBox ¢er, Geometry *geom, int i);
|
||||
void add_reference_object(BoundBox &root, BoundBox ¢er, Object *ob, int i);
|
||||
void add_references(BVHRange &root);
|
||||
|
@@ -19,7 +19,6 @@
|
||||
|
||||
#include "bvh/bvh2.h"
|
||||
#include "bvh/embree.h"
|
||||
#include "bvh/metal.h"
|
||||
#include "bvh/multi.h"
|
||||
#include "bvh/optix.h"
|
||||
|
||||
@@ -41,12 +40,8 @@ const char *bvh_layout_name(BVHLayout layout)
|
||||
return "EMBREE";
|
||||
case BVH_LAYOUT_OPTIX:
|
||||
return "OPTIX";
|
||||
case BVH_LAYOUT_METAL:
|
||||
return "METAL";
|
||||
case BVH_LAYOUT_MULTI_OPTIX:
|
||||
case BVH_LAYOUT_MULTI_METAL:
|
||||
case BVH_LAYOUT_MULTI_OPTIX_EMBREE:
|
||||
case BVH_LAYOUT_MULTI_METAL_EMBREE:
|
||||
return "MULTI";
|
||||
case BVH_LAYOUT_ALL:
|
||||
return "ALL";
|
||||
@@ -107,18 +102,9 @@ BVH *BVH::create(const BVHParams ¶ms,
|
||||
#else
|
||||
(void)device;
|
||||
break;
|
||||
#endif
|
||||
case BVH_LAYOUT_METAL:
|
||||
#ifdef WITH_METAL
|
||||
return bvh_metal_create(params, geometry, objects, device);
|
||||
#else
|
||||
(void)device;
|
||||
break;
|
||||
#endif
|
||||
case BVH_LAYOUT_MULTI_OPTIX:
|
||||
case BVH_LAYOUT_MULTI_METAL:
|
||||
case BVH_LAYOUT_MULTI_OPTIX_EMBREE:
|
||||
case BVH_LAYOUT_MULTI_METAL_EMBREE:
|
||||
return new BVHMulti(params, geometry, objects);
|
||||
case BVH_LAYOUT_NONE:
|
||||
case BVH_LAYOUT_ALL:
|
||||
|
@@ -20,7 +20,6 @@
|
||||
#include "scene/hair.h"
|
||||
#include "scene/mesh.h"
|
||||
#include "scene/object.h"
|
||||
#include "scene/pointcloud.h"
|
||||
|
||||
#include "bvh/build.h"
|
||||
#include "bvh/node.h"
|
||||
@@ -387,7 +386,7 @@ void BVH2::refit_primitives(int start, int end, BoundBox &bbox, uint &visibility
|
||||
}
|
||||
else {
|
||||
/* Primitives. */
|
||||
if (pack.prim_type[prim] & PRIMITIVE_CURVE) {
|
||||
if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
|
||||
/* Curves. */
|
||||
const Hair *hair = static_cast<const Hair *>(ob->get_geometry());
|
||||
int prim_offset = (params.top_level) ? hair->prim_offset : 0;
|
||||
@@ -410,30 +409,6 @@ void BVH2::refit_primitives(int start, int end, BoundBox &bbox, uint &visibility
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (pack.prim_type[prim] & PRIMITIVE_POINT) {
|
||||
/* Points. */
|
||||
const PointCloud *pointcloud = static_cast<const PointCloud *>(ob->get_geometry());
|
||||
int prim_offset = (params.top_level) ? pointcloud->prim_offset : 0;
|
||||
const float3 *points = &pointcloud->points[0];
|
||||
const float *radius = &pointcloud->radius[0];
|
||||
PointCloud::Point point = pointcloud->get_point(pidx - prim_offset);
|
||||
|
||||
point.bounds_grow(points, radius, bbox);
|
||||
|
||||
/* Motion points. */
|
||||
if (pointcloud->get_use_motion_blur()) {
|
||||
Attribute *attr = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
|
||||
if (attr) {
|
||||
size_t pointcloud_size = pointcloud->points.size();
|
||||
size_t steps = pointcloud->get_motion_steps() - 1;
|
||||
float3 *point_steps = attr->data_float3();
|
||||
|
||||
for (size_t i = 0; i < steps; i++)
|
||||
point.bounds_grow(point_steps + i * pointcloud_size, radius, bbox);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Triangles. */
|
||||
const Mesh *mesh = static_cast<const Mesh *>(ob->get_geometry());
|
||||
@@ -530,8 +505,7 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
pack.leaf_nodes.resize(leaf_nodes_size);
|
||||
pack.object_node.resize(objects.size());
|
||||
|
||||
if (params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0 ||
|
||||
params.num_motion_point_steps > 0) {
|
||||
if (params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0) {
|
||||
pack.prim_time.resize(prim_index_size);
|
||||
}
|
||||
|
||||
@@ -590,7 +564,13 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
float2 *bvh_prim_time = bvh->pack.prim_time.size() ? &bvh->pack.prim_time[0] : NULL;
|
||||
|
||||
for (size_t i = 0; i < bvh_prim_index_size; i++) {
|
||||
if (bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
|
||||
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
|
||||
}
|
||||
else {
|
||||
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
|
||||
}
|
||||
|
||||
pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
|
||||
pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i];
|
||||
pack_prim_object[pack_prim_index_offset] = 0; // unused for instances
|
||||
|
@@ -45,7 +45,6 @@
|
||||
# include "scene/hair.h"
|
||||
# include "scene/mesh.h"
|
||||
# include "scene/object.h"
|
||||
# include "scene/pointcloud.h"
|
||||
|
||||
# include "util/foreach.h"
|
||||
# include "util/log.h"
|
||||
@@ -91,7 +90,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
|
||||
++ctx->num_hits;
|
||||
|
||||
/* Always use baked shadow transparency for curves. */
|
||||
if (current_isect.type & PRIMITIVE_CURVE) {
|
||||
if (current_isect.type & PRIMITIVE_ALL_CURVE) {
|
||||
ctx->throughput *= intersection_curve_shadow_transparency(
|
||||
kg, current_isect.object, current_isect.prim, current_isect.u);
|
||||
|
||||
@@ -246,7 +245,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
|
||||
}
|
||||
}
|
||||
|
||||
static void rtc_filter_func_backface_cull(const RTCFilterFunctionNArguments *args)
|
||||
static void rtc_filter_func_thick_curve(const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
const RTCRay *ray = (RTCRay *)args->ray;
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
@@ -259,7 +258,7 @@ static void rtc_filter_func_backface_cull(const RTCFilterFunctionNArguments *arg
|
||||
}
|
||||
}
|
||||
|
||||
static void rtc_filter_occluded_func_backface_cull(const RTCFilterFunctionNArguments *args)
|
||||
static void rtc_filter_occluded_func_thick_curve(const RTCFilterFunctionNArguments *args)
|
||||
{
|
||||
const RTCRay *ray = (RTCRay *)args->ray;
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
@@ -411,12 +410,6 @@ void BVHEmbree::add_object(Object *ob, int i)
|
||||
add_curves(ob, hair, i);
|
||||
}
|
||||
}
|
||||
else if (geom->geometry_type == Geometry::POINTCLOUD) {
|
||||
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
|
||||
if (pointcloud->num_points() > 0) {
|
||||
add_points(ob, pointcloud, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BVHEmbree::add_instance(Object *ob, int i)
|
||||
@@ -631,89 +624,6 @@ void BVHEmbree::set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, c
|
||||
}
|
||||
}
|
||||
|
||||
/* Write the positions and radii of `pointcloud` into the Embree vertex buffers
 * of `geom_id`, one buffer slot per motion time step.
 *
 * geom_id:    Embree geometry whose RTC_BUFFER_TYPE_VERTEX slots are written.
 * pointcloud: source of positions, radii and the optional motion positions.
 * update:     when true, the existing buffer of each slot is fetched,
 *             rewritten and flagged as updated; when false, a new buffer is
 *             created for each slot. */
void BVHEmbree::set_point_vertex_buffer(RTCGeometry geom_id,
                                        const PointCloud *pointcloud,
                                        const bool update)
{
  /* A single time step is used unless the point cloud has motion blur and
   * actually carries the motion position attribute. */
  const Attribute *motion_attr = NULL;
  size_t step_count = 1;
  if (pointcloud->has_motion_blur()) {
    motion_attr = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
    if (motion_attr != NULL) {
      step_count = pointcloud->get_motion_steps();
    }
  }

  const size_t point_count = pointcloud->num_points();

  /* Copy the point data to Embree. */
  const int center_step = (step_count - 1) / 2;
  const float *radii = pointcloud->get_radius().data();

  for (int step = 0; step < step_count; ++step) {
    /* The center step reads the point cloud's own positions. Any other step
     * reads the motion attribute, where steps past the center are indexed one
     * slot lower than their time step. */
    const float3 *positions;
    if (motion_attr != NULL && step != center_step) {
      const int attr_step = (step > center_step) ? (step - 1) : step;
      positions = &motion_attr->data_float3()[attr_step * point_count];
    }
    else {
      positions = pointcloud->get_points().data();
    }

    /* Either reuse the buffer already bound to this slot or create a new one
     * holding `point_count` float4 elements. */
    float4 *rtc_points;
    if (update) {
      rtc_points = (float4 *)rtcGetGeometryBufferData(geom_id, RTC_BUFFER_TYPE_VERTEX, step);
    }
    else {
      rtc_points = (float4 *)rtcSetNewGeometryBuffer(
          geom_id, RTC_BUFFER_TYPE_VERTEX, step, RTC_FORMAT_FLOAT4, sizeof(float) * 4, point_count);
    }

    assert(rtc_points);
    if (rtc_points) {
      /* xyz holds the position, w holds the radius; the same radii are used
       * for every time step. */
      for (size_t p = 0; p < point_count; ++p) {
        float4 packed = float3_to_float4(positions[p]);
        packed.w = radii[p];
        rtc_points[p] = packed;
      }
    }

    if (update) {
      rtcUpdateGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, step);
    }
  }
}
|
||||
|
||||
void BVHEmbree::add_points(const Object *ob, const PointCloud *pointcloud, int i)
|
||||
{
|
||||
size_t prim_offset = pointcloud->prim_offset;
|
||||
|
||||
const Attribute *attr_mP = NULL;
|
||||
size_t num_motion_steps = 1;
|
||||
if (pointcloud->has_motion_blur()) {
|
||||
attr_mP = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
if (attr_mP) {
|
||||
num_motion_steps = pointcloud->get_motion_steps();
|
||||
}
|
||||
}
|
||||
|
||||
enum RTCGeometryType type = RTC_GEOMETRY_TYPE_SPHERE_POINT;
|
||||
|
||||
RTCGeometry geom_id = rtcNewGeometry(rtc_device, type);
|
||||
|
||||
rtcSetGeometryBuildQuality(geom_id, build_quality);
|
||||
rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
|
||||
|
||||
set_point_vertex_buffer(geom_id, pointcloud, false);
|
||||
|
||||
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
|
||||
rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func_backface_cull);
|
||||
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func_backface_cull);
|
||||
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
|
||||
|
||||
rtcCommitGeometry(geom_id);
|
||||
rtcAttachGeometryByID(scene, geom_id, i * 2);
|
||||
rtcReleaseGeometry(geom_id);
|
||||
}
|
||||
|
||||
void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
|
||||
{
|
||||
size_t prim_offset = hair->curve_segment_offset;
|
||||
@@ -768,8 +678,8 @@ void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
|
||||
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
|
||||
}
|
||||
else {
|
||||
rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func_backface_cull);
|
||||
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func_backface_cull);
|
||||
rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func_thick_curve);
|
||||
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func_thick_curve);
|
||||
}
|
||||
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
|
||||
|
||||
@@ -806,14 +716,6 @@ void BVHEmbree::refit(Progress &progress)
|
||||
rtcCommitGeometry(geom);
|
||||
}
|
||||
}
|
||||
else if (geom->geometry_type == Geometry::POINTCLOUD) {
|
||||
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
|
||||
if (pointcloud->num_points() > 0) {
|
||||
RTCGeometry geom = rtcGetGeometry(scene, geom_id);
|
||||
set_point_vertex_buffer(geom, pointcloud, true);
|
||||
rtcCommitGeometry(geom);
|
||||
}
|
||||
}
|
||||
}
|
||||
geom_id += 2;
|
||||
}
|
||||
|
@@ -33,7 +33,6 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
class Hair;
|
||||
class Mesh;
|
||||
class PointCloud;
|
||||
|
||||
class BVHEmbree : public BVH {
|
||||
public:
|
||||
@@ -52,15 +51,11 @@ class BVHEmbree : public BVH {
|
||||
void add_object(Object *ob, int i);
|
||||
void add_instance(Object *ob, int i);
|
||||
void add_curves(const Object *ob, const Hair *hair, int i);
|
||||
void add_points(const Object *ob, const PointCloud *pointcloud, int i);
|
||||
void add_triangles(const Object *ob, const Mesh *mesh, int i);
|
||||
|
||||
private:
|
||||
void set_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh, const bool update);
|
||||
void set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, const bool update);
|
||||
void set_point_vertex_buffer(RTCGeometry geom_id,
|
||||
const PointCloud *pointcloud,
|
||||
const bool update);
|
||||
|
||||
RTCDevice rtc_device;
|
||||
enum RTCBuildQuality build_quality;
|
||||
|
@@ -1,35 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef __BVH_METAL_H__
|
||||
#define __BVH_METAL_H__
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "bvh/bvh.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Create a BVH for the Metal backend from the given build parameters,
 * geometry, objects and device. Only declared when WITH_METAL is defined. */
BVH *bvh_metal_create(const BVHParams &params,
                      const vector<Geometry *> &geometry,
                      const vector<Object *> &objects,
                      Device *device);
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL */
|
||||
|
||||
#endif /* __BVH_METAL_H__ */
|
@@ -1,33 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "device/metal/bvh.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Factory for the Metal BVH: forwards all arguments to the BVHMetal
 * constructor and returns the object allocated with `new`.
 * NOTE(review): the returned raw pointer is presumably owned by the caller -
 * confirm against the call site in BVH::create(). */
BVH *bvh_metal_create(const BVHParams &params,
                      const vector<Geometry *> &geometry,
                      const vector<Object *> &objects,
                      Device *device)
{
  return new BVHMetal(params, geometry, objects, device);
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL */
|
@@ -83,8 +83,6 @@ class BVHParams {
|
||||
int max_motion_triangle_leaf_size;
|
||||
int max_curve_leaf_size;
|
||||
int max_motion_curve_leaf_size;
|
||||
int max_point_leaf_size;
|
||||
int max_motion_point_leaf_size;
|
||||
|
||||
/* object or mesh level bvh */
|
||||
bool top_level;
|
||||
@@ -100,13 +98,13 @@ class BVHParams {
|
||||
/* Split time range to this number of steps and create leaf node for each
|
||||
* of this time steps.
|
||||
*
|
||||
* Speeds up rendering of motion primitives in the cost of higher memory usage.
|
||||
* Speeds up rendering of motion curve primitives in the cost of higher
|
||||
* memory usage.
|
||||
*/
|
||||
int num_motion_curve_steps;
|
||||
|
||||
/* Same as above, but for triangle primitives. */
|
||||
int num_motion_triangle_steps;
|
||||
int num_motion_curve_steps;
|
||||
int num_motion_point_steps;
|
||||
|
||||
/* Same as in SceneParams. */
|
||||
int bvh_type;
|
||||
@@ -134,8 +132,6 @@ class BVHParams {
|
||||
max_motion_triangle_leaf_size = 8;
|
||||
max_curve_leaf_size = 1;
|
||||
max_motion_curve_leaf_size = 4;
|
||||
max_point_leaf_size = 8;
|
||||
max_motion_point_leaf_size = 8;
|
||||
|
||||
top_level = false;
|
||||
bvh_layout = BVH_LAYOUT_BVH2;
|
||||
@@ -143,7 +139,6 @@ class BVHParams {
|
||||
|
||||
num_motion_curve_steps = 0;
|
||||
num_motion_triangle_steps = 0;
|
||||
num_motion_point_steps = 0;
|
||||
|
||||
bvh_type = 0;
|
||||
|
||||
@@ -171,12 +166,6 @@ class BVHParams {
|
||||
return (size <= min_leaf_size || level >= MAX_DEPTH);
|
||||
}
|
||||
|
||||
bool use_motion_steps()
|
||||
{
|
||||
return num_motion_curve_steps > 0 || num_motion_triangle_steps > 0 ||
|
||||
num_motion_point_steps > 0;
|
||||
}
|
||||
|
||||
/* Gets best matching BVH.
|
||||
*
|
||||
* If the requested layout is supported by the device, it will be used.
|
||||
|
@@ -23,7 +23,6 @@
|
||||
#include "scene/hair.h"
|
||||
#include "scene/mesh.h"
|
||||
#include "scene/object.h"
|
||||
#include "scene/pointcloud.h"
|
||||
|
||||
#include "util/algorithm.h"
|
||||
|
||||
@@ -427,32 +426,6 @@ void BVHSpatialSplit::split_curve_primitive(const Hair *hair,
|
||||
}
|
||||
}
|
||||
|
||||
void BVHSpatialSplit::split_point_primitive(const PointCloud *pointcloud,
|
||||
const Transform *tfm,
|
||||
int prim_index,
|
||||
int dim,
|
||||
float pos,
|
||||
BoundBox &left_bounds,
|
||||
BoundBox &right_bounds)
|
||||
{
|
||||
/* No real splitting support for points, assume they are small enough for it
|
||||
* not to matter. */
|
||||
float3 point = pointcloud->get_points()[prim_index];
|
||||
|
||||
if (tfm != NULL) {
|
||||
point = transform_point(tfm, point);
|
||||
}
|
||||
point = get_unaligned_point(point);
|
||||
|
||||
if (point[dim] <= pos) {
|
||||
left_bounds.grow(point);
|
||||
}
|
||||
|
||||
if (point[dim] >= pos) {
|
||||
right_bounds.grow(point);
|
||||
}
|
||||
}
|
||||
|
||||
void BVHSpatialSplit::split_triangle_reference(const BVHReference &ref,
|
||||
const Mesh *mesh,
|
||||
int dim,
|
||||
@@ -480,16 +453,6 @@ void BVHSpatialSplit::split_curve_reference(const BVHReference &ref,
|
||||
right_bounds);
|
||||
}
|
||||
|
||||
void BVHSpatialSplit::split_point_reference(const BVHReference &ref,
|
||||
const PointCloud *pointcloud,
|
||||
int dim,
|
||||
float pos,
|
||||
BoundBox &left_bounds,
|
||||
BoundBox &right_bounds)
|
||||
{
|
||||
split_point_primitive(pointcloud, NULL, ref.prim_index(), dim, pos, left_bounds, right_bounds);
|
||||
}
|
||||
|
||||
void BVHSpatialSplit::split_object_reference(
|
||||
const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds)
|
||||
{
|
||||
@@ -512,13 +475,6 @@ void BVHSpatialSplit::split_object_reference(
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (geom->geometry_type == Geometry::POINTCLOUD) {
|
||||
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
|
||||
for (int point_idx = 0; point_idx < pointcloud->num_points(); ++point_idx) {
|
||||
split_point_primitive(
|
||||
pointcloud, &object->get_tfm(), point_idx, dim, pos, left_bounds, right_bounds);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BVHSpatialSplit::split_reference(const BVHBuild &builder,
|
||||
@@ -535,18 +491,14 @@ void BVHSpatialSplit::split_reference(const BVHBuild &builder,
|
||||
/* loop over vertices/edges. */
|
||||
const Object *ob = builder.objects[ref.prim_object()];
|
||||
|
||||
if (ref.prim_type() & PRIMITIVE_TRIANGLE) {
|
||||
if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) {
|
||||
Mesh *mesh = static_cast<Mesh *>(ob->get_geometry());
|
||||
split_triangle_reference(ref, mesh, dim, pos, left_bounds, right_bounds);
|
||||
}
|
||||
else if (ref.prim_type() & PRIMITIVE_CURVE) {
|
||||
else if (ref.prim_type() & PRIMITIVE_ALL_CURVE) {
|
||||
Hair *hair = static_cast<Hair *>(ob->get_geometry());
|
||||
split_curve_reference(ref, hair, dim, pos, left_bounds, right_bounds);
|
||||
}
|
||||
else if (ref.prim_type() & PRIMITIVE_POINT) {
|
||||
PointCloud *pointcloud = static_cast<PointCloud *>(ob->get_geometry());
|
||||
split_point_reference(ref, pointcloud, dim, pos, left_bounds, right_bounds);
|
||||
}
|
||||
else {
|
||||
split_object_reference(ob, dim, pos, left_bounds, right_bounds);
|
||||
}
|
||||
|
@@ -26,7 +26,6 @@ CCL_NAMESPACE_BEGIN
|
||||
class BVHBuild;
|
||||
class Hair;
|
||||
class Mesh;
|
||||
class PointCloud;
|
||||
struct Transform;
|
||||
|
||||
/* Object Split */
|
||||
@@ -124,13 +123,6 @@ class BVHSpatialSplit {
|
||||
float pos,
|
||||
BoundBox &left_bounds,
|
||||
BoundBox &right_bounds);
|
||||
void split_point_primitive(const PointCloud *pointcloud,
|
||||
const Transform *tfm,
|
||||
int prim_index,
|
||||
int dim,
|
||||
float pos,
|
||||
BoundBox &left_bounds,
|
||||
BoundBox &right_bounds);
|
||||
|
||||
/* Lower-level functions which calculates boundaries of left and right nodes
|
||||
* needed for spatial split.
|
||||
@@ -149,12 +141,6 @@ class BVHSpatialSplit {
|
||||
float pos,
|
||||
BoundBox &left_bounds,
|
||||
BoundBox &right_bounds);
|
||||
void split_point_reference(const BVHReference &ref,
|
||||
const PointCloud *pointcloud,
|
||||
int dim,
|
||||
float pos,
|
||||
BoundBox &left_bounds,
|
||||
BoundBox &right_bounds);
|
||||
void split_object_reference(
|
||||
const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds);
|
||||
|
||||
|
@@ -69,7 +69,7 @@ bool BVHUnaligned::compute_aligned_space(const BVHReference &ref, Transform *ali
|
||||
const int packed_type = ref.prim_type();
|
||||
const int type = (packed_type & PRIMITIVE_ALL);
|
||||
/* No motion blur curves here, we can't fit them to aligned boxes well. */
|
||||
if ((type & PRIMITIVE_CURVE) && !(type & PRIMITIVE_MOTION)) {
|
||||
if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_CURVE_THICK)) {
|
||||
const int curve_index = ref.prim_index();
|
||||
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
|
||||
const Hair *hair = static_cast<const Hair *>(object->get_geometry());
|
||||
@@ -95,7 +95,7 @@ BoundBox BVHUnaligned::compute_aligned_prim_boundbox(const BVHReference &prim,
|
||||
const int packed_type = prim.prim_type();
|
||||
const int type = (packed_type & PRIMITIVE_ALL);
|
||||
/* No motion blur curves here, we can't fit them to aligned boxes well. */
|
||||
if ((type & PRIMITIVE_CURVE) && !(type & PRIMITIVE_MOTION)) {
|
||||
if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_CURVE_THICK)) {
|
||||
const int curve_index = prim.prim_index();
|
||||
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
|
||||
const Hair *hair = static_cast<const Hair *>(object->get_geometry());
|
||||
|
@@ -551,23 +551,4 @@ if(NOT WITH_HIP_DYNLOAD)
|
||||
set(WITH_HIP_DYNLOAD ON)
|
||||
endif()
|
||||
|
||||
###########################################################################
|
||||
# Metal
|
||||
###########################################################################
|
||||
|
||||
# Locate the Metal framework and turn WITH_CYCLES_DEVICE_METAL off when it is
# missing or its SDK is too old.
if(WITH_CYCLES_DEVICE_METAL)
  find_library(METAL_LIBRARY Metal)

  if(NOT METAL_LIBRARY)
    message(STATUS "Metal not found, disabling WITH_CYCLES_DEVICE_METAL")
    set(WITH_CYCLES_DEVICE_METAL OFF)
  # This file was added in the 12.0 SDK, use it as a way to detect the version.
  elseif(NOT EXISTS "${METAL_LIBRARY}/Headers/MTLFunctionStitching.h")
    message(STATUS "Metal version too old, must be SDK 12.0 or newer, disabling WITH_CYCLES_DEVICE_METAL")
    set(WITH_CYCLES_DEVICE_METAL OFF)
  else()
    message(STATUS "Found Metal: ${METAL_LIBRARY}")
  endif()
endif()
|
||||
|
||||
unset(_cycles_lib_dir)
|
||||
|
@@ -43,7 +43,7 @@ if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
|
||||
add_definitions(-DWITH_HIP_DYNLOAD)
|
||||
endif()
|
||||
|
||||
set(SRC_BASE
|
||||
set(SRC
|
||||
device.cpp
|
||||
denoise.cpp
|
||||
graphics_interop.cpp
|
||||
@@ -104,21 +104,6 @@ set(SRC_MULTI
|
||||
multi/device.h
|
||||
)
|
||||
|
||||
set(SRC_METAL
|
||||
metal/bvh.mm
|
||||
metal/bvh.h
|
||||
metal/device.mm
|
||||
metal/device.h
|
||||
metal/device_impl.mm
|
||||
metal/device_impl.h
|
||||
metal/kernel.mm
|
||||
metal/kernel.h
|
||||
metal/queue.mm
|
||||
metal/queue.h
|
||||
metal/util.mm
|
||||
metal/util.h
|
||||
)
|
||||
|
||||
set(SRC_OPTIX
|
||||
optix/device.cpp
|
||||
optix/device.h
|
||||
@@ -138,17 +123,6 @@ set(SRC_HEADERS
|
||||
queue.h
|
||||
)
|
||||
|
||||
set(SRC
|
||||
${SRC_BASE}
|
||||
${SRC_CPU}
|
||||
${SRC_CUDA}
|
||||
${SRC_HIP}
|
||||
${SRC_DUMMY}
|
||||
${SRC_MULTI}
|
||||
${SRC_OPTIX}
|
||||
${SRC_HEADERS}
|
||||
)
|
||||
|
||||
set(LIB
|
||||
cycles_kernel
|
||||
cycles_util
|
||||
@@ -184,15 +158,6 @@ endif()
|
||||
if(WITH_CYCLES_DEVICE_OPTIX)
|
||||
add_definitions(-DWITH_OPTIX)
|
||||
endif()
|
||||
if(WITH_CYCLES_DEVICE_METAL)
|
||||
list(APPEND LIB
|
||||
${METAL_LIBRARY}
|
||||
)
|
||||
add_definitions(-DWITH_METAL)
|
||||
list(APPEND SRC
|
||||
${SRC_METAL}
|
||||
)
|
||||
endif()
|
||||
|
||||
if(WITH_OPENIMAGEDENOISE)
|
||||
list(APPEND LIB
|
||||
@@ -203,12 +168,20 @@ endif()
|
||||
include_directories(${INC})
|
||||
include_directories(SYSTEM ${INC_SYS})
|
||||
|
||||
cycles_add_library(cycles_device "${LIB}" ${SRC})
|
||||
cycles_add_library(cycles_device "${LIB}"
|
||||
${SRC}
|
||||
${SRC_CPU}
|
||||
${SRC_CUDA}
|
||||
${SRC_HIP}
|
||||
${SRC_DUMMY}
|
||||
${SRC_MULTI}
|
||||
${SRC_OPTIX}
|
||||
${SRC_HEADERS}
|
||||
)
|
||||
|
||||
source_group("cpu" FILES ${SRC_CPU})
|
||||
source_group("cuda" FILES ${SRC_CUDA})
|
||||
source_group("dummy" FILES ${SRC_DUMMY})
|
||||
source_group("multi" FILES ${SRC_MULTI})
|
||||
source_group("metal" FILES ${SRC_METAL})
|
||||
source_group("optix" FILES ${SRC_OPTIX})
|
||||
source_group("common" FILES ${SRC} ${SRC_HEADERS})
|
||||
|
@@ -129,7 +129,8 @@ void CPUDevice::mem_alloc(device_memory &mem)
|
||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
}
|
||||
|
||||
if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
|
||||
if (mem.type == MEM_DEVICE_ONLY) {
|
||||
assert(!mem.host_pointer);
|
||||
size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES;
|
||||
void *data = util_aligned_malloc(mem.memory_size(), alignment);
|
||||
mem.device_pointer = (device_ptr)data;
|
||||
@@ -188,7 +189,7 @@ void CPUDevice::mem_free(device_memory &mem)
|
||||
tex_free((device_texture &)mem);
|
||||
}
|
||||
else if (mem.device_pointer) {
|
||||
if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
|
||||
if (mem.type == MEM_DEVICE_ONLY) {
|
||||
util_aligned_free((void *)mem.device_pointer);
|
||||
}
|
||||
mem.device_pointer = 0;
|
||||
@@ -273,8 +274,7 @@ void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
{
|
||||
#ifdef WITH_EMBREE
|
||||
if (bvh->params.bvh_layout == BVH_LAYOUT_EMBREE ||
|
||||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE ||
|
||||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE) {
|
||||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE) {
|
||||
BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
|
||||
if (refit) {
|
||||
bvh_embree->refit(progress);
|
||||
|
@@ -477,10 +477,10 @@ void CUDADevice::reserve_local_memory(const uint kernel_features)
|
||||
* still to make it faster. */
|
||||
CUDADeviceQueue queue(this);
|
||||
|
||||
device_ptr d_path_index = 0;
|
||||
device_ptr d_render_buffer = 0;
|
||||
void *d_path_index = nullptr;
|
||||
void *d_render_buffer = nullptr;
|
||||
int d_work_size = 0;
|
||||
DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size);
|
||||
void *args[] = {&d_path_index, &d_render_buffer, &d_work_size};
|
||||
|
||||
queue.init_execution();
|
||||
queue.enqueue(test_kernel, 1, args);
|
||||
@@ -678,7 +678,7 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_
|
||||
|
||||
void *shared_pointer = 0;
|
||||
|
||||
if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != MEM_DEVICE_ONLY) {
|
||||
if (mem_alloc_result != CUDA_SUCCESS && can_map_host) {
|
||||
if (mem.shared_pointer) {
|
||||
/* Another device already allocated host memory. */
|
||||
mem_alloc_result = CUDA_SUCCESS;
|
||||
@@ -701,15 +701,9 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_
|
||||
}
|
||||
|
||||
if (mem_alloc_result != CUDA_SUCCESS) {
|
||||
if (mem.type == MEM_DEVICE_ONLY) {
|
||||
status = " failed, out of device memory";
|
||||
set_error("System is out of GPU memory");
|
||||
}
|
||||
else {
|
||||
status = " failed, out of device and host memory";
|
||||
set_error("System is out of GPU and shared host memory");
|
||||
}
|
||||
}
|
||||
|
||||
if (mem.name) {
|
||||
VLOG(1) << "Buffer allocate: " << mem.name << ", "
|
||||
|
@@ -89,9 +89,7 @@ bool CUDADeviceQueue::kernel_available(DeviceKernel kernel) const
|
||||
return cuda_device_->kernels.available(kernel);
|
||||
}
|
||||
|
||||
bool CUDADeviceQueue::enqueue(DeviceKernel kernel,
|
||||
const int work_size,
|
||||
DeviceKernelArguments const &args)
|
||||
bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
|
||||
{
|
||||
if (cuda_device_->have_error()) {
|
||||
return false;
|
||||
@@ -135,7 +133,7 @@ bool CUDADeviceQueue::enqueue(DeviceKernel kernel,
|
||||
1,
|
||||
shared_mem_bytes,
|
||||
cuda_stream_,
|
||||
const_cast<void **>(args.values),
|
||||
args,
|
||||
0),
|
||||
"enqueue");
|
||||
|
||||
|
@@ -42,9 +42,7 @@ class CUDADeviceQueue : public DeviceQueue {
|
||||
|
||||
virtual bool kernel_available(DeviceKernel kernel) const override;
|
||||
|
||||
virtual bool enqueue(DeviceKernel kernel,
|
||||
const int work_size,
|
||||
DeviceKernelArguments const &args) override;
|
||||
virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override;
|
||||
|
||||
virtual bool synchronize() override;
|
||||
|
||||
|
@@ -27,7 +27,6 @@
|
||||
#include "device/cuda/device.h"
|
||||
#include "device/dummy/device.h"
|
||||
#include "device/hip/device.h"
|
||||
#include "device/metal/device.h"
|
||||
#include "device/multi/device.h"
|
||||
#include "device/optix/device.h"
|
||||
|
||||
@@ -50,7 +49,6 @@ vector<DeviceInfo> Device::cuda_devices;
|
||||
vector<DeviceInfo> Device::optix_devices;
|
||||
vector<DeviceInfo> Device::cpu_devices;
|
||||
vector<DeviceInfo> Device::hip_devices;
|
||||
vector<DeviceInfo> Device::metal_devices;
|
||||
uint Device::devices_initialized_mask = 0;
|
||||
|
||||
/* Device */
|
||||
@@ -107,12 +105,6 @@ Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
break;
|
||||
#endif
|
||||
|
||||
#ifdef WITH_METAL
|
||||
case DEVICE_METAL:
|
||||
if (device_metal_init())
|
||||
device = device_metal_create(info, stats, profiler);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -136,8 +128,6 @@ DeviceType Device::type_from_string(const char *name)
|
||||
return DEVICE_MULTI;
|
||||
else if (strcmp(name, "HIP") == 0)
|
||||
return DEVICE_HIP;
|
||||
else if (strcmp(name, "METAL") == 0)
|
||||
return DEVICE_METAL;
|
||||
|
||||
return DEVICE_NONE;
|
||||
}
|
||||
@@ -154,8 +144,6 @@ string Device::string_from_type(DeviceType type)
|
||||
return "MULTI";
|
||||
else if (type == DEVICE_HIP)
|
||||
return "HIP";
|
||||
else if (type == DEVICE_METAL)
|
||||
return "METAL";
|
||||
|
||||
return "";
|
||||
}
|
||||
@@ -173,9 +161,7 @@ vector<DeviceType> Device::available_types()
|
||||
#ifdef WITH_HIP
|
||||
types.push_back(DEVICE_HIP);
|
||||
#endif
|
||||
#ifdef WITH_METAL
|
||||
types.push_back(DEVICE_METAL);
|
||||
#endif
|
||||
|
||||
return types;
|
||||
}
|
||||
|
||||
@@ -241,20 +227,6 @@ vector<DeviceInfo> Device::available_devices(uint mask)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef WITH_METAL
|
||||
if (mask & DEVICE_MASK_METAL) {
|
||||
if (!(devices_initialized_mask & DEVICE_MASK_METAL)) {
|
||||
if (device_metal_init()) {
|
||||
device_metal_info(metal_devices);
|
||||
}
|
||||
devices_initialized_mask |= DEVICE_MASK_METAL;
|
||||
}
|
||||
foreach (DeviceInfo &info, metal_devices) {
|
||||
devices.push_back(info);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return devices;
|
||||
}
|
||||
|
||||
@@ -294,15 +266,6 @@ string Device::device_capabilities(uint mask)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef WITH_METAL
|
||||
if (mask & DEVICE_MASK_METAL) {
|
||||
if (device_metal_init()) {
|
||||
capabilities += "\nMetal device capabilities:\n";
|
||||
capabilities += device_metal_capabilities();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return capabilities;
|
||||
}
|
||||
|
||||
@@ -391,7 +354,6 @@ void Device::free_memory()
|
||||
optix_devices.free_memory();
|
||||
hip_devices.free_memory();
|
||||
cpu_devices.free_memory();
|
||||
metal_devices.free_memory();
|
||||
}
|
||||
|
||||
unique_ptr<DeviceQueue> Device::gpu_queue_create()
|
||||
|
@@ -52,7 +52,6 @@ enum DeviceType {
|
||||
DEVICE_MULTI,
|
||||
DEVICE_OPTIX,
|
||||
DEVICE_HIP,
|
||||
DEVICE_METAL,
|
||||
DEVICE_DUMMY,
|
||||
};
|
||||
|
||||
@@ -61,7 +60,6 @@ enum DeviceTypeMask {
|
||||
DEVICE_MASK_CUDA = (1 << DEVICE_CUDA),
|
||||
DEVICE_MASK_OPTIX = (1 << DEVICE_OPTIX),
|
||||
DEVICE_MASK_HIP = (1 << DEVICE_HIP),
|
||||
DEVICE_MASK_METAL = (1 << DEVICE_METAL),
|
||||
DEVICE_MASK_ALL = ~0
|
||||
};
|
||||
|
||||
@@ -283,7 +281,6 @@ class Device {
|
||||
static vector<DeviceInfo> optix_devices;
|
||||
static vector<DeviceInfo> cpu_devices;
|
||||
static vector<DeviceInfo> hip_devices;
|
||||
static vector<DeviceInfo> metal_devices;
|
||||
static uint devices_initialized_mask;
|
||||
};
|
||||
|
||||
|
@@ -57,16 +57,9 @@ bool device_hip_init()
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (hipew_result == HIPEW_ERROR_ATEXIT_FAILED) {
|
||||
VLOG(1) << "HIPEW initialization failed: Error setting up atexit() handler";
|
||||
}
|
||||
else if (hipew_result == HIPEW_ERROR_OLD_DRIVER) {
|
||||
VLOG(1) << "HIPEW initialization failed: Driver version too old, requires AMD Radeon Pro "
|
||||
"21.Q4 driver or newer";
|
||||
}
|
||||
else {
|
||||
VLOG(1) << "HIPEW initialization failed: Error opening HIP dynamic library";
|
||||
}
|
||||
VLOG(1) << "HIPEW initialization failed: "
|
||||
<< ((hipew_result == HIPEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" :
|
||||
"Error opening the library");
|
||||
}
|
||||
|
||||
return result;
|
||||
|
@@ -440,10 +440,10 @@ void HIPDevice::reserve_local_memory(const uint kernel_features)
|
||||
* still to make it faster. */
|
||||
HIPDeviceQueue queue(this);
|
||||
|
||||
device_ptr d_path_index = 0;
|
||||
device_ptr d_render_buffer = 0;
|
||||
void *d_path_index = nullptr;
|
||||
void *d_render_buffer = nullptr;
|
||||
int d_work_size = 0;
|
||||
DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size);
|
||||
void *args[] = {&d_path_index, &d_render_buffer, &d_work_size};
|
||||
|
||||
queue.init_execution();
|
||||
queue.enqueue(test_kernel, 1, args);
|
||||
|
@@ -89,9 +89,7 @@ bool HIPDeviceQueue::kernel_available(DeviceKernel kernel) const
|
||||
return hip_device_->kernels.available(kernel);
|
||||
}
|
||||
|
||||
bool HIPDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
const int work_size,
|
||||
DeviceKernelArguments const &args)
|
||||
bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
|
||||
{
|
||||
if (hip_device_->have_error()) {
|
||||
return false;
|
||||
@@ -134,7 +132,7 @@ bool HIPDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
1,
|
||||
shared_mem_bytes,
|
||||
hip_stream_,
|
||||
const_cast<void **>(args.values),
|
||||
args,
|
||||
0),
|
||||
"enqueue");
|
||||
|
||||
|
@@ -42,9 +42,7 @@ class HIPDeviceQueue : public DeviceQueue {
|
||||
|
||||
virtual bool kernel_available(DeviceKernel kernel) const override;
|
||||
|
||||
virtual bool enqueue(DeviceKernel kernel,
|
||||
const int work_size,
|
||||
DeviceKernelArguments const &args) override;
|
||||
virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override;
|
||||
|
||||
virtual bool synchronize() override;
|
||||
|
||||
|
@@ -263,7 +263,6 @@ class device_memory {
|
||||
friend class CUDADevice;
|
||||
friend class OptiXDevice;
|
||||
friend class HIPDevice;
|
||||
friend class MetalDevice;
|
||||
|
||||
/* Only create through subclasses. */
|
||||
device_memory(Device *device, const char *name, MemoryType type);
|
||||
@@ -582,7 +581,7 @@ template<typename T> class device_vector : public device_memory {
|
||||
* from an already allocated base memory. It is freed automatically when it
|
||||
* goes out of scope, which should happen before base memory is freed.
|
||||
*
|
||||
* NOTE: some devices require offset and size of the sub_ptr to be properly
|
||||
* Note: some devices require offset and size of the sub_ptr to be properly
|
||||
* aligned to device->mem_address_alingment(). */
|
||||
|
||||
class device_sub_ptr {
|
||||
|
@@ -1,66 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "bvh/bvh.h"
|
||||
# include "bvh/params.h"
|
||||
# include "device/memory.h"
|
||||
|
||||
# include <Metal/Metal.h>
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class BVHMetal : public BVH {
|
||||
public:
|
||||
API_AVAILABLE(macos(11.0))
|
||||
id<MTLAccelerationStructure> accel_struct = nil;
|
||||
bool accel_struct_building = false;
|
||||
|
||||
API_AVAILABLE(macos(11.0))
|
||||
vector<id<MTLAccelerationStructure>> blas_array;
|
||||
|
||||
bool motion_blur = false;
|
||||
|
||||
Stats &stats;
|
||||
|
||||
bool build(Progress &progress, id<MTLDevice> device, id<MTLCommandQueue> queue, bool refit);
|
||||
|
||||
BVHMetal(const BVHParams ¶ms,
|
||||
const vector<Geometry *> &geometry,
|
||||
const vector<Object *> &objects,
|
||||
Device *device);
|
||||
virtual ~BVHMetal();
|
||||
|
||||
bool build_BLAS(Progress &progress, id<MTLDevice> device, id<MTLCommandQueue> queue, bool refit);
|
||||
bool build_BLAS_mesh(Progress &progress,
|
||||
id<MTLDevice> device,
|
||||
id<MTLCommandQueue> queue,
|
||||
Geometry *const geom,
|
||||
bool refit);
|
||||
bool build_BLAS_hair(Progress &progress,
|
||||
id<MTLDevice> device,
|
||||
id<MTLCommandQueue> queue,
|
||||
Geometry *const geom,
|
||||
bool refit);
|
||||
bool build_TLAS(Progress &progress, id<MTLDevice> device, id<MTLCommandQueue> queue, bool refit);
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL */
|
@@ -1,813 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "scene/hair.h"
|
||||
# include "scene/mesh.h"
|
||||
# include "scene/object.h"
|
||||
|
||||
# include "util/progress.h"
|
||||
|
||||
# include "device/metal/bvh.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
# define BVH_status(...) \
|
||||
{ \
|
||||
string str = string_printf(__VA_ARGS__); \
|
||||
progress.set_substatus(str); \
|
||||
}
|
||||
|
||||
BVHMetal::BVHMetal(const BVHParams ¶ms_,
|
||||
const vector<Geometry *> &geometry_,
|
||||
const vector<Object *> &objects_,
|
||||
Device *device)
|
||||
: BVH(params_, geometry_, objects_), stats(device->stats)
|
||||
{
|
||||
}
|
||||
|
||||
BVHMetal::~BVHMetal()
|
||||
{
|
||||
if (@available(macos 12.0, *)) {
|
||||
if (accel_struct) {
|
||||
stats.mem_free(accel_struct.allocatedSize);
|
||||
[accel_struct release];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool BVHMetal::build_BLAS_mesh(Progress &progress,
|
||||
id<MTLDevice> device,
|
||||
id<MTLCommandQueue> queue,
|
||||
Geometry *const geom,
|
||||
bool refit)
|
||||
{
|
||||
if (@available(macos 12.0, *)) {
|
||||
/* Build BLAS for triangle primitives */
|
||||
Mesh *const mesh = static_cast<Mesh *const>(geom);
|
||||
if (mesh->num_triangles() == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/*------------------------------------------------*/
|
||||
BVH_status(
|
||||
"Building mesh BLAS | %7d tris | %s", (int)mesh->num_triangles(), geom->name.c_str());
|
||||
/*------------------------------------------------*/
|
||||
|
||||
const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);
|
||||
|
||||
const array<float3> &verts = mesh->get_verts();
|
||||
const array<int> &tris = mesh->get_triangles();
|
||||
const size_t num_verts = verts.size();
|
||||
const size_t num_indices = tris.size();
|
||||
|
||||
size_t num_motion_steps = 1;
|
||||
Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
|
||||
num_motion_steps = mesh->get_motion_steps();
|
||||
}
|
||||
|
||||
MTLResourceOptions storage_mode;
|
||||
if (device.hasUnifiedMemory) {
|
||||
storage_mode = MTLResourceStorageModeShared;
|
||||
}
|
||||
else {
|
||||
storage_mode = MTLResourceStorageModeManaged;
|
||||
}
|
||||
|
||||
/* Upload the mesh data to the GPU */
|
||||
id<MTLBuffer> posBuf = nil;
|
||||
id<MTLBuffer> indexBuf = [device newBufferWithBytes:tris.data()
|
||||
length:num_indices * sizeof(tris.data()[0])
|
||||
options:storage_mode];
|
||||
|
||||
if (num_motion_steps == 1) {
|
||||
posBuf = [device newBufferWithBytes:verts.data()
|
||||
length:num_verts * sizeof(verts.data()[0])
|
||||
options:storage_mode];
|
||||
}
|
||||
else {
|
||||
posBuf = [device newBufferWithLength:num_verts * num_motion_steps * sizeof(verts.data()[0])
|
||||
options:storage_mode];
|
||||
float3 *dest_data = (float3 *)[posBuf contents];
|
||||
size_t center_step = (num_motion_steps - 1) / 2;
|
||||
for (size_t step = 0; step < num_motion_steps; ++step) {
|
||||
const float3 *verts = mesh->get_verts().data();
|
||||
|
||||
/* The center step for motion vertices is not stored in the attribute. */
|
||||
if (step != center_step) {
|
||||
verts = motion_keys->data_float3() + (step > center_step ? step - 1 : step) * num_verts;
|
||||
}
|
||||
memcpy(dest_data + num_verts * step, verts, num_verts * sizeof(float3));
|
||||
}
|
||||
if (storage_mode == MTLResourceStorageModeManaged) {
|
||||
[posBuf didModifyRange:NSMakeRange(0, posBuf.length)];
|
||||
}
|
||||
}
|
||||
|
||||
/* Create an acceleration structure. */
|
||||
MTLAccelerationStructureGeometryDescriptor *geomDesc;
|
||||
if (num_motion_steps > 1) {
|
||||
std::vector<MTLMotionKeyframeData *> vertex_ptrs;
|
||||
vertex_ptrs.reserve(num_motion_steps);
|
||||
for (size_t step = 0; step < num_motion_steps; ++step) {
|
||||
MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
|
||||
k.buffer = posBuf;
|
||||
k.offset = num_verts * step * sizeof(float3);
|
||||
vertex_ptrs.push_back(k);
|
||||
}
|
||||
|
||||
MTLAccelerationStructureMotionTriangleGeometryDescriptor *geomDescMotion =
|
||||
[MTLAccelerationStructureMotionTriangleGeometryDescriptor descriptor];
|
||||
geomDescMotion.vertexBuffers = [NSArray arrayWithObjects:vertex_ptrs.data()
|
||||
count:vertex_ptrs.size()];
|
||||
geomDescMotion.vertexStride = sizeof(verts.data()[0]);
|
||||
geomDescMotion.indexBuffer = indexBuf;
|
||||
geomDescMotion.indexBufferOffset = 0;
|
||||
geomDescMotion.indexType = MTLIndexTypeUInt32;
|
||||
geomDescMotion.triangleCount = num_indices / 3;
|
||||
geomDescMotion.intersectionFunctionTableOffset = 0;
|
||||
|
||||
geomDesc = geomDescMotion;
|
||||
}
|
||||
else {
|
||||
MTLAccelerationStructureTriangleGeometryDescriptor *geomDescNoMotion =
|
||||
[MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
|
||||
geomDescNoMotion.vertexBuffer = posBuf;
|
||||
geomDescNoMotion.vertexBufferOffset = 0;
|
||||
geomDescNoMotion.vertexStride = sizeof(verts.data()[0]);
|
||||
geomDescNoMotion.indexBuffer = indexBuf;
|
||||
geomDescNoMotion.indexBufferOffset = 0;
|
||||
geomDescNoMotion.indexType = MTLIndexTypeUInt32;
|
||||
geomDescNoMotion.triangleCount = num_indices / 3;
|
||||
geomDescNoMotion.intersectionFunctionTableOffset = 0;
|
||||
|
||||
geomDesc = geomDescNoMotion;
|
||||
}
|
||||
|
||||
/* Force a single any-hit call, so shadow record-all behavior works correctly */
|
||||
/* (Match optix behavior: unsigned int build_flags =
|
||||
* OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
|
||||
geomDesc.allowDuplicateIntersectionFunctionInvocation = false;
|
||||
|
||||
MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
|
||||
[MTLPrimitiveAccelerationStructureDescriptor descriptor];
|
||||
accelDesc.geometryDescriptors = @[ geomDesc ];
|
||||
if (num_motion_steps > 1) {
|
||||
accelDesc.motionStartTime = 0.0f;
|
||||
accelDesc.motionEndTime = 1.0f;
|
||||
accelDesc.motionStartBorderMode = MTLMotionBorderModeClamp;
|
||||
accelDesc.motionEndBorderMode = MTLMotionBorderModeClamp;
|
||||
accelDesc.motionKeyframeCount = num_motion_steps;
|
||||
}
|
||||
|
||||
if (!use_fast_trace_bvh) {
|
||||
accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
|
||||
MTLAccelerationStructureUsagePreferFastBuild);
|
||||
}
|
||||
|
||||
MTLAccelerationStructureSizes accelSizes = [device
|
||||
accelerationStructureSizesWithDescriptor:accelDesc];
|
||||
id<MTLAccelerationStructure> accel_uncompressed = [device
|
||||
newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
|
||||
id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
|
||||
options:MTLResourceStorageModePrivate];
|
||||
id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared];
|
||||
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
|
||||
id<MTLAccelerationStructureCommandEncoder> accelEnc =
|
||||
[accelCommands accelerationStructureCommandEncoder];
|
||||
if (refit) {
|
||||
[accelEnc refitAccelerationStructure:accel_struct
|
||||
descriptor:accelDesc
|
||||
destination:accel_uncompressed
|
||||
scratchBuffer:scratchBuf
|
||||
scratchBufferOffset:0];
|
||||
}
|
||||
else {
|
||||
[accelEnc buildAccelerationStructure:accel_uncompressed
|
||||
descriptor:accelDesc
|
||||
scratchBuffer:scratchBuf
|
||||
scratchBufferOffset:0];
|
||||
}
|
||||
if (use_fast_trace_bvh) {
|
||||
[accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
|
||||
toBuffer:sizeBuf
|
||||
offset:0
|
||||
sizeDataType:MTLDataTypeULong];
|
||||
}
|
||||
[accelEnc endEncoding];
|
||||
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
|
||||
/* free temp resources */
|
||||
[scratchBuf release];
|
||||
[indexBuf release];
|
||||
[posBuf release];
|
||||
|
||||
if (use_fast_trace_bvh) {
|
||||
/* Compact the accel structure */
|
||||
uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;
|
||||
|
||||
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
|
||||
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
|
||||
id<MTLAccelerationStructureCommandEncoder> accelEnc =
|
||||
[accelCommands accelerationStructureCommandEncoder];
|
||||
id<MTLAccelerationStructure> accel = [device
|
||||
newAccelerationStructureWithSize:compressed_size];
|
||||
[accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
|
||||
toAccelerationStructure:accel];
|
||||
[accelEnc endEncoding];
|
||||
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
|
||||
uint64_t allocated_size = [accel allocatedSize];
|
||||
stats.mem_alloc(allocated_size);
|
||||
accel_struct = accel;
|
||||
[accel_uncompressed release];
|
||||
accel_struct_building = false;
|
||||
}];
|
||||
[accelCommands commit];
|
||||
});
|
||||
}
|
||||
else {
|
||||
/* set our acceleration structure to the uncompressed structure */
|
||||
accel_struct = accel_uncompressed;
|
||||
|
||||
uint64_t allocated_size = [accel_struct allocatedSize];
|
||||
stats.mem_alloc(allocated_size);
|
||||
accel_struct_building = false;
|
||||
}
|
||||
[sizeBuf release];
|
||||
}];
|
||||
|
||||
accel_struct_building = true;
|
||||
[accelCommands commit];
|
||||
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool BVHMetal::build_BLAS_hair(Progress &progress,
|
||||
id<MTLDevice> device,
|
||||
id<MTLCommandQueue> queue,
|
||||
Geometry *const geom,
|
||||
bool refit)
|
||||
{
|
||||
if (@available(macos 12.0, *)) {
|
||||
/* Build BLAS for hair curves */
|
||||
Hair *hair = static_cast<Hair *>(geom);
|
||||
if (hair->num_curves() == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/*------------------------------------------------*/
|
||||
BVH_status(
|
||||
"Building hair BLAS | %7d curves | %s", (int)hair->num_curves(), geom->name.c_str());
|
||||
/*------------------------------------------------*/
|
||||
|
||||
const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);
|
||||
const size_t num_segments = hair->num_segments();
|
||||
|
||||
size_t num_motion_steps = 1;
|
||||
Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
|
||||
num_motion_steps = hair->get_motion_steps();
|
||||
}
|
||||
|
||||
const size_t num_aabbs = num_segments * num_motion_steps;
|
||||
|
||||
MTLResourceOptions storage_mode;
|
||||
if (device.hasUnifiedMemory) {
|
||||
storage_mode = MTLResourceStorageModeShared;
|
||||
}
|
||||
else {
|
||||
storage_mode = MTLResourceStorageModeManaged;
|
||||
}
|
||||
|
||||
/* Allocate a GPU buffer for the AABB data and populate it */
|
||||
id<MTLBuffer> aabbBuf = [device
|
||||
newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox)
|
||||
options:storage_mode];
|
||||
MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];
|
||||
|
||||
/* Get AABBs for each motion step */
|
||||
size_t center_step = (num_motion_steps - 1) / 2;
|
||||
for (size_t step = 0; step < num_motion_steps; ++step) {
|
||||
/* The center step for motion vertices is not stored in the attribute */
|
||||
const float3 *keys = hair->get_curve_keys().data();
|
||||
if (step != center_step) {
|
||||
size_t attr_offset = (step > center_step) ? step - 1 : step;
|
||||
/* Technically this is a float4 array, but sizeof(float3) == sizeof(float4) */
|
||||
keys = motion_keys->data_float3() + attr_offset * hair->get_curve_keys().size();
|
||||
}
|
||||
|
||||
for (size_t j = 0, i = 0; j < hair->num_curves(); ++j) {
|
||||
const Hair::Curve curve = hair->get_curve(j);
|
||||
|
||||
for (int segment = 0; segment < curve.num_segments(); ++segment, ++i) {
|
||||
{
|
||||
BoundBox bounds = BoundBox::empty;
|
||||
curve.bounds_grow(segment, keys, hair->get_curve_radius().data(), bounds);
|
||||
|
||||
const size_t index = step * num_segments + i;
|
||||
aabb_data[index].min = (MTLPackedFloat3 &)bounds.min;
|
||||
aabb_data[index].max = (MTLPackedFloat3 &)bounds.max;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (storage_mode == MTLResourceStorageModeManaged) {
|
||||
[aabbBuf didModifyRange:NSMakeRange(0, aabbBuf.length)];
|
||||
}
|
||||
|
||||
# if 0
|
||||
for (size_t i=0; i<num_aabbs && i < 400; i++) {
|
||||
MTLAxisAlignedBoundingBox& bb = aabb_data[i];
|
||||
printf(" %d: %.1f,%.1f,%.1f -- %.1f,%.1f,%.1f\n", int(i), bb.min.x, bb.min.y, bb.min.z, bb.max.x, bb.max.y, bb.max.z);
|
||||
}
|
||||
# endif
|
||||
|
||||
MTLAccelerationStructureGeometryDescriptor *geomDesc;
|
||||
if (motion_blur) {
|
||||
std::vector<MTLMotionKeyframeData *> aabb_ptrs;
|
||||
aabb_ptrs.reserve(num_motion_steps);
|
||||
for (size_t step = 0; step < num_motion_steps; ++step) {
|
||||
MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
|
||||
k.buffer = aabbBuf;
|
||||
k.offset = step * num_segments * sizeof(MTLAxisAlignedBoundingBox);
|
||||
aabb_ptrs.push_back(k);
|
||||
}
|
||||
|
||||
MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *geomDescMotion =
|
||||
[MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor descriptor];
|
||||
geomDescMotion.boundingBoxBuffers = [NSArray arrayWithObjects:aabb_ptrs.data()
|
||||
count:aabb_ptrs.size()];
|
||||
geomDescMotion.boundingBoxCount = num_segments;
|
||||
geomDescMotion.boundingBoxStride = sizeof(aabb_data[0]);
|
||||
geomDescMotion.intersectionFunctionTableOffset = 1;
|
||||
|
||||
/* Force a single any-hit call, so shadow record-all behavior works correctly */
|
||||
/* (Match optix behavior: unsigned int build_flags =
|
||||
* OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
|
||||
geomDescMotion.allowDuplicateIntersectionFunctionInvocation = false;
|
||||
geomDescMotion.opaque = true;
|
||||
geomDesc = geomDescMotion;
|
||||
}
|
||||
else {
|
||||
MTLAccelerationStructureBoundingBoxGeometryDescriptor *geomDescNoMotion =
|
||||
[MTLAccelerationStructureBoundingBoxGeometryDescriptor descriptor];
|
||||
geomDescNoMotion.boundingBoxBuffer = aabbBuf;
|
||||
geomDescNoMotion.boundingBoxBufferOffset = 0;
|
||||
geomDescNoMotion.boundingBoxCount = int(num_aabbs);
|
||||
geomDescNoMotion.boundingBoxStride = sizeof(aabb_data[0]);
|
||||
geomDescNoMotion.intersectionFunctionTableOffset = 1;
|
||||
|
||||
/* Force a single any-hit call, so shadow record-all behavior works correctly */
|
||||
/* (Match optix behavior: unsigned int build_flags =
|
||||
* OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
|
||||
geomDescNoMotion.allowDuplicateIntersectionFunctionInvocation = false;
|
||||
geomDescNoMotion.opaque = true;
|
||||
geomDesc = geomDescNoMotion;
|
||||
}
|
||||
|
||||
MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
|
||||
[MTLPrimitiveAccelerationStructureDescriptor descriptor];
|
||||
accelDesc.geometryDescriptors = @[ geomDesc ];
|
||||
|
||||
if (motion_blur) {
|
||||
accelDesc.motionStartTime = 0.0f;
|
||||
accelDesc.motionEndTime = 1.0f;
|
||||
accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
|
||||
accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
|
||||
accelDesc.motionKeyframeCount = num_motion_steps;
|
||||
}
|
||||
|
||||
if (!use_fast_trace_bvh) {
|
||||
accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
|
||||
MTLAccelerationStructureUsagePreferFastBuild);
|
||||
}
|
||||
|
||||
MTLAccelerationStructureSizes accelSizes = [device
|
||||
accelerationStructureSizesWithDescriptor:accelDesc];
|
||||
id<MTLAccelerationStructure> accel_uncompressed = [device
|
||||
newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
|
||||
id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
|
||||
options:MTLResourceStorageModePrivate];
|
||||
id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared];
|
||||
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
|
||||
id<MTLAccelerationStructureCommandEncoder> accelEnc =
|
||||
[accelCommands accelerationStructureCommandEncoder];
|
||||
if (refit) {
|
||||
[accelEnc refitAccelerationStructure:accel_struct
|
||||
descriptor:accelDesc
|
||||
destination:accel_uncompressed
|
||||
scratchBuffer:scratchBuf
|
||||
scratchBufferOffset:0];
|
||||
}
|
||||
else {
|
||||
[accelEnc buildAccelerationStructure:accel_uncompressed
|
||||
descriptor:accelDesc
|
||||
scratchBuffer:scratchBuf
|
||||
scratchBufferOffset:0];
|
||||
}
|
||||
if (use_fast_trace_bvh) {
|
||||
[accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
|
||||
toBuffer:sizeBuf
|
||||
offset:0
|
||||
sizeDataType:MTLDataTypeULong];
|
||||
}
|
||||
[accelEnc endEncoding];
|
||||
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
|
||||
/* free temp resources */
|
||||
[scratchBuf release];
|
||||
[aabbBuf release];
|
||||
|
||||
if (use_fast_trace_bvh) {
|
||||
/* Compact the accel structure */
|
||||
uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;
|
||||
|
||||
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
|
||||
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
|
||||
id<MTLAccelerationStructureCommandEncoder> accelEnc =
|
||||
[accelCommands accelerationStructureCommandEncoder];
|
||||
id<MTLAccelerationStructure> accel = [device
|
||||
newAccelerationStructureWithSize:compressed_size];
|
||||
[accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
|
||||
toAccelerationStructure:accel];
|
||||
[accelEnc endEncoding];
|
||||
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
|
||||
uint64_t allocated_size = [accel allocatedSize];
|
||||
stats.mem_alloc(allocated_size);
|
||||
accel_struct = accel;
|
||||
[accel_uncompressed release];
|
||||
accel_struct_building = false;
|
||||
}];
|
||||
[accelCommands commit];
|
||||
});
|
||||
}
|
||||
else {
|
||||
/* set our acceleration structure to the uncompressed structure */
|
||||
accel_struct = accel_uncompressed;
|
||||
|
||||
uint64_t allocated_size = [accel_struct allocatedSize];
|
||||
stats.mem_alloc(allocated_size);
|
||||
accel_struct_building = false;
|
||||
}
|
||||
[sizeBuf release];
|
||||
}];
|
||||
|
||||
accel_struct_building = true;
|
||||
[accelCommands commit];
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Build (or refit) the bottom-level acceleration structure for the single geometry owned by
 * this BVH. Mesh and volume geometry go to build_BLAS_mesh(), hair to build_BLAS_hair().
 * Returns false for any other geometry type, or when the macOS 12.0 API is unavailable. */
bool BVHMetal::build_BLAS(Progress &progress,
                          id<MTLDevice> device,
                          id<MTLCommandQueue> queue,
                          bool refit)
{
  bool built = false;

  if (@available(macos 12.0, *)) {
    /* A BLAS always wraps exactly one object and one geometry. */
    assert(objects.size() == 1 && geometry.size() == 1);

    Geometry *const only_geom = geometry[0];
    const auto type = only_geom->geometry_type;

    if (type == Geometry::MESH || type == Geometry::VOLUME) {
      built = build_BLAS_mesh(progress, device, queue, only_geom, refit);
    }
    else if (type == Geometry::HAIR) {
      built = build_BLAS_hair(progress, device, queue, only_geom, refit);
    }
  }

  return built;
}
|
||||
|
||||
/* Build (or refit) the top-level acceleration structure (TLAS) that instances the BLAS of every
 * traceable object.
 *
 * Steps: wait for pending BLAS builds, count instances and motion transforms, fill the instance
 * (and motion transform) buffers, encode the build/refit and wait for it, then cache the result
 * in `accel_struct` and `blas_array`.
 *
 * Returns true once the structure is built, false when the macOS 12.0 API is unavailable. */
bool BVHMetal::build_TLAS(Progress &progress,
                          id<MTLDevice> device,
                          id<MTLCommandQueue> queue,
                          bool refit)
{
  if (@available(macos 12.0, *)) {

    /* we need to sync here and ensure that all BLAS have completed async generation by both GCD
     * and Metal */
    {
      __block bool complete_bvh = false;
      while (!complete_bvh) {
        dispatch_sync(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
          complete_bvh = true;
          for (Object *ob : objects) {
            /* Skip non-traceable objects */
            if (!ob->is_traceable())
              continue;

            Geometry const *geom = ob->get_geometry();
            BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh);
            if (blas->accel_struct_building) {
              complete_bvh = false;

              /* We're likely waiting on a command buffer that's in flight to complete.
               * Queue up a command buffer and wait for it to complete before checking the BLAS
               * again. */
              id<MTLCommandBuffer> command_buffer = [queue commandBuffer];
              [command_buffer commit];
              [command_buffer waitUntilCompleted];
              break;
            }
          }
        });
      }
    }

    /* First pass: count instances and the motion transform slots they need. */
    uint32_t num_instances = 0;
    uint32_t num_motion_transforms = 0;
    for (Object *ob : objects) {
      /* Skip non-traceable objects */
      if (!ob->is_traceable())
        continue;
      num_instances++;

      if (ob->use_motion()) {
        num_motion_transforms += max(1, ob->get_motion().size());
      }
      else {
        num_motion_transforms++;
      }
    }

    /*------------------------------------------------*/
    BVH_status("Building TLAS      | %7d instances", (int)num_instances);
    /*------------------------------------------------*/

    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);

    NSMutableArray *all_blas = [NSMutableArray array];
    unordered_map<BVHMetal const *, int> instance_mapping;

    /* Lambda function to build/retrieve the BLAS index mapping: each distinct BLAS is appended
     * to `all_blas` once and later referenced by its position in that array. */
    auto get_blas_index = [&](BVHMetal const *blas) {
      auto it = instance_mapping.find(blas);
      if (it != instance_mapping.end()) {
        return it->second;
      }
      else {
        int blas_index = (int)[all_blas count];
        instance_mapping[blas] = blas_index;
        if (@available(macos 12.0, *)) {
          [all_blas addObject:blas->accel_struct];
        }
        return blas_index;
      }
    };

    /* Copy 12 floats from `src` to `dst` using the index mapping (i / 3) + 4 * (i % 3), i.e.
     * transposing the source transform into the layout Metal expects. */
    auto write_transposed = [](float *dst, float const *src) {
      for (int elem = 0; elem < 12; elem++) {
        dst[elem] = src[(elem / 3) + 4 * (elem % 3)];
      }
    };

    MTLResourceOptions storage_mode;
    if (device.hasUnifiedMemory) {
      storage_mode = MTLResourceStorageModeShared;
    }
    else {
      storage_mode = MTLResourceStorageModeManaged;
    }

    size_t instance_size;
    if (motion_blur) {
      instance_size = sizeof(MTLAccelerationStructureMotionInstanceDescriptor);
    }
    else {
      instance_size = sizeof(MTLAccelerationStructureUserIDInstanceDescriptor);
    }

    /* Allocate a GPU buffer for the instance data and populate it */
    id<MTLBuffer> instanceBuf = [device newBufferWithLength:num_instances * instance_size
                                                    options:storage_mode];
    id<MTLBuffer> motion_transforms_buf = nil;
    MTLPackedFloat4x3 *motion_transforms = nullptr;
    if (motion_blur && num_motion_transforms) {
      motion_transforms_buf = [device
          newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3)
                      options:storage_mode];
      motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
    }

    /* Second pass: write one instance descriptor per traceable object. */
    uint32_t instance_index = 0;
    uint32_t motion_transform_index = 0;
    for (Object *ob : objects) {
      /* Skip non-traceable objects */
      if (!ob->is_traceable())
        continue;

      Geometry const *geom = ob->get_geometry();

      BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh);
      uint32_t accel_struct_index = get_blas_index(blas);

      /* Add some of the object visibility bits to the mask.
       * __prim_visibility contains the combined visibility bits of all instances, so is not
       * reliable if they differ between instances.
       *
       * METAL_WIP: OptiX visibility mask can only contain 8 bits, so have to trade-off here
       * and select just a few important ones.
       */
      uint32_t mask = ob->visibility_for_tracing() & 0xFF;

      /* Have to have at least one bit in the mask, or else instance would always be culled. */
      if (0 == mask) {
        mask = 0xFF;
      }

      /* Set user instance ID to object index */
      int object_index = ob->get_device_index();
      uint32_t user_id = uint32_t(object_index);

      /* Bake into the appropriate descriptor */
      if (motion_blur) {
        MTLAccelerationStructureMotionInstanceDescriptor *instances =
            (MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
        MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[instance_index++];

        desc.accelerationStructureIndex = accel_struct_index;
        desc.userID = user_id;
        desc.mask = mask;
        desc.motionStartTime = 0.0f;
        desc.motionEndTime = 1.0f;
        desc.motionTransformsStartIndex = motion_transform_index;
        desc.motionStartBorderMode = MTLMotionBorderModeVanish;
        desc.motionEndBorderMode = MTLMotionBorderModeVanish;
        desc.intersectionFunctionTableOffset = 0;

        int key_count = ob->get_motion().size();
        if (key_count) {
          desc.motionTransformsCount = key_count;

          Transform *keys = ob->get_motion().data();
          for (int key = 0; key < key_count; key++) {
            float *t = (float *)&motion_transforms[motion_transform_index++];
            write_transposed(t, (float const *)&keys[key]);
          }
        }
        else {
          desc.motionTransformsCount = 1;

          float *t = (float *)&motion_transforms[motion_transform_index++];
          if (ob->get_geometry()->is_instanced()) {
            write_transposed(t, (float const *)&ob->get_tfm());
          }
          else {
            /* Clear transform to identity matrix.
             * NOTE(review): only the diagonal is written; this relies on the freshly allocated
             * buffer being zero-filled - confirm. */
            t[0] = t[4] = t[8] = 1.0f;
          }
        }
      }
      else {
        MTLAccelerationStructureUserIDInstanceDescriptor *instances =
            (MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
        MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[instance_index++];

        desc.accelerationStructureIndex = accel_struct_index;
        desc.userID = user_id;
        desc.mask = mask;
        desc.intersectionFunctionTableOffset = 0;

        float *t = (float *)&desc.transformationMatrix;
        if (ob->get_geometry()->is_instanced()) {
          write_transposed(t, (float const *)&ob->get_tfm());
        }
        else {
          /* Clear transform to identity matrix.
           * NOTE(review): only the diagonal is written; this relies on the freshly allocated
           * buffer being zero-filled - confirm. */
          t[0] = t[4] = t[8] = 1.0f;
        }
      }
    }

    /* Every counted motion transform slot must have been written, whatever the storage mode. */
    if (motion_transforms_buf) {
      assert(num_motion_transforms == motion_transform_index);
    }

    /* Managed buffers need an explicit notification of the CPU-side writes. */
    if (storage_mode == MTLResourceStorageModeManaged) {
      [instanceBuf didModifyRange:NSMakeRange(0, instanceBuf.length)];
      if (motion_transforms_buf) {
        [motion_transforms_buf didModifyRange:NSMakeRange(0, motion_transforms_buf.length)];
      }
    }

    MTLInstanceAccelerationStructureDescriptor *accelDesc =
        [MTLInstanceAccelerationStructureDescriptor descriptor];
    accelDesc.instanceCount = num_instances;
    accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeUserID;
    accelDesc.instanceDescriptorBuffer = instanceBuf;
    accelDesc.instanceDescriptorBufferOffset = 0;
    accelDesc.instanceDescriptorStride = instance_size;
    accelDesc.instancedAccelerationStructures = all_blas;

    if (motion_blur) {
      accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeMotion;
      accelDesc.motionTransformBuffer = motion_transforms_buf;
      accelDesc.motionTransformCount = num_motion_transforms;
    }

    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }

    MTLAccelerationStructureSizes accelSizes = [device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel = [device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                   options:MTLResourceStorageModePrivate];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];

    /* A refit reads from the existing structure; without one, fall back to a full build. */
    id<MTLAccelerationStructure> previous_accel = refit ? accel_struct : nil;
    if (previous_accel) {
      [accelEnc refitAccelerationStructure:previous_accel
                                descriptor:accelDesc
                               destination:accel
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    [accelEnc endEncoding];
    [accelCommands commit];
    [accelCommands waitUntilCompleted];

    if (motion_transforms_buf) {
      [motion_transforms_buf release];
    }
    [instanceBuf release];
    [scratchBuf release];

    /* The refit source is replaced by `accel` below. Release it and undo its memory accounting,
     * otherwise every refit leaks the previous structure. */
    if (previous_accel) {
      stats.mem_free(previous_accel.allocatedSize);
      [previous_accel release];
    }

    uint64_t allocated_size = [accel allocatedSize];
    stats.mem_alloc(allocated_size);

    /* Cache top and bottom-level acceleration structs */
    accel_struct = accel;
    blas_array.clear();
    blas_array.reserve(all_blas.count);
    for (id<MTLAccelerationStructure> blas in all_blas) {
      blas_array.push_back(blas);
    }

    return true;
  }
  return false;
}
|
||||
|
||||
/* Entry point for building this BVH: dispatches to build_TLAS() for top-level BVHs and to
 * build_BLAS() otherwise.
 *
 * A refit is only honoured for non-static BVHs that already own an acceleration structure.
 * In every other case the existing structure (if any) is freed and a full build is done. */
bool BVHMetal::build(Progress &progress,
                     id<MTLDevice> device,
                     id<MTLCommandQueue> queue,
                     bool refit)
{
  if (@available(macos 12.0, *)) {
    if (refit && (params.bvh_type == BVH_TYPE_STATIC || !accel_struct)) {
      /* A missing structure on a non-static BVH is a caller error: assert in development, but
       * recover with a full rebuild rather than refitting from a nil source. */
      assert(params.bvh_type == BVH_TYPE_STATIC);
      refit = false;
    }

    if (!refit && accel_struct) {
      stats.mem_free(accel_struct.allocatedSize);
      [accel_struct release];
      accel_struct = nil;
    }
  }

  if (!params.top_level) {
    return build_BLAS(progress, device, queue, refit);
  }
  else {
    return build_TLAS(progress, device, queue, refit);
  }
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL */
|
@@ -1,37 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "util/string.h"
|
||||
#include "util/vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class Device;
|
||||
class DeviceInfo;
|
||||
class Profiler;
|
||||
class Stats;
|
||||
|
||||
bool device_metal_init();
|
||||
|
||||
Device *device_metal_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
|
||||
|
||||
void device_metal_info(vector<DeviceInfo> &devices);
|
||||
|
||||
string device_metal_capabilities();
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -1,136 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "device/metal/device.h"
|
||||
# include "device/metal/device_impl.h"
|
||||
|
||||
#endif
|
||||
|
||||
#include "util/debug.h"
|
||||
#include "util/set.h"
|
||||
#include "util/system.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
/* Factory for the Metal backend: returns a newly allocated MetalDevice. */
Device *device_metal_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
{
  Device *metal_device = new MetalDevice(info, stats, profiler);
  return metal_device;
}
|
||||
|
||||
/* Metal needs no explicit initialization; always reports success. */
bool device_metal_init()
{
  const bool initialized = true;
  return initialized;
}
|
||||
|
||||
/* Store the number of Metal devices reported by the system in `*num_devices`.
 * Always returns 0. */
static int device_metal_get_num_devices_safe(uint32_t *num_devices)
{
  /* MTLCopyAllDevices() hands back an array the caller owns, so release it once counted. */
  NSArray<id<MTLDevice>> *all_devices = MTLCopyAllDevices();
  *num_devices = (uint32_t)all_devices.count;
  [all_devices release];
  return 0;
}
|
||||
|
||||
/* Append one DeviceInfo entry to `devices` for every usable Metal device.
 * Does nothing when the system reports no Metal devices. */
void device_metal_info(vector<DeviceInfo> &devices)
{
  uint32_t num_devices = 0;
  device_metal_get_num_devices_safe(&num_devices);
  if (num_devices == 0) {
    return;
  }

  vector<MetalPlatformDevice> usable_devices;
  MetalInfo::get_usable_devices(&usable_devices);

  /* Devices are numbered consecutively across platforms. */
  set<string> unique_ids;
  int device_index = 0;
  for (MetalPlatformDevice &device : usable_devices) {
    /* Compute unique ID for persistent user preferences. */
    const string &device_name = device.device_name;
    string id = string("METAL_") + device_name;

    /* Hardware ID might not be unique, add a number in that case.
     * The first duplicate keeps the historical "_ID_<num_devices>" suffix so stored preferences
     * stay valid. Further duplicates count upwards so they no longer collide with it. */
    if (unique_ids.find(id) != unique_ids.end()) {
      int suffix = int(num_devices);
      string candidate = id + string_printf("_ID_%d", suffix);
      while (unique_ids.find(candidate) != unique_ids.end()) {
        candidate = id + string_printf("_ID_%d", ++suffix);
      }
      id = candidate;
    }
    unique_ids.insert(id);

    /* Create DeviceInfo. */
    DeviceInfo info;
    info.type = DEVICE_METAL;
    info.description = string_remove_trademark(string(device_name));

    /* Ensure unique naming on Apple Silicon / SoC devices which return the same string for CPU
     * and GPU */
    if (info.description == system_cpu_brand_string()) {
      info.description += " (GPU)";
    }

    info.num = device_index;
    /* We don't know if it's used for display, but assume it is. */
    info.display_device = true;
    info.denoisers = DENOISER_NONE;
    info.id = id;

    devices.push_back(info);
    device_index++;
  }
}
|
||||
|
||||
/* Return a human-readable summary of the Metal devices on this system: the device count followed
 * by one line per device name, or a fixed message when there are none. */
string device_metal_capabilities()
{
  /* The query must not sit inside assert(): with NDEBUG it would never run, and since the helper
   * returns 0 the assertion would always fail in debug builds. */
  uint32_t num_devices = 0;
  device_metal_get_num_devices_safe(&num_devices);
  if (num_devices == 0) {
    return "No Metal devices found\n";
  }

  string result = "";
  result += string_printf("Number of devices: %u\n", num_devices);

  NSArray<id<MTLDevice>> *allDevices = MTLCopyAllDevices();
  for (id<MTLDevice> device in allDevices) {
    result += string_printf("\t\tDevice: %s\n", [device.name UTF8String]);
  }
  /* The copied array is owned by this function. */
  [allDevices release];

  return result;
}
|
||||
|
||||
#else
|
||||
|
||||
/* Stub used when Cycles is built without Metal support: no device can be created. */
Device *device_metal_create(const DeviceInfo & /*info*/, Stats & /*stats*/, Profiler & /*profiler*/)
{
  return nullptr;
}
|
||||
|
||||
/* Stub used when Cycles is built without Metal support: initialization always fails. */
bool device_metal_init()
{
  const bool initialized = false;
  return initialized;
}
|
||||
|
||||
void device_metal_info(vector<DeviceInfo> &devices)
|
||||
{
|
||||
}
|
||||
|
||||
/* Stub used when Cycles is built without Metal support: there are no capabilities to report. */
string device_metal_capabilities()
{
  return "";
}
|
||||
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -1,166 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "bvh/bvh.h"
|
||||
# include "device/device.h"
|
||||
# include "device/metal/bvh.h"
|
||||
# include "device/metal/device.h"
|
||||
# include "device/metal/kernel.h"
|
||||
# include "device/metal/queue.h"
|
||||
# include "device/metal/util.h"
|
||||
|
||||
# include <Metal/Metal.h>
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class DeviceQueue;
|
||||
|
||||
/* Cycles render device backed by Metal. Holds the Metal device, libraries, command queue and
 * argument encoders, plus the bookkeeping for device memory, bindless textures and kernels. */
class MetalDevice : public Device {
 public:
  id<MTLDevice> mtlDevice = nil;
  id<MTLLibrary> mtlLibrary[PSO_NUM] = {nil};
  id<MTLArgumentEncoder> mtlBufferKernelParamsEncoder =
      nil; /* encoder used for fetching device pointers from MTLBuffers */
  id<MTLCommandQueue> mtlGeneralCommandQueue = nil;
  id<MTLArgumentEncoder> mtlAncillaryArgEncoder =
      nil; /* encoder used for fetching device pointers from MTLBuffers */
  string source_used_for_compile[PSO_NUM];

  KernelParamsMetal launch_params = {0};

  /* MetalRT members ----------------------------------*/
  BVHMetal *bvhMetalRT = nullptr;
  bool motion_blur = false;
  id<MTLArgumentEncoder> mtlASArgEncoder =
      nil; /* encoder used for fetching device pointers from MTLAccelerationStructure */
  /*---------------------------------------------------*/

  string device_name;
  MetalGPUVendor device_vendor;

  /* Scalars below get a default so they never hold an indeterminate value before being set. */
  uint kernel_features = 0;
  MTLResourceOptions default_storage_mode;
  int max_threads_per_threadgroup = 0;

  int mtlDevId = 0;
  bool first_error = true;

  /* Per-allocation record kept for every device_memory registered in metal_mem_map. */
  struct MetalMem {
    device_memory *mem = nullptr;
    int pointer_index = -1;
    id<MTLBuffer> mtlBuffer = nil;
    id<MTLTexture> mtlTexture = nil;
    uint64_t offset = 0;
    uint64_t size = 0;
    void *hostPtr = nullptr;
    bool use_UMA = false; /* If true, UMA memory in shared_pointer is being used. */
  };
  typedef map<device_memory *, unique_ptr<MetalMem>> MetalMemMap;
  MetalMemMap metal_mem_map;
  std::vector<id<MTLResource>> delayed_free_list;
  std::recursive_mutex metal_mem_map_mutex;

  /* Bindless Textures */
  device_vector<TextureInfo> texture_info;
  bool need_texture_info = false;
  id<MTLArgumentEncoder> mtlTextureArgEncoder = nil;
  id<MTLBuffer> texture_bindings_2d = nil;
  id<MTLBuffer> texture_bindings_3d = nil;
  std::vector<id<MTLTexture>> texture_slot_map;

  MetalDeviceKernels kernels;
  bool use_metalrt = false;
  bool use_function_specialisation = false;

  virtual BVHLayoutMask get_bvh_layout_mask() const override;

  void set_error(const string &error) override;

  MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);

  virtual ~MetalDevice();

  bool support_device(const uint /*kernel_features*/);

  bool check_peer_access(Device *peer_device) override;

  bool use_adaptive_compilation();

  string get_source(const uint kernel_features);

  string compile_kernel(const uint kernel_features, const char *name);

  virtual bool load_kernels(const uint kernel_features) override;

  void reserve_local_memory(const uint kernel_features);

  void init_host_memory();

  void load_texture_info();

  virtual bool should_use_graphics_interop() override;

  virtual unique_ptr<DeviceQueue> gpu_queue_create() override;

  virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;

  /* ------------------------------------------------------------------ */
  /* low-level memory management */

  MetalMem *generic_alloc(device_memory &mem);

  void generic_copy_to(device_memory &mem);

  void generic_free(device_memory &mem);

  void mem_alloc(device_memory &mem) override;

  void mem_copy_to(device_memory &mem) override;

  /* Convenience overload: forwards to the full mem_copy_from() with -1 for every range
   * argument. */
  void mem_copy_from(device_memory &mem)
  {
    mem_copy_from(mem, -1, -1, -1, -1);
  }
  void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;

  void mem_zero(device_memory &mem) override;

  void mem_free(device_memory &mem) override;

  device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) override;

  virtual void const_copy_to(const char *name, void *host, size_t size) override;

  void global_alloc(device_memory &mem);

  void global_free(device_memory &mem);

  void tex_alloc(device_texture &mem);

  void tex_alloc_as_buffer(device_texture &mem);

  void tex_free(device_texture &mem);

  void flush_delayed_free_list();
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
@@ -1,168 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "device/kernel.h"
|
||||
# include <Metal/Metal.h>
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class MetalDevice;
|
||||
|
||||
/* Indices of the MetalRT intersection functions; METALRT_FUNC_NUM is the number of entries. */
enum {
  METALRT_FUNC_DEFAULT_TRI,
  METALRT_FUNC_DEFAULT_BOX,
  METALRT_FUNC_SHADOW_TRI,
  METALRT_FUNC_SHADOW_BOX,
  METALRT_FUNC_LOCAL_TRI,
  METALRT_FUNC_LOCAL_BOX,
  METALRT_FUNC_CURVE_RIBBON,
  METALRT_FUNC_CURVE_RIBBON_SHADOW,
  METALRT_FUNC_CURVE_ALL,
  METALRT_FUNC_CURVE_ALL_SHADOW,
  METALRT_FUNC_NUM
};

/* Indices of the intersection function tables; METALRT_TABLE_NUM is the number of tables. */
enum {
  METALRT_TABLE_DEFAULT,
  METALRT_TABLE_SHADOW,
  METALRT_TABLE_LOCAL,
  METALRT_TABLE_NUM
};
|
||||
|
||||
/* Pipeline State Object types */
enum {
  /* A kernel that can be used with all scenes, supporting all features.
   * It is slow to compile, but only needs to be compiled once and is then
   * cached for future render sessions. This allows a render to get underway
   * on the GPU quickly.
   */
  PSO_GENERIC,

  /* A kernel that is relatively quick to compile, but is specialized for the
   * scene being rendered. It contains only the required functionality, with
   * some values baked in as constants, so it must be recompiled whenever a
   * dependent setting changes. The render performance of this kernel is
   * significantly faster though, and justifies the extra compile time.
   */
  /* METAL_WIP: This isn't used and will require more changes to enable. */
  PSO_SPECIALISED,

  /* Number of pipeline state object types. */
  PSO_NUM
};
|
||||
|
||||
const char *kernel_type_as_string(int kernel_type);
|
||||
|
||||
/* One compiled kernel variant: the Metal function, its compute pipeline state and the MetalRT
 * intersection function tables that go with it. */
struct MetalKernelPipeline {
  /* Release every Metal object owned by this pipeline and reset the handles to nil.
   * Each handle is released exactly once and cleared, so calling release() again is harmless. */
  void release()
  {
    if (pipeline) {
      [pipeline release];
      pipeline = nil;
    }
    if (function) {
      [function release];
      function = nil;
    }
    if (@available(macOS 11.0, *)) {
      for (int i = 0; i < METALRT_TABLE_NUM; i++) {
        if (intersection_func_table[i]) {
          [intersection_func_table[i] release];
          /* Clear the slot so a later release() cannot over-release a dangling pointer. */
          intersection_func_table[i] = nil;
        }
      }
    }
  }

  bool loaded = false;
  id<MTLFunction> function = nil;
  id<MTLComputePipelineState> pipeline = nil;

  API_AVAILABLE(macos(11.0))
  id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
};
|
||||
|
||||
/* Parameters describing a single kernel to load: which PSO slot and kernel it belongs to, the
 * entry point name, threadgroup size, and optional function constants and linked functions. */
struct MetalKernelLoadDesc {
  int pso_index = 0;
  const char *function_name = nullptr;
  int kernel_index = 0;
  int threads_per_threadgroup = 0;
  MTLFunctionConstantValues *constant_values = nullptr;
  NSArray *linked_functions = nullptr;

  /* Intersection function lists, one per METALRT_TABLE_* index. */
  struct IntersectorFunctions {
    NSArray *defaults;
    NSArray *shadow;
    NSArray *local;

    /* Look up the list for a table index; any index other than DEFAULT or SHADOW yields
     * `local`. */
    NSArray *operator[](int index) const
    {
      switch (index) {
        case METALRT_TABLE_DEFAULT:
          return defaults;
        case METALRT_TABLE_SHADOW:
          return shadow;
        default:
          return local;
      }
    }
  } intersector_functions = {nullptr};
};
|
||||
|
||||
/* A Metal kernel together with its occupancy information: one pipeline per PSO type plus the
 * threads-per-block count. */
class MetalDeviceKernel {
 public:
  /* Releases the pipelines of every PSO type. */
  ~MetalDeviceKernel();

  /* Load the kernel variant described by `desc`; returns false on failure. */
  bool load(MetalDevice *device, MetalKernelLoadDesc const &desc, class MD5Hash const &md5);

  /* Flag the pipeline in slot `pso_index` as loaded. */
  void mark_loaded(int pso_index)
  {
    MetalKernelPipeline &slot = pso[pso_index];
    slot.loaded = true;
  }

  int get_num_threads_per_block() const
  {
    return num_threads_per_block;
  }

  const MetalKernelPipeline &get_pso() const;

  /* Duration recorded by load(). */
  double load_duration = 0.0;

 private:
  MetalKernelPipeline pso[PSO_NUM];

  int num_threads_per_block = 0;
};
|
||||
|
||||
/* Cache of Metal kernels, one MetalDeviceKernel per DeviceKernel, along with the MetalRT
 * intersection functions and source hash recorded for each PSO type. */
class MetalDeviceKernels {
 public:
  /* Load the kernels for the given PSO type (`kernel_type`); returns false on failure. */
  bool load(MetalDevice *device, int kernel_type);

  /* Query whether `kernel` can be used. */
  bool available(DeviceKernel kernel) const;

  /* Access the cached entry for `kernel`. */
  const MetalDeviceKernel &get(DeviceKernel kernel) const;

  MetalDeviceKernel kernels_[DEVICE_KERNEL_NUM];

  id<MTLFunction> rt_intersection_funcs[PSO_NUM][METALRT_FUNC_NUM] = {{nil}};

  string loaded_md5[PSO_NUM];
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL */
|
@@ -1,525 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "device/metal/kernel.h"
|
||||
# include "device/metal/device_impl.h"
|
||||
# include "util/md5.h"
|
||||
# include "util/path.h"
|
||||
# include "util/tbb.h"
|
||||
# include "util/time.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* limit to 2 MTLCompiler instances */
|
||||
int max_mtlcompiler_threads = 2;
|
||||
|
||||
const char *kernel_type_as_string(int kernel_type)
|
||||
{
|
||||
switch (kernel_type) {
|
||||
case PSO_GENERIC:
|
||||
return "PSO_GENERIC";
|
||||
case PSO_SPECIALISED:
|
||||
return "PSO_SPECIALISED";
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
/* Release the Metal objects held by the pipeline of every PSO type. */
MetalDeviceKernel::~MetalDeviceKernel()
{
  for (MetalKernelPipeline &pipeline_slot : pso) {
    pipeline_slot.release();
  }
}
|
||||
|
||||
bool MetalDeviceKernel::load(MetalDevice *device,
|
||||
MetalKernelLoadDesc const &desc_in,
|
||||
MD5Hash const &md5)
|
||||
{
|
||||
__block MetalKernelLoadDesc const desc(desc_in);
|
||||
if (desc.kernel_index == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
|
||||
/* skip megakernel */
|
||||
return true;
|
||||
}
|
||||
|
||||
bool use_binary_archive = true;
|
||||
if (getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
|
||||
use_binary_archive = false;
|
||||
}
|
||||
|
||||
id<MTLBinaryArchive> archive = nil;
|
||||
string metalbin_path;
|
||||
if (use_binary_archive) {
|
||||
NSProcessInfo *processInfo = [NSProcessInfo processInfo];
|
||||
string osVersion = [[processInfo operatingSystemVersionString] UTF8String];
|
||||
MD5Hash local_md5(md5);
|
||||
local_md5.append(osVersion);
|
||||
string metalbin_name = string(desc.function_name) + "." + local_md5.get_hex() +
|
||||
to_string(desc.pso_index) + ".bin";
|
||||
metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
|
||||
path_create_directories(metalbin_path);
|
||||
|
||||
if (path_exists(metalbin_path) && use_binary_archive) {
|
||||
if (@available(macOS 11.0, *)) {
|
||||
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
|
||||
archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
|
||||
archive = [device->mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
|
||||
[archiveDesc release];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
NSString *entryPoint = [@(desc.function_name) copy];
|
||||
|
||||
NSError *error = NULL;
|
||||
if (@available(macOS 11.0, *)) {
|
||||
MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
|
||||
func_desc.name = entryPoint;
|
||||
if (desc.constant_values) {
|
||||
func_desc.constantValues = desc.constant_values;
|
||||
}
|
||||
pso[desc.pso_index].function = [device->mtlLibrary[desc.pso_index]
|
||||
newFunctionWithDescriptor:func_desc
|
||||
error:&error];
|
||||
}
|
||||
[entryPoint release];
|
||||
|
||||
if (pso[desc.pso_index].function == nil) {
|
||||
NSString *err = [error localizedDescription];
|
||||
string errors = [err UTF8String];
|
||||
|
||||
device->set_error(
|
||||
string_printf("Error getting function \"%s\": %s", desc.function_name, errors.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
pso[desc.pso_index].function.label = [@(desc.function_name) copy];
|
||||
|
||||
__block MTLComputePipelineDescriptor *computePipelineStateDescriptor =
|
||||
[[MTLComputePipelineDescriptor alloc] init];
|
||||
|
||||
computePipelineStateDescriptor.buffers[0].mutability = MTLMutabilityImmutable;
|
||||
computePipelineStateDescriptor.buffers[1].mutability = MTLMutabilityImmutable;
|
||||
computePipelineStateDescriptor.buffers[2].mutability = MTLMutabilityImmutable;
|
||||
|
||||
if (@available(macos 10.14, *)) {
|
||||
computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = desc.threads_per_threadgroup;
|
||||
}
|
||||
computePipelineStateDescriptor.threadGroupSizeIsMultipleOfThreadExecutionWidth = true;
|
||||
|
||||
computePipelineStateDescriptor.computeFunction = pso[desc.pso_index].function;
|
||||
if (@available(macOS 11.0, *)) {
|
||||
/* Attach the additional functions to an MTLLinkedFunctions object */
|
||||
if (desc.linked_functions) {
|
||||
computePipelineStateDescriptor.linkedFunctions = [[MTLLinkedFunctions alloc] init];
|
||||
computePipelineStateDescriptor.linkedFunctions.functions = desc.linked_functions;
|
||||
}
|
||||
|
||||
computePipelineStateDescriptor.maxCallStackDepth = 1;
|
||||
}
|
||||
|
||||
/* Create a new Compute pipeline state object */
|
||||
MTLPipelineOption pipelineOptions = MTLPipelineOptionNone;
|
||||
|
||||
bool creating_new_archive = false;
|
||||
if (@available(macOS 11.0, *)) {
|
||||
if (use_binary_archive) {
|
||||
if (!archive) {
|
||||
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
|
||||
archiveDesc.url = nil;
|
||||
archive = [device->mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
|
||||
creating_new_archive = true;
|
||||
|
||||
double starttime = time_dt();
|
||||
|
||||
if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
|
||||
error:&error]) {
|
||||
NSString *errStr = [error localizedDescription];
|
||||
metal_printf("Failed to add PSO to archive:\n%s\n",
|
||||
errStr ? [errStr UTF8String] : "nil");
|
||||
}
|
||||
else {
|
||||
double duration = time_dt() - starttime;
|
||||
metal_printf("%2d | %-55s | %7.2fs\n",
|
||||
desc.kernel_index,
|
||||
device_kernel_as_string((DeviceKernel)desc.kernel_index),
|
||||
duration);
|
||||
|
||||
if (desc.pso_index == PSO_GENERIC) {
|
||||
this->load_duration = duration;
|
||||
}
|
||||
}
|
||||
}
|
||||
computePipelineStateDescriptor.binaryArchives = [NSArray arrayWithObjects:archive, nil];
|
||||
pipelineOptions = MTLPipelineOptionFailOnBinaryArchiveMiss;
|
||||
}
|
||||
}
|
||||
|
||||
double starttime = time_dt();
|
||||
|
||||
MTLNewComputePipelineStateWithReflectionCompletionHandler completionHandler = ^(
|
||||
id<MTLComputePipelineState> computePipelineState,
|
||||
MTLComputePipelineReflection *reflection,
|
||||
NSError *error) {
|
||||
bool recreate_archive = false;
|
||||
if (computePipelineState == nil && archive && !creating_new_archive) {
|
||||
|
||||
assert(0);
|
||||
|
||||
NSString *errStr = [error localizedDescription];
|
||||
metal_printf(
|
||||
"Failed to create compute pipeline state \"%s\" from archive - attempting recreation... "
|
||||
"(error: %s)\n",
|
||||
device_kernel_as_string((DeviceKernel)desc.kernel_index),
|
||||
errStr ? [errStr UTF8String] : "nil");
|
||||
computePipelineState = [device->mtlDevice
|
||||
newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
|
||||
options:MTLPipelineOptionNone
|
||||
reflection:nullptr
|
||||
error:&error];
|
||||
recreate_archive = true;
|
||||
}
|
||||
|
||||
double duration = time_dt() - starttime;
|
||||
|
||||
if (computePipelineState == nil) {
|
||||
NSString *errStr = [error localizedDescription];
|
||||
device->set_error(string_printf("Failed to create compute pipeline state \"%s\", error: \n",
|
||||
device_kernel_as_string((DeviceKernel)desc.kernel_index)) +
|
||||
(errStr ? [errStr UTF8String] : "nil"));
|
||||
metal_printf("%2d | %-55s | %7.2fs | FAILED!\n",
|
||||
desc.kernel_index,
|
||||
device_kernel_as_string((DeviceKernel)desc.kernel_index),
|
||||
duration);
|
||||
return;
|
||||
}
|
||||
|
||||
pso[desc.pso_index].pipeline = computePipelineState;
|
||||
num_threads_per_block = round_down(computePipelineState.maxTotalThreadsPerThreadgroup,
|
||||
computePipelineState.threadExecutionWidth);
|
||||
num_threads_per_block = std::max(num_threads_per_block,
|
||||
(int)computePipelineState.threadExecutionWidth);
|
||||
|
||||
if (!use_binary_archive) {
|
||||
metal_printf("%2d | %-55s | %7.2fs\n",
|
||||
desc.kernel_index,
|
||||
device_kernel_as_string((DeviceKernel)desc.kernel_index),
|
||||
duration);
|
||||
|
||||
if (desc.pso_index == PSO_GENERIC) {
|
||||
this->load_duration = duration;
|
||||
}
|
||||
}
|
||||
|
||||
if (@available(macOS 11.0, *)) {
|
||||
if (creating_new_archive || recreate_archive) {
|
||||
if (![archive serializeToURL:[NSURL fileURLWithPath:@(metalbin_path.c_str())]
|
||||
error:&error]) {
|
||||
metal_printf("Failed to save binary archive, error:\n%s\n",
|
||||
[[error localizedDescription] UTF8String]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
[computePipelineStateDescriptor release];
|
||||
computePipelineStateDescriptor = nil;
|
||||
|
||||
if (device->use_metalrt && desc.linked_functions) {
|
||||
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
|
||||
if (@available(macOS 11.0, *)) {
|
||||
MTLIntersectionFunctionTableDescriptor *ift_desc =
|
||||
[[MTLIntersectionFunctionTableDescriptor alloc] init];
|
||||
ift_desc.functionCount = desc.intersector_functions[table].count;
|
||||
|
||||
pso[desc.pso_index].intersection_func_table[table] = [pso[desc.pso_index].pipeline
|
||||
newIntersectionFunctionTableWithDescriptor:ift_desc];
|
||||
|
||||
/* Finally write the function handles into this pipeline's table */
|
||||
for (int i = 0; i < 2; i++) {
|
||||
id<MTLFunctionHandle> handle = [pso[desc.pso_index].pipeline
|
||||
functionHandleWithFunction:desc.intersector_functions[table][i]];
|
||||
[pso[desc.pso_index].intersection_func_table[table] setFunction:handle atIndex:i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mark_loaded(desc.pso_index);
|
||||
};
|
||||
|
||||
if (desc.pso_index == PSO_SPECIALISED) {
|
||||
/* Asynchronous load */
|
||||
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
|
||||
NSError *error;
|
||||
id<MTLComputePipelineState> pipeline = [device->mtlDevice
|
||||
newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
|
||||
options:pipelineOptions
|
||||
reflection:nullptr
|
||||
error:&error];
|
||||
completionHandler(pipeline, nullptr, error);
|
||||
});
|
||||
}
|
||||
else {
|
||||
/* Block on load to ensure we continue with a valid kernel function */
|
||||
id<MTLComputePipelineState> pipeline = [device->mtlDevice
|
||||
newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
|
||||
options:pipelineOptions
|
||||
reflection:nullptr
|
||||
error:&error];
|
||||
completionHandler(pipeline, nullptr, error);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return the pipeline that should be used to launch this kernel.
 *
 * The specialised pipeline is returned as soon as it is flagged as loaded; until then
 * the generic pipeline is returned, which is asserted to be loaded. */
const MetalKernelPipeline &MetalDeviceKernel::get_pso() const
{
  const MetalKernelPipeline &specialised = pso[PSO_SPECIALISED];
  if (!specialised.loaded) {
    /* Fall back to the generic variant. */
    const MetalKernelPipeline &generic = pso[PSO_GENERIC];
    assert(generic.loaded);
    return generic;
  }

  return specialised;
}
|
||||
|
||||
/* Load the pipelines of every device kernel for the given PSO type.
 *
 * \param device: Device providing the Metal libraries, kernel features and error state.
 * \param kernel_type: PSO index to build (PSO_GENERIC or PSO_SPECIALISED).
 * \return true when the source hash matches the one already loaded for `kernel_type`,
 * or when every kernel (and, with MetalRT, every intersection function) loaded without
 * error. On failure the error is reported through `device->set_error()` for
 * intersection functions; per-kernel failures are reported by `MetalDeviceKernel::load`.
 *
 * NOTE(review): `constant_values` is created with `new` and not released in this
 * function - confirm who owns it once the specialised path is implemented. */
bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type)
{
  bool any_error = false;

  MD5Hash md5;

  /* Build the function constant table */
  MTLFunctionConstantValues *constant_values = nullptr;
  if (kernel_type == PSO_SPECIALISED) {
    constant_values = [MTLFunctionConstantValues new];

#  define KERNEL_FILM(_type, name) \
    [constant_values setConstantValue:&data.film.name \
                                 type:get_MTLDataType_##_type() \
                              atIndex:KernelData_film_##name]; \
    md5.append((uint8_t *)&data.film.name, sizeof(data.film.name));

#  define KERNEL_BACKGROUND(_type, name) \
    [constant_values setConstantValue:&data.background.name \
                                 type:get_MTLDataType_##_type() \
                              atIndex:KernelData_background_##name]; \
    md5.append((uint8_t *)&data.background.name, sizeof(data.background.name));

#  define KERNEL_INTEGRATOR(_type, name) \
    [constant_values setConstantValue:&data.integrator.name \
                                 type:get_MTLDataType_##_type() \
                              atIndex:KernelData_integrator_##name]; \
    md5.append((uint8_t *)&data.integrator.name, sizeof(data.integrator.name));

#  define KERNEL_BVH(_type, name) \
    [constant_values setConstantValue:&data.bvh.name \
                                 type:get_MTLDataType_##_type() \
                              atIndex:KernelData_bvh_##name]; \
    md5.append((uint8_t *)&data.bvh.name, sizeof(data.bvh.name));

    /* METAL_WIP: populate constant_values based on KernelData */
    assert(0);
    /*
    const KernelData &data = device->launch_params.data;
#    include "kernel/types/background.h"
#    include "kernel/types/bvh.h"
#    include "kernel/types/film.h"
#    include "kernel/types/integrator.h"
    */
  }

  if (device->use_metalrt) {
    if (@available(macOS 11.0, *)) {
      /* create the id<MTLFunction> for each intersection function */
      const char *function_names[] = {
          "__anyhit__cycles_metalrt_visibility_test_tri",
          "__anyhit__cycles_metalrt_visibility_test_box",
          "__anyhit__cycles_metalrt_shadow_all_hit_tri",
          "__anyhit__cycles_metalrt_shadow_all_hit_box",
          "__anyhit__cycles_metalrt_local_hit_tri",
          "__anyhit__cycles_metalrt_local_hit_box",
          "__intersection__curve_ribbon",
          "__intersection__curve_ribbon_shadow",
          "__intersection__curve_all",
          "__intersection__curve_all_shadow",
      };
      assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM);

      MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
      if (kernel_type == PSO_SPECIALISED) {
        desc.constantValues = constant_values;
      }
      for (int i = 0; i < METALRT_FUNC_NUM; i++) {
        const char *function_name = function_names[i];
        /* `name` is a copying property: hand it the autoreleased string directly, an
         * explicit `copy` here would never be released (this file is not using ARC). */
        desc.name = @(function_name);

        NSError *error = nil;
        rt_intersection_funcs[kernel_type][i] = [device->mtlLibrary[kernel_type]
            newFunctionWithDescriptor:desc
                                error:&error];

        if (rt_intersection_funcs[kernel_type][i] == nil) {
          /* Guard against a missing description: constructing a string from a null
           * pointer is undefined behavior. */
          NSString *err = [error localizedDescription];
          string errors = err ? [err UTF8String] : "nil";

          device->set_error(string_printf(
              "Error getting intersection function \"%s\": %s", function_name, errors.c_str()));
          any_error = true;
          break;
        }

        /* `label` copies as well, see the note on `desc.name` above. */
        rt_intersection_funcs[kernel_type][i].label = @(function_name);
      }
    }
  }
  md5.append(device->source_used_for_compile[kernel_type]);

  /* Nothing to do when the same source (and constants) were already loaded. */
  string hash = md5.get_hex();
  if (loaded_md5[kernel_type] == hash) {
    return true;
  }

  if (!any_error) {
    NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
    NSArray *function_list = nil;

    if (device->use_metalrt) {
      id<MTLFunction> box_intersect_default = nil;
      id<MTLFunction> box_intersect_shadow = nil;
      if (device->kernel_features & KERNEL_FEATURE_HAIR) {
        /* Add curve intersection programs. */
        if (device->kernel_features & KERNEL_FEATURE_HAIR_THICK) {
          /* Slower programs for thick hair since that also slows down ribbons.
           * Ideally this should not be needed. */
          box_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL];
          box_intersect_shadow = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL_SHADOW];
        }
        else {
          box_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON];
          box_intersect_shadow =
              rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON_SHADOW];
        }
      }
      table_functions[METALRT_TABLE_DEFAULT] = [NSArray
          arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_TRI],
                           box_intersect_default ?
                               box_intersect_default :
                               rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_BOX],
                           nil];
      table_functions[METALRT_TABLE_SHADOW] = [NSArray
          arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_TRI],
                           box_intersect_shadow ?
                               box_intersect_shadow :
                               rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_BOX],
                           nil];
      table_functions[METALRT_TABLE_LOCAL] = [NSArray
          arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_TRI],
                           rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_BOX],
                           nil];

      /* Gather every function referenced by any table exactly once, ordered by label. */
      NSMutableSet *unique_functions = [NSMutableSet
          setWithArray:table_functions[METALRT_TABLE_DEFAULT]];
      [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]];
      [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]];

      function_list = [[NSArray arrayWithArray:[unique_functions allObjects]]
          sortedArrayUsingComparator:^NSComparisonResult(id<MTLFunction> f1, id<MTLFunction> f2) {
            return [f1.label compare:f2.label];
          }];

      unique_functions = nil;
    }

    metal_printf("Starting %s \"cycles_metal_...\" pipeline builds\n",
                 kernel_type_as_string(kernel_type));

    /* One result slot per kernel: the loop body below runs on several threads, so each
     * iteration only writes its own element instead of a shared flag. */
    bool kernel_failed[int(DEVICE_KERNEL_NUM)] = {};

    tbb::task_arena local_arena(max_mtlcompiler_threads);
    local_arena.execute([&]() {
      tbb::parallel_for(int(0), int(DEVICE_KERNEL_NUM), [&](int i) {
        /* skip megakernel */
        if (i == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
          return;
        }

        /* Only specialize kernels where it can make an impact. */
        if (kernel_type == PSO_SPECIALISED) {
          if (i < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
              i > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
            return;
          }
        }

        MetalDeviceKernel &kernel = kernels_[i];

        const std::string function_name = std::string("cycles_metal_") +
                                          device_kernel_as_string((DeviceKernel)i);
        int threads_per_threadgroup = device->max_threads_per_threadgroup;
        if (i > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL && i < DEVICE_KERNEL_INTEGRATOR_RESET) {
          /* Always use 512 for the sorting kernels */
          threads_per_threadgroup = 512;
        }

        /* Only the kernels listed here get the intersection functions linked in. */
        NSArray *kernel_function_list = nil;

        if (i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
            i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
            i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
            i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
            i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
          kernel_function_list = function_list;
        }

        MetalKernelLoadDesc desc;
        desc.pso_index = kernel_type;
        desc.kernel_index = i;
        desc.linked_functions = kernel_function_list;
        desc.intersector_functions.defaults = table_functions[METALRT_TABLE_DEFAULT];
        desc.intersector_functions.shadow = table_functions[METALRT_TABLE_SHADOW];
        desc.intersector_functions.local = table_functions[METALRT_TABLE_LOCAL];
        desc.constant_values = constant_values;
        desc.threads_per_threadgroup = threads_per_threadgroup;
        desc.function_name = function_name.c_str();

        kernel_failed[i] = !kernel.load(device, desc, md5);
      });
    });

    /* Fold the per-kernel results once all workers are done. */
    for (int i = 0; i < int(DEVICE_KERNEL_NUM); i++) {
      any_error |= kernel_failed[i];
    }
  }

  bool loaded = !any_error;
  if (loaded) {
    loaded_md5[kernel_type] = hash;
  }
  return loaded;
}
|
||||
|
||||
/* Look up the kernel object stored for `kernel`; the enum value is used directly as
 * the index into `kernels_`. */
const MetalDeviceKernel &MetalDeviceKernels::get(DeviceKernel kernel) const
{
  const int kernel_index = static_cast<int>(kernel);
  return kernels_[kernel_index];
}
|
||||
|
||||
/* A kernel counts as available when the pipeline selected by `get_pso()` holds a
 * Metal function. */
bool MetalDeviceKernels::available(DeviceKernel kernel) const
{
  const MetalDeviceKernel &device_kernel = kernels_[static_cast<int>(kernel)];
  const MetalKernelPipeline &pipeline = device_kernel.get_pso();
  return pipeline.function != nil;
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL*/
|
@@ -1,99 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "device/kernel.h"
|
||||
# include "device/memory.h"
|
||||
# include "device/queue.h"
|
||||
|
||||
# include "device/metal/util.h"
|
||||
# include "kernel/device/metal/globals.h"
|
||||
|
||||
# define metal_printf VLOG(4) << string_printf
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class MetalDevice;
|
||||
|
||||
/* Base class for Metal queues. */
|
||||
class MetalDeviceQueue : public DeviceQueue {
 public:
  MetalDeviceQueue(MetalDevice *device);
  ~MetalDeviceQueue();

  int num_concurrent_states(const size_t) const override;
  int num_concurrent_busy_states() const override;

  void init_execution() override;

  /* Encode a launch of `kernel` covering `work_size` work items. */
  bool enqueue(DeviceKernel kernel,
               const int work_size,
               DeviceKernelArguments const &args) override;

  /* Submit the pending command buffer, wait for it and perform queued copy-backs. */
  bool synchronize() override;

  void zero_to_device(device_memory &mem) override;
  void copy_to_device(device_memory &mem) override;
  void copy_from_device(device_memory &mem) override;

  bool kernel_available(DeviceKernel kernel) const override;

 protected:
  void prepare_resources(DeviceKernel kernel);

  id<MTLComputeCommandEncoder> get_compute_encoder(DeviceKernel kernel);
  id<MTLBlitCommandEncoder> get_blit_encoder();

  /* Device this queue was created for. */
  MetalDevice *metal_device;
  /* Supplies the temporary argument buffer of each kernel launch. */
  MetalBufferPool temp_buffer_pool;

  API_AVAILABLE(macos(11.0), ios(14.0))
  MTLCommandBufferDescriptor *command_buffer_desc = nullptr;
  id<MTLDevice> mtlDevice = nil;
  id<MTLCommandQueue> mtlCommandQueue = nil;
  id<MTLCommandBuffer> mtlCommandBuffer = nil;
  id<MTLComputeCommandEncoder> mtlComputeEncoder = nil;
  id<MTLBlitCommandEncoder> mtlBlitEncoder = nil;
  API_AVAILABLE(macos(10.14), ios(14.0))
  id<MTLSharedEvent> shared_event = nil;
  API_AVAILABLE(macos(10.14), ios(14.0))
  MTLSharedEventListener *shared_event_listener = nil;

  /* Queue the shared event listener dispatches on, and the semaphore synchronize()
   * blocks on. Default-initialized so they never hold an indeterminate value when the
   * constructor's availability branch is not taken. */
  dispatch_queue_t event_queue = nil;
  dispatch_semaphore_t wait_semaphore = nil;

  /* Host copy that synchronize() performs once the command buffer has been waited on. */
  struct CopyBack {
    void *host_pointer;
    void *gpu_mem;
    uint64_t size;
  };
  std::vector<CopyBack> copy_back_mem;

  /* Next value signaled on `shared_event`; read and incremented by synchronize(). */
  uint64_t shared_event_id = 1;
  /* Expected to be equal when the queue is destroyed (asserted in the destructor). */
  uint64_t command_buffers_submitted = 0;
  uint64_t command_buffers_completed = 0;
  Stats &stats;

  void close_compute_encoder();
  void close_blit_encoder();
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL */
|
@@ -1,610 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "device/metal/queue.h"
|
||||
|
||||
# include "device/metal/device_impl.h"
|
||||
# include "device/metal/kernel.h"
|
||||
|
||||
# include "util/path.h"
|
||||
# include "util/string.h"
|
||||
# include "util/time.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* MetalDeviceQueue */
|
||||
|
||||
/* Create the command queue and the synchronization objects used by this queue.
 *
 * \param device: Metal device the queue submits work to; its `mtlDevice` and `stats`
 * are referenced by the queue. */
MetalDeviceQueue::MetalDeviceQueue(MetalDevice *device)
    : DeviceQueue(device), metal_device(device), stats(device->stats)
{
  /* These are otherwise only assigned inside the availability check below, while
   * synchronize() reads `shared_event_id` unconditionally. */
  shared_event_id = 1;
  event_queue = nil;

  if (@available(macos 11.0, *)) {
    /* Request per-encoder execution status so failures can be reported in detail. */
    command_buffer_desc = [[MTLCommandBufferDescriptor alloc] init];
    command_buffer_desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
  }

  mtlDevice = device->mtlDevice;
  mtlCommandQueue = [mtlDevice newCommandQueue];

  if (@available(macos 10.14, *)) {
    shared_event = [mtlDevice newSharedEvent];

    /* Shareable event listener */
    event_queue = dispatch_queue_create("com.cycles.metal.event_queue", NULL);
    shared_event_listener = [[MTLSharedEventListener alloc] initWithDispatchQueue:event_queue];
  }

  wait_semaphore = dispatch_semaphore_create(0);
}
|
||||
|
||||
/* Release the Metal and dispatch objects created by the constructor. */
MetalDeviceQueue::~MetalDeviceQueue()
{
  /* Tidying up here isn't really practical - we should expect and require the work
   * queue to be empty here. */
  assert(mtlCommandBuffer == nil);
  assert(command_buffers_submitted == command_buffers_completed);

  if (@available(macos 10.14, *)) {
    [shared_event_listener release];
    [shared_event release];

    /* The listener's dispatch queue is created under the same availability check in
     * the constructor and is owned by this object. */
    if (event_queue) {
      dispatch_release(event_queue);
      event_queue = nil;
    }
  }

  /* Created unconditionally in the constructor. Every wait in synchronize() is paired
   * with a signal, so the count is back at its initial value here. */
  if (wait_semaphore) {
    dispatch_release(wait_semaphore);
    wait_semaphore = nil;
  }

  if (@available(macos 11.0, *)) {
    [command_buffer_desc release];
  }
  if (mtlCommandQueue) {
    [mtlCommandQueue release];
    mtlCommandQueue = nil;
  }
}
|
||||
|
||||
/* Number of path states to run concurrently: a fixed base count scaled by a
 * per-vendor factor. The state size argument is not used. */
int MetalDeviceQueue::num_concurrent_states(const size_t /*state_size*/) const
{
  /* METAL_WIP */
  /* TODO: compute automatically. */
  /* TODO: must have at least num_threads_per_block. */
  const int base_states = 1048576;

  int vendor_scale = 1;
  if (metal_device->device_vendor == METAL_GPU_AMD) {
    vendor_scale = 2;
  }
  else if (metal_device->device_vendor == METAL_GPU_APPLE) {
    vendor_scale = 4;
  }

  return base_states * vendor_scale;
}
|
||||
|
||||
/* Number of busy path states: a fixed base count scaled by a per-vendor factor. */
int MetalDeviceQueue::num_concurrent_busy_states() const
{
  /* METAL_WIP */
  /* TODO: compute automatically. */
  const int base_states = 65536;

  int vendor_scale = 1;
  if (metal_device->device_vendor == METAL_GPU_AMD) {
    vendor_scale = 2;
  }
  else if (metal_device->device_vendor == METAL_GPU_APPLE) {
    vendor_scale = 4;
  }

  return base_states * vendor_scale;
}
|
||||
|
||||
/* Prepare the queue for running a task. */
void MetalDeviceQueue::init_execution()
{
  /* Texture info is loaded first, then everything pending on the queue (including
   * memory copies) is waited on before the task starts executing. */
  metal_device->load_texture_info();

  this->synchronize();
}
|
||||
|
||||
/* Encode a launch of `kernel` into the current compute encoder.
 *
 * A temporary argument buffer is built with three consecutive regions:
 *  - the plain kernel arguments from `args` (bound at buffer index 0),
 *  - a copy of KernelParamsMetal (bound at index 1),
 *  - the ancillary bindings: texture tables and, with MetalRT, the acceleration
 *    structure and intersection function tables (bound at index 2).
 *
 * \param kernel: Kernel to launch.
 * \param work_size: Number of work items; rounded up to whole thread-groups.
 * \param args: Kernel arguments; pointer arguments are encoded as buffer bindings,
 * all others are copied by value.
 * \return false when the device is in an error state before or after encoding. */
bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
                               const int work_size,
                               DeviceKernelArguments const &args)
{
  if (metal_device->have_error()) {
    return false;
  }

  VLOG(3) << "Metal queue launch " << device_kernel_as_string(kernel) << ", work_size "
          << work_size;

  const MetalDeviceKernel &metal_kernel = metal_device->kernels.get(kernel);
  const MetalKernelPipeline &metal_kernel_pso = metal_kernel.get_pso();

  id<MTLComputeCommandEncoder> mtlComputeCommandEncoder = get_compute_encoder(kernel);

  /* Determine size requirement for argument buffer. */
  size_t arg_buffer_length = 0;
  for (size_t i = 0; i < args.count; i++) {
    size_t size_in_bytes = args.sizes[i];
    /* Each argument is aligned to its own size. */
    arg_buffer_length = round_up(arg_buffer_length, size_in_bytes) + size_in_bytes;
  }
  /* 256 is the Metal offset alignment for constant address space bindings */
  arg_buffer_length = round_up(arg_buffer_length, 256);

  /* Globals placed after "vanilla" arguments. */
  size_t globals_offsets = arg_buffer_length;
  arg_buffer_length += sizeof(KernelParamsMetal);
  arg_buffer_length = round_up(arg_buffer_length, 256);

  /* Metal ancillary bindless pointers. */
  size_t metal_offsets = arg_buffer_length;
  arg_buffer_length += metal_device->mtlAncillaryArgEncoder.encodedLength;
  arg_buffer_length = round_up(arg_buffer_length, metal_device->mtlAncillaryArgEncoder.alignment);

  /* Temporary buffer used to prepare arg_buffer */
  uint8_t *init_arg_buffer = (uint8_t *)alloca(arg_buffer_length);
  memset(init_arg_buffer, 0, arg_buffer_length);

  /* Prepare the non-pointer "enqueue" arguments */
  size_t bytes_written = 0;
  for (size_t i = 0; i < args.count; i++) {
    size_t size_in_bytes = args.sizes[i];
    bytes_written = round_up(bytes_written, size_in_bytes);
    if (args.types[i] != DeviceKernelArguments::POINTER) {
      memcpy(init_arg_buffer + bytes_written, args.values[i], size_in_bytes);
    }
    bytes_written += size_in_bytes;
  }

  /* Prepare any non-pointer (i.e. plain-old-data) KernelParamsMetal data */
  /* The plain-old-data is contiguous, continuing to the end of KernelParamsMetal */
  size_t plain_old_launch_data_offset = offsetof(KernelParamsMetal, __integrator_state) +
                                        sizeof(IntegratorStateGPU);
  size_t plain_old_launch_data_size = sizeof(KernelParamsMetal) - plain_old_launch_data_offset;
  memcpy(init_arg_buffer + globals_offsets + plain_old_launch_data_offset,
         (uint8_t *)&metal_device->launch_params + plain_old_launch_data_offset,
         plain_old_launch_data_size);

  /* Allocate an argument buffer. */
  MTLResourceOptions arg_buffer_options = MTLResourceStorageModeManaged;
  if (@available(macOS 11.0, *)) {
    if ([mtlDevice hasUnifiedMemory]) {
      arg_buffer_options = MTLResourceStorageModeShared;
    }
  }

  id<MTLBuffer> arg_buffer = temp_buffer_pool.get_buffer(
      mtlDevice, mtlCommandBuffer, arg_buffer_length, arg_buffer_options, init_arg_buffer, stats);

  /* Encode the pointer "enqueue" arguments */
  bytes_written = 0;
  for (size_t i = 0; i < args.count; i++) {
    size_t size_in_bytes = args.sizes[i];
    bytes_written = round_up(bytes_written, size_in_bytes);
    if (args.types[i] == DeviceKernelArguments::POINTER) {
      [metal_device->mtlBufferKernelParamsEncoder setArgumentBuffer:arg_buffer
                                                             offset:bytes_written];
      if (MetalDevice::MetalMem *mmem = *(MetalDevice::MetalMem **)args.values[i]) {
        [mtlComputeCommandEncoder useResource:mmem->mtlBuffer
                                        usage:MTLResourceUsageRead | MTLResourceUsageWrite];
        [metal_device->mtlBufferKernelParamsEncoder setBuffer:mmem->mtlBuffer offset:0 atIndex:0];
      }
      else {
        if (@available(macos 12.0, *)) {
          [metal_device->mtlBufferKernelParamsEncoder setBuffer:nil offset:0 atIndex:0];
        }
      }
    }
    bytes_written += size_in_bytes;
  }

  /* Encode KernelParamsMetal buffers */
  [metal_device->mtlBufferKernelParamsEncoder setArgumentBuffer:arg_buffer offset:globals_offsets];

  /* this relies on IntegratorStateGPU layout being contiguous device_ptrs */
  const size_t pointer_block_end = offsetof(KernelParamsMetal, __integrator_state) +
                                   sizeof(IntegratorStateGPU);
  for (size_t offset = 0; offset < pointer_block_end; offset += sizeof(device_ptr)) {
    int pointer_index = offset / sizeof(device_ptr);
    MetalDevice::MetalMem *mmem = *(
        MetalDevice::MetalMem **)((uint8_t *)&metal_device->launch_params + offset);
    if (mmem && (mmem->mtlBuffer || mmem->mtlTexture)) {
      [metal_device->mtlBufferKernelParamsEncoder setBuffer:mmem->mtlBuffer
                                                     offset:0
                                                    atIndex:pointer_index];
    }
    else {
      if (@available(macos 12.0, *)) {
        [metal_device->mtlBufferKernelParamsEncoder setBuffer:nil offset:0 atIndex:pointer_index];
      }
    }
  }

  /* Encode ancillaries */
  [metal_device->mtlAncillaryArgEncoder setArgumentBuffer:arg_buffer offset:metal_offsets];
  [metal_device->mtlAncillaryArgEncoder setBuffer:metal_device->texture_bindings_2d
                                           offset:0
                                          atIndex:0];
  [metal_device->mtlAncillaryArgEncoder setBuffer:metal_device->texture_bindings_3d
                                           offset:0
                                          atIndex:1];
  if (@available(macos 12.0, *)) {
    if (metal_device->use_metalrt) {
      if (metal_device->bvhMetalRT) {
        id<MTLAccelerationStructure> accel_struct = metal_device->bvhMetalRT->accel_struct;
        [metal_device->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:2];
      }

      for (int table = 0; table < METALRT_TABLE_NUM; table++) {
        if (metal_kernel_pso.intersection_func_table[table]) {
          [metal_kernel_pso.intersection_func_table[table] setBuffer:arg_buffer
                                                              offset:globals_offsets
                                                             atIndex:1];
          [metal_device->mtlAncillaryArgEncoder
              setIntersectionFunctionTable:metal_kernel_pso.intersection_func_table[table]
                                   atIndex:3 + table];
          [mtlComputeCommandEncoder useResource:metal_kernel_pso.intersection_func_table[table]
                                          usage:MTLResourceUsageRead];
        }
        else {
          [metal_device->mtlAncillaryArgEncoder setIntersectionFunctionTable:nil
                                                                     atIndex:3 + table];
        }
      }
    }
  }

  /* The texture bindings above are written into the ancillary region regardless of
   * the OS version, so the modified range always has to extend to the end of that
   * region - not only when the MetalRT branch is available. */
  bytes_written = metal_offsets + metal_device->mtlAncillaryArgEncoder.encodedLength;

  if (arg_buffer.storageMode == MTLStorageModeManaged) {
    [arg_buffer didModifyRange:NSMakeRange(0, bytes_written)];
  }

  [mtlComputeCommandEncoder setBuffer:arg_buffer offset:0 atIndex:0];
  [mtlComputeCommandEncoder setBuffer:arg_buffer offset:globals_offsets atIndex:1];
  [mtlComputeCommandEncoder setBuffer:arg_buffer offset:metal_offsets atIndex:2];

  if (metal_device->use_metalrt) {
    if (@available(macos 12.0, *)) {

      /* Only the kernels listed here mark the acceleration structures as used. */
      auto bvhMetalRT = metal_device->bvhMetalRT;
      switch (kernel) {
        case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
        case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
        case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
        case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK:
        case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
          break;
        default:
          bvhMetalRT = nil;
          break;
      }

      if (bvhMetalRT) {
        /* Mark all Accelerations resources as used */
        [mtlComputeCommandEncoder useResource:bvhMetalRT->accel_struct usage:MTLResourceUsageRead];
        [mtlComputeCommandEncoder useResources:bvhMetalRT->blas_array.data()
                                         count:bvhMetalRT->blas_array.size()
                                         usage:MTLResourceUsageRead];
      }
    }
  }

  [mtlComputeCommandEncoder setComputePipelineState:metal_kernel_pso.pipeline];

  /* Compute kernel launch parameters. */
  const int num_threads_per_block = metal_kernel.get_num_threads_per_block();

  int shared_mem_bytes = 0;

  switch (kernel) {
    case DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY:
    case DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY:
    case DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY:
    case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY:
    case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY:
    case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY:
    case DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY:
    case DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY:
      /* See parallel_active_index.h for why this amount of shared memory is needed.
       * Rounded up to 16 bytes for Metal */
      shared_mem_bytes = round_up((num_threads_per_block + 1) * sizeof(int), 16);
      [mtlComputeCommandEncoder setThreadgroupMemoryLength:shared_mem_bytes atIndex:0];
      break;

    default:
      break;
  }

  MTLSize size_threadgroups_per_dispatch = MTLSizeMake(
      divide_up(work_size, num_threads_per_block), 1, 1);
  MTLSize size_threads_per_threadgroup = MTLSizeMake(num_threads_per_block, 1, 1);
  [mtlComputeCommandEncoder dispatchThreadgroups:size_threadgroups_per_dispatch
                           threadsPerThreadgroup:size_threads_per_threadgroup];

  /* Report command buffer failures together with the name of this kernel. */
  [mtlCommandBuffer addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
    NSString *kernel_name = metal_kernel_pso.function.label;

    /* Enhanced command buffer errors are only available in 11.0+ */
    if (@available(macos 11.0, *)) {
      if (command_buffer.status == MTLCommandBufferStatusError && command_buffer.error != nil) {
        printf("CommandBuffer Failed: %s\n", [kernel_name UTF8String]);
        NSArray<id<MTLCommandBufferEncoderInfo>> *encoderInfos = [command_buffer.error.userInfo
            valueForKey:MTLCommandBufferEncoderInfoErrorKey];
        if (encoderInfos != nil) {
          for (id<MTLCommandBufferEncoderInfo> encoderInfo in encoderInfos) {
            NSLog(@"%@", encoderInfo);
          }
        }
        id<MTLLogContainer> logs = command_buffer.logs;
        for (id<MTLFunctionLog> log in logs) {
          NSLog(@"%@", log);
        }
      }
      else if (command_buffer.error) {
        printf("CommandBuffer Failed: %s\n", [kernel_name UTF8String]);
      }
    }
  }];

  return !(metal_device->have_error());
}
|
||||
|
||||
/* Submit all pending work and block until it has been executed.
 *
 * Ends any open compute/blit encoder. If a command buffer is pending, it is committed together
 * with a shared-event signal and the calling thread waits on `wait_semaphore` until the listener
 * for that event value fires. Afterwards the host copies queued in `copy_back_mem` are performed,
 * temporary buffers tied to the command buffer are handed back to the pool and the device's
 * delayed-free list is flushed.
 *
 * Returns false when the device is in an error state before or after the synchronization. */
bool MetalDeviceQueue::synchronize()
{
  if (metal_device->have_error()) {
    return false;
  }

  if (mtlComputeEncoder) {
    close_compute_encoder();
  }
  close_blit_encoder();

  if (mtlCommandBuffer) {
    uint64_t shared_event_id = this->shared_event_id++;

    if (@available(macos 10.14, *)) {
      __block dispatch_semaphore_t block_sema = wait_semaphore;
      [shared_event notifyListener:shared_event_listener
                           atValue:shared_event_id
                             block:^(id<MTLSharedEvent> sharedEvent, uint64_t value) {
                               dispatch_semaphore_signal(block_sema);
                             }];

      [mtlCommandBuffer encodeSignalEvent:shared_event value:shared_event_id];
      [mtlCommandBuffer commit];
      dispatch_semaphore_wait(wait_semaphore, DISPATCH_TIME_FOREVER);
    }

    /* Perform the host-side copies that were deferred until the GPU work finished. */
    for (const CopyBack &mmem : copy_back_mem) {
      memcpy((uchar *)mmem.host_pointer, (uchar *)mmem.gpu_mem, mmem.size);
    }
    copy_back_mem.clear();

    temp_buffer_pool.process_command_buffer_completion(mtlCommandBuffer);
    metal_device->flush_delayed_free_list();

    /* Balance the retain taken when the command buffer was created. This is done only after
     * the last use of the pointer above, so that a released object is never passed around. */
    [mtlCommandBuffer release];
    mtlCommandBuffer = nil;
  }

  return !(metal_device->have_error());
}
|
||||
|
||||
/* Fill the device allocation behind `mem` with zeros, allocating it first when necessary.
 * Buffer-backed memory is cleared with a blit fill on the queue; anything else is delegated to
 * the device. */
void MetalDeviceQueue::zero_to_device(device_memory &mem)
{
  assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);

  /* Nothing to clear. */
  if (mem.memory_size() == 0) {
    return;
  }

  /* Create the allocation lazily. */
  if (mem.device_pointer == 0) {
    metal_device->mem_alloc(mem);
  }
  assert(mem.device_pointer != 0);

  std::lock_guard<std::recursive_mutex> lock(metal_device->metal_mem_map_mutex);
  MetalDevice::MetalMem &entry = *metal_device->metal_mem_map.at(&mem);

  if (!entry.mtlBuffer) {
    /* No MTLBuffer behind this allocation: let the device zero it. */
    metal_device->mem_zero(mem);
    return;
  }

  id<MTLBlitCommandEncoder> encoder = get_blit_encoder();
  [encoder fillBuffer:entry.mtlBuffer range:NSMakeRange(entry.offset, entry.size) value:0];
}
|
||||
|
||||
/* Upload the host data of `mem` to its device allocation, allocating it first when necessary.
 * Memory known to the device's memory map is uploaded through a temporary staging buffer and a
 * blit copy encoded on this queue; unknown memory is delegated to the device. */
void MetalDeviceQueue::copy_to_device(device_memory &mem)
{
  if (mem.memory_size() == 0) {
    return;
  }

  /* Create the allocation lazily. */
  if (mem.device_pointer == 0) {
    metal_device->mem_alloc(mem);
  }

  assert(mem.device_pointer != 0);
  assert(mem.host_pointer != nullptr);

  std::lock_guard<std::recursive_mutex> lock(metal_device->metal_mem_map_mutex);
  auto found = metal_device->metal_mem_map.find(&mem);
  if (found == metal_device->metal_mem_map.end()) {
    metal_device->mem_copy_to(mem);
    return;
  }

  /* Host and shared pointer are the same memory: there is nothing to upload. */
  if (mem.host_pointer == mem.shared_pointer) {
    return;
  }

  MetalDevice::MetalMem &entry = *found->second;

  /* The blit encoder must be requested first, since that is what creates the command buffer the
   * staging buffer gets associated with. */
  id<MTLBlitCommandEncoder> encoder = get_blit_encoder();

  id<MTLBuffer> staging = temp_buffer_pool.get_buffer(
      mtlDevice, mtlCommandBuffer, entry.size, MTLResourceStorageModeShared, mem.host_pointer, stats);

  [encoder copyFromBuffer:staging
             sourceOffset:0
                 toBuffer:entry.mtlBuffer
        destinationOffset:entry.offset
                     size:entry.size];
}
|
||||
|
||||
/* Make the device contents of `mem` visible in its host pointer.
 *
 * For buffer-backed memory a managed buffer is synchronized with a blit command, and when the
 * host pointer differs from the buffer's own host pointer the data is copied: immediately if no
 * command buffer is pending, otherwise deferred through `copy_back_mem`. Memory without an
 * MTLBuffer is delegated to the device. */
void MetalDeviceQueue::copy_from_device(device_memory &mem)
{
  assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);

  if (mem.memory_size() == 0) {
    return;
  }

  assert(mem.device_pointer != 0);
  assert(mem.host_pointer != nullptr);

  std::lock_guard<std::recursive_mutex> lock(metal_device->metal_mem_map_mutex);
  MetalDevice::MetalMem &entry = *metal_device->metal_mem_map.at(&mem);

  if (!entry.mtlBuffer) {
    metal_device->mem_copy_from(mem);
    return;
  }

  const size_t num_bytes = mem.memory_size();

  if (!mem.device_pointer) {
    /* No device allocation: hand back zeros. */
    memset((char *)mem.host_pointer, 0, num_bytes);
    return;
  }

  if ([entry.mtlBuffer storageMode] == MTLStorageModeManaged) {
    id<MTLBlitCommandEncoder> encoder = get_blit_encoder();
    [encoder synchronizeResource:entry.mtlBuffer];
  }

  if (mem.host_pointer == entry.hostPtr) {
    return;
  }

  if (mtlCommandBuffer) {
    /* Work is still pending: queue the copy for later. */
    copy_back_mem.push_back({mem.host_pointer, entry.hostPtr, num_bytes});
  }
  else {
    memcpy((uchar *)mem.host_pointer, (uchar *)entry.hostPtr, num_bytes);
  }
}
|
||||
|
||||
/* Report whether the device's kernel collection marks `kernel` as available. */
bool MetalDeviceQueue::kernel_available(DeviceKernel kernel) const
{
  const bool is_available = metal_device->kernels.available(kernel);
  return is_available;
}
|
||||
|
||||
/* Declare to the current compute encoder every resource in the device's memory map, plus the
 * 2D/3D texture binding tables. The `kernel` argument is currently not used. */
void MetalDeviceQueue::prepare_resources(DeviceKernel kernel)
{
  std::lock_guard<std::recursive_mutex> lock(metal_device->metal_mem_map_mutex);

  /* Declare resource usage. */
  for (auto &map_entry : metal_device->metal_mem_map) {
    const device_memory *mem = map_entry.first;

    /* Global, read-only and texture memory is only read; everything else may be written. */
    const bool read_only = (mem->type == MEM_GLOBAL || mem->type == MEM_READ_ONLY ||
                            mem->type == MEM_TEXTURE);

    MTLResourceUsage usage = MTLResourceUsageRead;
    if (!read_only) {
      usage |= MTLResourceUsageWrite;
    }

    /* METAL_WIP - use array version (i.e. useResources) */
    if (map_entry.second->mtlBuffer) {
      [mtlComputeEncoder useResource:map_entry.second->mtlBuffer usage:usage];
    }
    else if (map_entry.second->mtlTexture) {
      [mtlComputeEncoder useResource:map_entry.second->mtlTexture
                               usage:usage | MTLResourceUsageSample];
    }
  }

  /* Ancillaries. */
  [mtlComputeEncoder useResource:metal_device->texture_bindings_2d usage:MTLResourceUsageRead];
  [mtlComputeEncoder useResource:metal_device->texture_bindings_3d usage:MTLResourceUsageRead];
}
|
||||
|
||||
/* Return a compute encoder suitable for dispatching `kernel`.
 *
 * Integrator kernels (those below DEVICE_KERNEL_INTEGRATOR_NUM) use a concurrent dispatch type,
 * all other kernels a serial one. An already open encoder is reused when its dispatch type
 * matches; otherwise it is closed and a new encoder is created on the current command buffer,
 * which is itself created (and retained) on demand. Resource usage is declared on the returned
 * encoder in both cases.
 *
 * On systems older than macOS 10.14 no encoder is created and the current value of
 * `mtlComputeEncoder` is returned unchanged. */
id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel kernel)
{
  const bool concurrent = (kernel < DEVICE_KERNEL_INTEGRATOR_NUM);

  if (@available(macos 10.14, *)) {
    const MTLDispatchType dispatch_type = concurrent ? MTLDispatchTypeConcurrent :
                                                       MTLDispatchTypeSerial;

    if (mtlComputeEncoder) {
      /* Compare against the wanted dispatch type explicitly. The previous unparenthesized
       * `a == b ? x : y` form parsed as `(a == b) ? x : y` and only gave the right answer
       * because of the numeric values of the enum and of `bool`. */
      if (mtlComputeEncoder.dispatchType == dispatch_type) {
        /* Declare usage of MTLBuffers etc. */
        prepare_resources(kernel);

        return mtlComputeEncoder;
      }
      close_compute_encoder();
    }

    close_blit_encoder();

    if (!mtlCommandBuffer) {
      mtlCommandBuffer = [mtlCommandQueue commandBuffer];
      [mtlCommandBuffer retain];
    }

    mtlComputeEncoder = [mtlCommandBuffer computeCommandEncoderWithDispatchType:dispatch_type];

    /* Declare usage of MTLBuffers etc. */
    prepare_resources(kernel);
  }

  return mtlComputeEncoder;
}
|
||||
|
||||
/* Return the open blit encoder, creating one if needed. Creating it ends any open compute
 * encoder and creates (and retains) a command buffer when none is pending. */
id<MTLBlitCommandEncoder> MetalDeviceQueue::get_blit_encoder()
{
  if (!mtlBlitEncoder) {
    /* The compute encoder has to be closed before a blit encoder is opened. */
    if (mtlComputeEncoder) {
      close_compute_encoder();
    }

    if (!mtlCommandBuffer) {
      mtlCommandBuffer = [mtlCommandQueue commandBuffer];
      [mtlCommandBuffer retain];
    }

    mtlBlitEncoder = [mtlCommandBuffer blitCommandEncoder];
  }

  return mtlBlitEncoder;
}
|
||||
|
||||
/* End encoding on the compute encoder, if one is open, and forget it. */
void MetalDeviceQueue::close_compute_encoder()
{
  /* Messaging nil is a no-op, so bailing out early is equivalent when nothing is open. */
  if (!mtlComputeEncoder) {
    return;
  }

  [mtlComputeEncoder endEncoding];
  mtlComputeEncoder = nil;
}
|
||||
|
||||
/* End encoding on the blit encoder, if one is open, and forget it. */
void MetalDeviceQueue::close_blit_encoder()
{
  if (!mtlBlitEncoder) {
    return;
  }

  [mtlBlitEncoder endEncoding];
  mtlBlitEncoder = nil;
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL */
|
@@ -1,101 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include <Metal/Metal.h>
|
||||
# include <string>
|
||||
|
||||
# include "device/metal/device.h"
|
||||
# include "device/metal/kernel.h"
|
||||
# include "device/queue.h"
|
||||
|
||||
# include "util/thread.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* GPU vendor as derived from the Metal device name. */
enum MetalGPUVendor {
  /* Device name matched none of the known vendors. */
  METAL_GPU_UNKNOWN = 0,
  /* The following enumerators continue counting from 1. */
  METAL_GPU_APPLE,
  METAL_GPU_AMD,
  METAL_GPU_INTEL,
};
|
||||
|
||||
/* Retains a named MTLDevice for device enumeration. */
|
||||
struct MetalPlatformDevice {
  /* Takes a reference on `device` for the lifetime of this object. */
  MetalPlatformDevice(id<MTLDevice> device, const string &device_name)
      : device_id(device), device_name(device_name)
  {
    [device_id retain];
  }

  /* Every copy holds its own reference. Without this, the implicitly generated copy (used by
   * e.g. `vector::push_back` and vector reallocation) would share a single retain between
   * several objects that each release it in their destructor. */
  MetalPlatformDevice(const MetalPlatformDevice &other)
      : device_id(other.device_id), device_name(other.device_name)
  {
    [device_id retain];
  }

  MetalPlatformDevice &operator=(const MetalPlatformDevice &other)
  {
    if (this != &other) {
      /* Retain the new device before releasing the old one, in case they are the same. */
      [other.device_id retain];
      [device_id release];
      device_id = other.device_id;
      device_name = other.device_name;
    }
    return *this;
  }

  /* Drops the reference held by this object. */
  ~MetalPlatformDevice()
  {
    [device_id release];
  }

  /* Retained Metal device. */
  id<MTLDevice> device_id;
  /* Name the device was registered with. */
  string device_name;
};
|
||||
|
||||
/* Contains static Metal helper functions. */
|
||||
struct MetalInfo {
  /* True when `device` can be used for rendering: requires macOS 12.0 or newer and a device
   * whose name identifies it as an Apple GPU. */
  static bool device_version_check(id<MTLDevice> device);

  /* Replace the contents of `usable_devices` with all Metal devices that pass the checks. */
  static void get_usable_devices(vector<MetalPlatformDevice> *usable_devices);

  /* Classify a device by substrings of its name ("Intel", "AMD", "Apple"). */
  static MetalGPUVendor get_vendor_from_device_name(string const &device_name);

  /* Platform information. */

  /* Store the number of Metal devices in `num_devices`; returns true on success. */
  static bool get_num_devices(uint32_t *num_devices);
  /* Number of Metal devices, or 0 when it could not be queried. */
  static uint32_t get_num_devices();

  /* Store the name of `device_id` in `device_name`; returns true on success. */
  static bool get_device_name(id<MTLDevice> device_id, string *device_name);
  /* Name of `device_id`, or an empty string when it could not be queried. */
  static string get_device_name(id<MTLDevice> device_id);
};
|
||||
|
||||
/* Pool of MTLBuffers whose lifetime is linked to a single MTLCommandBuffer */
|
||||
class MetalBufferPool {
 private:
  /* Pairs a pooled buffer with the command buffer it was handed out for. */
  struct MetalBufferListEntry {
    MetalBufferListEntry() = delete;

    MetalBufferListEntry(id<MTLBuffer> buffer, id<MTLCommandBuffer> command_buffer)
        : buffer(buffer), command_buffer(command_buffer)
    {
    }

    id<MTLBuffer> buffer;
    id<MTLCommandBuffer> command_buffer;
  };

  /* Buffers that are available for reuse. */
  std::vector<MetalBufferListEntry> buffer_free_list;
  /* Buffers handed out for a command buffer that has not been processed as completed yet. */
  std::vector<MetalBufferListEntry> buffer_in_use_list;
  /* Guards the two lists above. */
  thread_mutex buffer_mutex;
  /* Sum of the allocated sizes of the buffers created by this pool. */
  size_t total_temp_mem_size = 0;

 public:
  MetalBufferPool() = default;
  ~MetalBufferPool();

  /* Hand out a buffer of `length` bytes with the storage/cache mode given by `options`, tied to
   * `command_buffer`. When `pointer` is not null its contents are copied into the buffer. */
  id<MTLBuffer> get_buffer(id<MTLDevice> device,
                           id<MTLCommandBuffer> command_buffer,
                           NSUInteger length,
                           MTLResourceOptions options,
                           const void *pointer,
                           Stats &stats);

  /* Move all buffers tied to `command_buffer` back to the free list. */
  void process_command_buffer_completion(id<MTLCommandBuffer> command_buffer);
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL */
|
@@ -1,218 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "device/metal/util.h"
|
||||
# include "device/metal/device_impl.h"
|
||||
# include "util/md5.h"
|
||||
# include "util/path.h"
|
||||
# include "util/string.h"
|
||||
# include "util/time.h"
|
||||
|
||||
# include <pwd.h>
|
||||
# include <sys/shm.h>
|
||||
# include <time.h>
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Map a device name to a GPU vendor by substring match. The checks are made in the order
 * "Intel", "AMD", "Apple"; the first match wins and names matching none are unknown. */
MetalGPUVendor MetalInfo::get_vendor_from_device_name(string const &device_name)
{
  const auto name_contains = [&device_name](const char *token) {
    return device_name.find(token) != string::npos;
  };

  if (name_contains("Intel")) {
    return METAL_GPU_INTEL;
  }
  if (name_contains("AMD")) {
    return METAL_GPU_AMD;
  }
  if (name_contains("Apple")) {
    return METAL_GPU_APPLE;
  }
  return METAL_GPU_UNKNOWN;
}
|
||||
|
||||
/* Decide whether `device` is supported: only Apple GPUs on macOS 12.0 or newer are accepted. */
bool MetalInfo::device_version_check(id<MTLDevice> device)
{
  /* Metal Cycles doesn't work correctly on macOS versions older than 12.0 */
  if (@available(macos 12.0, *)) {
    const char *name = [[device name] UTF8String];

    /* Metal Cycles works on Apple Silicon GPUs at present */
    return get_vendor_from_device_name(name) == METAL_GPU_APPLE;
  }

  return false;
}
|
||||
|
||||
/* Replace the contents of `usable_devices` with every Metal device that has a name, is not
 * excluded by the CYCLES_METAL_FORCE_INTEL environment variable and passes
 * `device_version_check()`. Verbose log output is only emitted on the first call. */
void MetalInfo::get_usable_devices(vector<MetalPlatformDevice> *usable_devices)
{
  static bool first_time = true;
#  define FIRST_VLOG(severity) \
    if (first_time) \
    VLOG(severity)

  usable_devices->clear();

  /* A non-zero CYCLES_METAL_FORCE_INTEL restricts the list to Intel devices. */
  static const char *forceIntelStr = getenv("CYCLES_METAL_FORCE_INTEL");
  const bool forceIntel = forceIntelStr ? (atoi(forceIntelStr) != 0) : false;

  /* MTLCopyAllDevices() returns an array owned by the caller, which has to be released below
   * since this code uses manual reference counting. */
  NSArray<id<MTLDevice>> *allDevices = MTLCopyAllDevices();
  for (id<MTLDevice> device in allDevices) {
    string device_name;
    if (!get_device_name(device, &device_name)) {
      FIRST_VLOG(2) << "Failed to get device name, ignoring.";
      continue;
    }

    if (forceIntel && device_name.find("Intel") == string::npos) {
      FIRST_VLOG(2) << "CYCLES_METAL_FORCE_INTEL causing non-Intel device " << device_name
                    << " to be ignored.";
      continue;
    }

    if (!device_version_check(device)) {
      FIRST_VLOG(2) << "Ignoring device " << device_name
                    << " due to unsupported macOS version or GPU vendor.";
      continue;
    }

    FIRST_VLOG(2) << "Adding new device " << device_name << ".";
    usable_devices->push_back(MetalPlatformDevice(device, device_name));
  }
  [allDevices release];

  first_time = false;
#  undef FIRST_VLOG
}
|
||||
|
||||
/* Store the number of Metal devices in the system in `num_devices`. Always returns true. */
bool MetalInfo::get_num_devices(uint32_t *num_devices)
{
  /* The array returned by MTLCopyAllDevices() is owned by the caller; release it once the
   * count has been read so that it does not leak. */
  NSArray<id<MTLDevice>> *all_devices = MTLCopyAllDevices();
  *num_devices = static_cast<uint32_t>(all_devices.count);
  [all_devices release];
  return true;
}
|
||||
|
||||
/* Convenience overload: the number of Metal devices, or 0 when the query reports failure. */
uint32_t MetalInfo::get_num_devices()
{
  uint32_t device_count;
  const bool query_ok = get_num_devices(&device_count);
  return query_ok ? device_count : 0;
}
|
||||
|
||||
/* Store the name of `device` in `platform_name`.
 *
 * Returns false, leaving `platform_name` untouched, when no name is available (for example
 * when `device` is nil); returns true otherwise. */
bool MetalInfo::get_device_name(id<MTLDevice> device, string *platform_name)
{
  /* Messaging nil yields a null C string, which must not be assigned to a string. */
  const char *name = [device.name UTF8String];
  if (name == nullptr) {
    return false;
  }

  *platform_name = name;
  return true;
}
|
||||
|
||||
/* Convenience overload: the name of `device`, or an empty string when the query fails. */
string MetalInfo::get_device_name(id<MTLDevice> device)
{
  string name;
  if (get_device_name(device, &name)) {
    return name;
  }
  return string();
}
|
||||
|
||||
/* Hand out a temporary buffer tied to `command_buffer`.
 *
 * A buffer from the free list is reused when its length, storage mode and CPU cache mode match
 * exactly; otherwise a new buffer is created on `device` and accounted for in `stats` and
 * `total_temp_mem_size`. When `pointer` is not null, `length` bytes are copied from it into the
 * returned buffer. The buffer stays on the in-use list until
 * `process_command_buffer_completion()` is called for `command_buffer`. */
id<MTLBuffer> MetalBufferPool::get_buffer(id<MTLDevice> device,
                                          id<MTLCommandBuffer> command_buffer,
                                          NSUInteger length,
                                          MTLResourceOptions options,
                                          const void *pointer,
                                          Stats &stats)
{
  const MTLStorageMode storageMode = MTLStorageMode((options & MTLResourceStorageModeMask) >>
                                                    MTLResourceStorageModeShift);
  const MTLCPUCacheMode cpuCacheMode = MTLCPUCacheMode((options & MTLResourceCPUCacheModeMask) >>
                                                       MTLResourceCPUCacheModeShift);

  id<MTLBuffer> buffer = nil;

  {
    /* Scoped lock, so that the mutex is released on every exit path, including exceptions
     * thrown while the lists are modified. */
    thread_scoped_lock lock(buffer_mutex);

    for (auto entry = buffer_free_list.begin(); entry != buffer_free_list.end(); ++entry) {
      /* Check if buffer matches size and storage mode */
      if (entry->buffer.length == length && entry->buffer.storageMode == storageMode &&
          entry->buffer.cpuCacheMode == cpuCacheMode) {
        buffer = entry->buffer;
        buffer_free_list.erase(entry);
        buffer_in_use_list.push_back(MetalBufferListEntry(buffer, command_buffer));
        break;
      }
    }

    if (!buffer) {
      /* Nothing to reuse: create a new buffer, initialized from `pointer` when given. */
      if (pointer) {
        buffer = [device newBufferWithBytes:pointer length:length options:options];
      }
      else {
        buffer = [device newBufferWithLength:length options:options];
      }

      stats.mem_alloc(buffer.allocatedSize);
      total_temp_mem_size += buffer.allocatedSize;

      buffer_in_use_list.push_back(MetalBufferListEntry(buffer, command_buffer));
      return buffer;
    }
  }

  /* A pooled buffer is being reused: copy over the data outside of the lock. */
  if (pointer) {
    memcpy(buffer.contents, pointer, length);
    if (buffer.storageMode == MTLStorageModeManaged) {
      [buffer didModifyRange:NSMakeRange(0, length)];
    }
  }

  return buffer;
}
|
||||
|
||||
/* Return every in-use buffer that was handed out for `command_buffer` to the free list, with
 * its command buffer association cleared. Buffers tied to other command buffers are untouched. */
void MetalBufferPool::process_command_buffer_completion(id<MTLCommandBuffer> command_buffer)
{
  assert(command_buffer);
  thread_scoped_lock lock(buffer_mutex);

  auto in_use = buffer_in_use_list.begin();
  while (in_use != buffer_in_use_list.end()) {
    if (in_use->command_buffer != command_buffer) {
      ++in_use;
      continue;
    }

    /* Take the buffer out of the in-use list and make it available again. */
    MetalBufferListEntry recycled(in_use->buffer, nil);
    in_use = buffer_in_use_list.erase(in_use);
    buffer_free_list.push_back(recycled);
  }
}
|
||||
|
||||
/* Release every buffer on the free list and subtract its size from the running total.
 * Buffers still on the in-use list are not touched here. */
MetalBufferPool::~MetalBufferPool()
{
  thread_scoped_lock lock(buffer_mutex);

  for (const MetalBufferListEntry &free_entry : buffer_free_list) {
    id<MTLBuffer> pooled = free_entry.buffer;
    total_temp_mem_size -= pooled.allocatedSize;
    [pooled release];
  }
  buffer_free_list.clear();
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL */
|
@@ -124,20 +124,11 @@ class MultiDevice : public Device {
|
||||
return BVH_LAYOUT_MULTI_OPTIX;
|
||||
}
|
||||
|
||||
/* With multiple Metal devices, every device needs its own acceleration structure */
|
||||
if (bvh_layout_mask == BVH_LAYOUT_METAL) {
|
||||
return BVH_LAYOUT_MULTI_METAL;
|
||||
}
|
||||
|
||||
/* When devices do not share a common BVH layout, fall back to creating one for each */
|
||||
const BVHLayoutMask BVH_LAYOUT_OPTIX_EMBREE = (BVH_LAYOUT_OPTIX | BVH_LAYOUT_EMBREE);
|
||||
if ((bvh_layout_mask_all & BVH_LAYOUT_OPTIX_EMBREE) == BVH_LAYOUT_OPTIX_EMBREE) {
|
||||
return BVH_LAYOUT_MULTI_OPTIX_EMBREE;
|
||||
}
|
||||
const BVHLayoutMask BVH_LAYOUT_METAL_EMBREE = (BVH_LAYOUT_METAL | BVH_LAYOUT_EMBREE);
|
||||
if ((bvh_layout_mask_all & BVH_LAYOUT_METAL_EMBREE) == BVH_LAYOUT_METAL_EMBREE) {
|
||||
return BVH_LAYOUT_MULTI_METAL_EMBREE;
|
||||
}
|
||||
|
||||
return bvh_layout_mask;
|
||||
}
|
||||
@@ -160,9 +151,7 @@ class MultiDevice : public Device {
|
||||
}
|
||||
|
||||
assert(bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX ||
|
||||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL ||
|
||||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE ||
|
||||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE);
|
||||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE);
|
||||
|
||||
BVHMulti *const bvh_multi = static_cast<BVHMulti *>(bvh);
|
||||
bvh_multi->sub_bvhs.resize(devices.size());
|
||||
@@ -185,14 +174,9 @@ class MultiDevice : public Device {
|
||||
BVHParams params = bvh->params;
|
||||
if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX)
|
||||
params.bvh_layout = BVH_LAYOUT_OPTIX;
|
||||
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL)
|
||||
params.bvh_layout = BVH_LAYOUT_METAL;
|
||||
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE)
|
||||
params.bvh_layout = sub.device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX :
|
||||
BVH_LAYOUT_EMBREE;
|
||||
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE)
|
||||
params.bvh_layout = sub.device->info.type == DEVICE_METAL ? BVH_LAYOUT_METAL :
|
||||
BVH_LAYOUT_EMBREE;
|
||||
|
||||
/* Skip building a bottom level acceleration structure for non-instanced geometry on Embree
|
||||
* (since they are put into the top level directly, see bvh_embree.cpp) */
|
||||
|
@@ -28,7 +28,6 @@
|
||||
# include "scene/mesh.h"
|
||||
# include "scene/object.h"
|
||||
# include "scene/pass.h"
|
||||
# include "scene/pointcloud.h"
|
||||
# include "scene/scene.h"
|
||||
|
||||
# include "util/debug.h"
|
||||
@@ -42,19 +41,17 @@
|
||||
# define __KERNEL_OPTIX__
|
||||
# include "kernel/device/optix/globals.h"
|
||||
|
||||
# include <optix_denoiser_tiling.h>
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
|
||||
: device(device), queue(device), state(device, "__denoiser_state", true)
|
||||
: device(device), queue(device), state(device, "__denoiser_state")
|
||||
{
|
||||
}
|
||||
|
||||
OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
: CUDADevice(info, stats, profiler),
|
||||
sbt_data(this, "__sbt", MEM_READ_ONLY),
|
||||
launch_params(this, "__params", false),
|
||||
launch_params(this, "__params"),
|
||||
denoiser_(this)
|
||||
{
|
||||
/* Make the CUDA context current. */
|
||||
@@ -211,15 +208,11 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
||||
}
|
||||
else {
|
||||
module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
|
||||
module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
|
||||
module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
|
||||
}
|
||||
|
||||
module_options.boundValues = nullptr;
|
||||
module_options.numBoundValues = 0;
|
||||
# if OPTIX_ABI_VERSION >= 55
|
||||
module_options.payloadTypes = nullptr;
|
||||
module_options.numPayloadTypes = 0;
|
||||
# endif
|
||||
|
||||
OptixPipelineCompileOptions pipeline_options = {};
|
||||
/* Default to no motion blur and two-level graph, since it is the fastest option. */
|
||||
@@ -234,18 +227,11 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
||||
pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
|
||||
if (kernel_features & KERNEL_FEATURE_HAIR) {
|
||||
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
|
||||
# if OPTIX_ABI_VERSION >= 55
|
||||
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM;
|
||||
# else
|
||||
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE;
|
||||
# endif
|
||||
}
|
||||
else
|
||||
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
|
||||
}
|
||||
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
|
||||
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
|
||||
}
|
||||
|
||||
/* Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds
|
||||
* This is necessary since objects may be reported to have motion if the Vector pass is
|
||||
@@ -338,13 +324,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
||||
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
|
||||
/* Built-in thick curve intersection. */
|
||||
OptixBuiltinISOptions builtin_options = {};
|
||||
# if OPTIX_ABI_VERSION >= 55
|
||||
builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
|
||||
builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE;
|
||||
builtin_options.curveEndcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT; /* Disable end-caps. */
|
||||
# else
|
||||
builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
|
||||
# endif
|
||||
builtin_options.usesMotionBlur = false;
|
||||
|
||||
optix_assert(optixBuiltinISModuleGet(
|
||||
@@ -376,18 +356,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
||||
}
|
||||
}
|
||||
|
||||
/* Pointclouds */
|
||||
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
|
||||
group_descs[PG_HITD_POINTCLOUD] = group_descs[PG_HITD];
|
||||
group_descs[PG_HITD_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
|
||||
group_descs[PG_HITD_POINTCLOUD].hitgroup.moduleIS = optix_module;
|
||||
group_descs[PG_HITD_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point";
|
||||
group_descs[PG_HITS_POINTCLOUD] = group_descs[PG_HITS];
|
||||
group_descs[PG_HITS_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
|
||||
group_descs[PG_HITS_POINTCLOUD].hitgroup.moduleIS = optix_module;
|
||||
group_descs[PG_HITS_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point";
|
||||
}
|
||||
|
||||
if (kernel_features & (KERNEL_FEATURE_SUBSURFACE | KERNEL_FEATURE_NODE_RAYTRACE)) {
|
||||
/* Add hit group for local intersections. */
|
||||
group_descs[PG_HITL].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
|
||||
@@ -435,10 +403,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
||||
stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
|
||||
trace_css = std::max(trace_css,
|
||||
stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH);
|
||||
trace_css = std::max(
|
||||
trace_css, stack_size[PG_HITD_POINTCLOUD].cssIS + stack_size[PG_HITD_POINTCLOUD].cssAH);
|
||||
trace_css = std::max(
|
||||
trace_css, stack_size[PG_HITS_POINTCLOUD].cssIS + stack_size[PG_HITS_POINTCLOUD].cssAH);
|
||||
|
||||
OptixPipelineLinkOptions link_options = {};
|
||||
link_options.maxTraceDepth = 1;
|
||||
@@ -447,7 +411,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
||||
link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
|
||||
}
|
||||
else {
|
||||
link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
|
||||
link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
|
||||
}
|
||||
|
||||
if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
|
||||
@@ -464,10 +428,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
||||
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
|
||||
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
|
||||
}
|
||||
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
|
||||
pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
|
||||
pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
|
||||
}
|
||||
pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
|
||||
pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
|
||||
|
||||
@@ -507,10 +467,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
||||
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
|
||||
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
|
||||
}
|
||||
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
|
||||
pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
|
||||
pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
|
||||
}
|
||||
|
||||
optix_assert(optixPipelineCreate(context,
|
||||
&pipeline_options,
|
||||
@@ -551,7 +507,7 @@ class OptiXDevice::DenoiseContext {
|
||||
: denoise_params(task.params),
|
||||
render_buffers(task.render_buffers),
|
||||
buffer_params(task.buffer_params),
|
||||
guiding_buffer(device, "denoiser guiding passes buffer", true),
|
||||
guiding_buffer(device, "denoiser guiding passes buffer"),
|
||||
num_samples(task.num_samples)
|
||||
{
|
||||
num_input_passes = 1;
|
||||
@@ -566,9 +522,9 @@ class OptiXDevice::DenoiseContext {
|
||||
}
|
||||
}
|
||||
|
||||
use_guiding_passes = (num_input_passes - 1) > 0;
|
||||
const int num_guiding_passes = num_input_passes - 1;
|
||||
|
||||
if (use_guiding_passes) {
|
||||
if (num_guiding_passes) {
|
||||
if (task.allow_inplace_modification) {
|
||||
guiding_params.device_pointer = render_buffers->buffer.device_pointer;
|
||||
|
||||
@@ -621,7 +577,6 @@ class OptiXDevice::DenoiseContext {
|
||||
|
||||
/* Number of input passes. Including the color and extra auxiliary passes. */
|
||||
int num_input_passes = 0;
|
||||
bool use_guiding_passes = false;
|
||||
bool use_pass_albedo = false;
|
||||
bool use_pass_normal = false;
|
||||
|
||||
@@ -698,22 +653,22 @@ bool OptiXDevice::denoise_filter_guiding_preprocess(DenoiseContext &context)
|
||||
|
||||
const int work_size = buffer_params.width * buffer_params.height;
|
||||
|
||||
DeviceKernelArguments args(&context.guiding_params.device_pointer,
|
||||
&context.guiding_params.pass_stride,
|
||||
&context.guiding_params.pass_albedo,
|
||||
&context.guiding_params.pass_normal,
|
||||
void *args[] = {const_cast<device_ptr *>(&context.guiding_params.device_pointer),
|
||||
const_cast<int *>(&context.guiding_params.pass_stride),
|
||||
const_cast<int *>(&context.guiding_params.pass_albedo),
|
||||
const_cast<int *>(&context.guiding_params.pass_normal),
|
||||
&context.render_buffers->buffer.device_pointer,
|
||||
&buffer_params.offset,
|
||||
&buffer_params.stride,
|
||||
&buffer_params.pass_stride,
|
||||
&context.pass_sample_count,
|
||||
&context.pass_denoising_albedo,
|
||||
&context.pass_denoising_normal,
|
||||
&buffer_params.full_x,
|
||||
&buffer_params.full_y,
|
||||
&buffer_params.width,
|
||||
&buffer_params.height,
|
||||
&context.num_samples);
|
||||
const_cast<int *>(&buffer_params.offset),
|
||||
const_cast<int *>(&buffer_params.stride),
|
||||
const_cast<int *>(&buffer_params.pass_stride),
|
||||
const_cast<int *>(&context.pass_sample_count),
|
||||
const_cast<int *>(&context.pass_denoising_albedo),
|
||||
const_cast<int *>(&context.pass_denoising_normal),
|
||||
const_cast<int *>(&buffer_params.full_x),
|
||||
const_cast<int *>(&buffer_params.full_y),
|
||||
const_cast<int *>(&buffer_params.width),
|
||||
const_cast<int *>(&buffer_params.height),
|
||||
const_cast<int *>(&context.num_samples)};
|
||||
|
||||
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
|
||||
}
|
||||
@@ -724,11 +679,11 @@ bool OptiXDevice::denoise_filter_guiding_set_fake_albedo(DenoiseContext &context
|
||||
|
||||
const int work_size = buffer_params.width * buffer_params.height;
|
||||
|
||||
DeviceKernelArguments args(&context.guiding_params.device_pointer,
|
||||
&context.guiding_params.pass_stride,
|
||||
&context.guiding_params.pass_albedo,
|
||||
&buffer_params.width,
|
||||
&buffer_params.height);
|
||||
void *args[] = {const_cast<device_ptr *>(&context.guiding_params.device_pointer),
|
||||
const_cast<int *>(&context.guiding_params.pass_stride),
|
||||
const_cast<int *>(&context.guiding_params.pass_albedo),
|
||||
const_cast<int *>(&buffer_params.width),
|
||||
const_cast<int *>(&buffer_params.height)};
|
||||
|
||||
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
|
||||
}
|
||||
@@ -753,7 +708,7 @@ void OptiXDevice::denoise_pass(DenoiseContext &context, PassType pass_type)
|
||||
return;
|
||||
}
|
||||
}
|
||||
else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
|
||||
else if (!context.albedo_replaced_with_fake) {
|
||||
context.albedo_replaced_with_fake = true;
|
||||
if (!denoise_filter_guiding_set_fake_albedo(context)) {
|
||||
LOG(ERROR) << "Error replacing real albedo with the fake one.";
|
||||
@@ -824,15 +779,15 @@ bool OptiXDevice::denoise_filter_color_preprocess(DenoiseContext &context, const
|
||||
|
||||
const int work_size = buffer_params.width * buffer_params.height;
|
||||
|
||||
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
|
||||
&buffer_params.full_x,
|
||||
&buffer_params.full_y,
|
||||
&buffer_params.width,
|
||||
&buffer_params.height,
|
||||
&buffer_params.offset,
|
||||
&buffer_params.stride,
|
||||
&buffer_params.pass_stride,
|
||||
&pass.denoised_offset);
|
||||
void *args[] = {&context.render_buffers->buffer.device_pointer,
|
||||
const_cast<int *>(&buffer_params.full_x),
|
||||
const_cast<int *>(&buffer_params.full_y),
|
||||
const_cast<int *>(&buffer_params.width),
|
||||
const_cast<int *>(&buffer_params.height),
|
||||
const_cast<int *>(&buffer_params.offset),
|
||||
const_cast<int *>(&buffer_params.stride),
|
||||
const_cast<int *>(&buffer_params.pass_stride),
|
||||
const_cast<int *>(&pass.denoised_offset)};
|
||||
|
||||
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
|
||||
}
|
||||
@@ -844,20 +799,20 @@ bool OptiXDevice::denoise_filter_color_postprocess(DenoiseContext &context,
|
||||
|
||||
const int work_size = buffer_params.width * buffer_params.height;
|
||||
|
||||
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
|
||||
&buffer_params.full_x,
|
||||
&buffer_params.full_y,
|
||||
&buffer_params.width,
|
||||
&buffer_params.height,
|
||||
&buffer_params.offset,
|
||||
&buffer_params.stride,
|
||||
&buffer_params.pass_stride,
|
||||
&context.num_samples,
|
||||
&pass.noisy_offset,
|
||||
&pass.denoised_offset,
|
||||
&context.pass_sample_count,
|
||||
&pass.num_components,
|
||||
&pass.use_compositing);
|
||||
void *args[] = {&context.render_buffers->buffer.device_pointer,
|
||||
const_cast<int *>(&buffer_params.full_x),
|
||||
const_cast<int *>(&buffer_params.full_y),
|
||||
const_cast<int *>(&buffer_params.width),
|
||||
const_cast<int *>(&buffer_params.height),
|
||||
const_cast<int *>(&buffer_params.offset),
|
||||
const_cast<int *>(&buffer_params.stride),
|
||||
const_cast<int *>(&buffer_params.pass_stride),
|
||||
const_cast<int *>(&context.num_samples),
|
||||
const_cast<int *>(&pass.noisy_offset),
|
||||
const_cast<int *>(&pass.denoised_offset),
|
||||
const_cast<int *>(&context.pass_sample_count),
|
||||
const_cast<int *>(&pass.num_components),
|
||||
const_cast<bool *>(&pass.use_compositing)};
|
||||
|
||||
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
|
||||
}
|
||||
@@ -915,33 +870,35 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
|
||||
|
||||
bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
|
||||
{
|
||||
/* Limit maximum tile size denoiser can be invoked with. */
|
||||
const int2 tile_size = make_int2(min(context.buffer_params.width, 4096),
|
||||
min(context.buffer_params.height, 4096));
|
||||
|
||||
if (denoiser_.is_configured &&
|
||||
(denoiser_.configured_size.x == tile_size.x && denoiser_.configured_size.y == tile_size.y)) {
|
||||
if (denoiser_.is_configured && (denoiser_.configured_size.x == context.buffer_params.width &&
|
||||
denoiser_.configured_size.y == context.buffer_params.height)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const BufferParams &buffer_params = context.buffer_params;
|
||||
|
||||
OptixDenoiserSizes sizes = {};
|
||||
optix_assert(optixDenoiserComputeMemoryResources(
|
||||
denoiser_.optix_denoiser, tile_size.x, tile_size.y, &denoiser_.sizes));
|
||||
denoiser_.optix_denoiser, buffer_params.width, buffer_params.height, &sizes));
|
||||
|
||||
/* Denoiser is invoked on whole images only, so no overlap needed (would be used for tiling). */
|
||||
denoiser_.scratch_size = sizes.withoutOverlapScratchSizeInBytes;
|
||||
denoiser_.scratch_offset = sizes.stateSizeInBytes;
|
||||
|
||||
/* Allocate denoiser state if tile size has changed since last setup. */
|
||||
denoiser_.state.alloc_to_device(denoiser_.sizes.stateSizeInBytes +
|
||||
denoiser_.sizes.withOverlapScratchSizeInBytes);
|
||||
denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size);
|
||||
|
||||
/* Initialize denoiser state for the current tile size. */
|
||||
const OptixResult result = optixDenoiserSetup(
|
||||
denoiser_.optix_denoiser,
|
||||
0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
|
||||
on a stream that is not the default stream */
|
||||
tile_size.x + denoiser_.sizes.overlapWindowSizeInPixels * 2,
|
||||
tile_size.y + denoiser_.sizes.overlapWindowSizeInPixels * 2,
|
||||
buffer_params.width,
|
||||
buffer_params.height,
|
||||
denoiser_.state.device_pointer,
|
||||
denoiser_.sizes.stateSizeInBytes,
|
||||
denoiser_.state.device_pointer + denoiser_.sizes.stateSizeInBytes,
|
||||
denoiser_.sizes.withOverlapScratchSizeInBytes);
|
||||
denoiser_.scratch_offset,
|
||||
denoiser_.state.device_pointer + denoiser_.scratch_offset,
|
||||
denoiser_.scratch_size);
|
||||
if (result != OPTIX_SUCCESS) {
|
||||
set_error("Failed to set up OptiX denoiser");
|
||||
return false;
|
||||
@@ -950,7 +907,8 @@ bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
|
||||
cuda_assert(cuCtxSynchronize());
|
||||
|
||||
denoiser_.is_configured = true;
|
||||
denoiser_.configured_size = tile_size;
|
||||
denoiser_.configured_size.x = buffer_params.width;
|
||||
denoiser_.configured_size.y = buffer_params.height;
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -1021,20 +979,18 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
|
||||
guide_layers.albedo = albedo_layer;
|
||||
guide_layers.normal = normal_layer;
|
||||
|
||||
optix_assert(optixUtilDenoiserInvokeTiled(denoiser_.optix_denoiser,
|
||||
optix_assert(optixDenoiserInvoke(denoiser_.optix_denoiser,
|
||||
denoiser_.queue.stream(),
|
||||
&params,
|
||||
denoiser_.state.device_pointer,
|
||||
denoiser_.sizes.stateSizeInBytes,
|
||||
denoiser_.scratch_offset,
|
||||
&guide_layers,
|
||||
&image_layers,
|
||||
1,
|
||||
denoiser_.state.device_pointer +
|
||||
denoiser_.sizes.stateSizeInBytes,
|
||||
denoiser_.sizes.withOverlapScratchSizeInBytes,
|
||||
denoiser_.sizes.overlapWindowSizeInPixels,
|
||||
denoiser_.configured_size.x,
|
||||
denoiser_.configured_size.y));
|
||||
0,
|
||||
0,
|
||||
denoiser_.state.device_pointer + denoiser_.scratch_offset,
|
||||
denoiser_.scratch_size));
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -1044,13 +1000,6 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
|
||||
const OptixBuildInput &build_input,
|
||||
uint16_t num_motion_steps)
|
||||
{
|
||||
/* Allocate and build acceleration structures only one at a time, to prevent parallel builds
|
||||
* from running out of memory (since both original and compacted acceleration structure memory
|
||||
* may be allocated at the same time for the duration of this function). The builds would
|
||||
* otherwise happen on the same CUDA stream anyway. */
|
||||
static thread_mutex mutex;
|
||||
thread_scoped_lock lock(mutex);
|
||||
|
||||
const CUDAContextScope scope(this);
|
||||
|
||||
const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC);
|
||||
@@ -1076,14 +1025,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
|
||||
optix_assert(optixAccelComputeMemoryUsage(context, &options, &build_input, 1, &sizes));
|
||||
|
||||
/* Allocate required output buffers. */
|
||||
device_only_memory<char> temp_mem(this, "optix temp as build mem", true);
|
||||
device_only_memory<char> temp_mem(this, "optix temp as build mem");
|
||||
temp_mem.alloc_to_device(align_up(sizes.tempSizeInBytes, 8) + 8);
|
||||
if (!temp_mem.device_pointer) {
|
||||
/* Make sure temporary memory allocation succeeded. */
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Acceleration structure memory has to be allocated on the device (not allowed on the host). */
|
||||
device_only_memory<char> &out_data = *bvh->as_data;
|
||||
if (operation == OPTIX_BUILD_OPERATION_BUILD) {
|
||||
assert(out_data.device == this);
|
||||
@@ -1132,13 +1080,12 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
|
||||
|
||||
/* There is no point compacting if the size does not change. */
|
||||
if (compacted_size < sizes.outputSizeInBytes) {
|
||||
device_only_memory<char> compacted_data(this, "optix compacted as", false);
|
||||
device_only_memory<char> compacted_data(this, "optix compacted as");
|
||||
compacted_data.alloc_to_device(compacted_size);
|
||||
if (!compacted_data.device_pointer) {
|
||||
if (!compacted_data.device_pointer)
|
||||
/* Do not compact if memory allocation for compacted acceleration structure fails.
|
||||
* Can just use the uncompacted one then, so succeed here regardless. */
|
||||
return !have_error();
|
||||
}
|
||||
|
||||
optix_assert(optixAccelCompact(
|
||||
context, NULL, out_handle, compacted_data.device_pointer, compacted_size, &out_handle));
|
||||
@@ -1149,8 +1096,6 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
|
||||
|
||||
std::swap(out_data.device_size, compacted_data.device_size);
|
||||
std::swap(out_data.device_pointer, compacted_data.device_pointer);
|
||||
/* Original acceleration structure memory is freed when 'compacted_data' goes out of scope.
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1233,27 +1178,20 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
int ka = max(k0 - 1, curve.first_key);
|
||||
int kb = min(k1 + 1, curve.first_key + curve.num_keys - 1);
|
||||
|
||||
index_data[i] = i * 4;
|
||||
float4 *const v = vertex_data.data() + step * num_vertices + index_data[i];
|
||||
|
||||
# if OPTIX_ABI_VERSION >= 55
|
||||
v[0] = make_float4(keys[ka].x, keys[ka].y, keys[ka].z, curve_radius[ka]);
|
||||
v[1] = make_float4(keys[k0].x, keys[k0].y, keys[k0].z, curve_radius[k0]);
|
||||
v[2] = make_float4(keys[k1].x, keys[k1].y, keys[k1].z, curve_radius[k1]);
|
||||
v[3] = make_float4(keys[kb].x, keys[kb].y, keys[kb].z, curve_radius[kb]);
|
||||
# else
|
||||
const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x);
|
||||
const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y);
|
||||
const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z);
|
||||
const float4 pw = make_float4(
|
||||
curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]);
|
||||
|
||||
/* Convert Catmull-Rom data to B-spline. */
|
||||
/* Convert Catmull-Rom data to Bezier spline. */
|
||||
static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f;
|
||||
static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f;
|
||||
static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f;
|
||||
static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f;
|
||||
|
||||
index_data[i] = i * 4;
|
||||
float4 *const v = vertex_data.data() + step * num_vertices + index_data[i];
|
||||
v[0] = make_float4(
|
||||
dot(cr2bsp0, px), dot(cr2bsp0, py), dot(cr2bsp0, pz), dot(cr2bsp0, pw));
|
||||
v[1] = make_float4(
|
||||
@@ -1262,7 +1200,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
dot(cr2bsp2, px), dot(cr2bsp2, py), dot(cr2bsp2, pz), dot(cr2bsp2, pw));
|
||||
v[3] = make_float4(
|
||||
dot(cr2bsp3, px), dot(cr2bsp3, py), dot(cr2bsp3, pz), dot(cr2bsp3, pw));
|
||||
# endif
|
||||
}
|
||||
else {
|
||||
BoundBox bounds = BoundBox::empty;
|
||||
@@ -1304,11 +1241,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
OptixBuildInput build_input = {};
|
||||
if (hair->curve_shape == CURVE_THICK) {
|
||||
build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES;
|
||||
# if OPTIX_ABI_VERSION >= 55
|
||||
build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
|
||||
# else
|
||||
build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
|
||||
# endif
|
||||
build_input.curveArray.numPrimitives = num_segments;
|
||||
build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
|
||||
build_input.curveArray.numVertices = num_vertices;
|
||||
@@ -1322,7 +1255,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
}
|
||||
else {
|
||||
/* Disable visibility test any-hit program, since it is already checked during
|
||||
* intersection. Those trace calls that require any-hit can force it with a ray flag. */
|
||||
* intersection. Those trace calls that require anyhit can force it with a ray flag. */
|
||||
build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT;
|
||||
|
||||
build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
|
||||
@@ -1402,86 +1335,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
build_input.triangleArray.numSbtRecords = 1;
|
||||
build_input.triangleArray.primitiveIndexOffset = mesh->prim_offset;
|
||||
|
||||
if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
|
||||
progress.set_error("Failed to build OptiX acceleration structure");
|
||||
}
|
||||
}
|
||||
else if (geom->geometry_type == Geometry::POINTCLOUD) {
|
||||
/* Build BLAS for points primitives. */
|
||||
PointCloud *const pointcloud = static_cast<PointCloud *const>(geom);
|
||||
const size_t num_points = pointcloud->num_points();
|
||||
if (num_points == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
size_t num_motion_steps = 1;
|
||||
Attribute *motion_points = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
if (motion_blur && pointcloud->get_use_motion_blur() && motion_points) {
|
||||
num_motion_steps = pointcloud->get_motion_steps();
|
||||
}
|
||||
|
||||
device_vector<OptixAabb> aabb_data(this, "optix temp aabb data", MEM_READ_ONLY);
|
||||
aabb_data.alloc(num_points * num_motion_steps);
|
||||
|
||||
/* Get AABBs for each motion step. */
|
||||
for (size_t step = 0; step < num_motion_steps; ++step) {
|
||||
/* The center step for motion vertices is not stored in the attribute. */
|
||||
const float3 *points = pointcloud->get_points().data();
|
||||
const float *radius = pointcloud->get_radius().data();
|
||||
size_t center_step = (num_motion_steps - 1) / 2;
|
||||
if (step != center_step) {
|
||||
size_t attr_offset = (step > center_step) ? step - 1 : step;
|
||||
/* Technically this is a float4 array, but sizeof(float3) == sizeof(float4). */
|
||||
points = motion_points->data_float3() + attr_offset * num_points;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < num_points; ++i) {
|
||||
const PointCloud::Point point = pointcloud->get_point(i);
|
||||
BoundBox bounds = BoundBox::empty;
|
||||
point.bounds_grow(points, radius, bounds);
|
||||
|
||||
const size_t index = step * num_points + i;
|
||||
aabb_data[index].minX = bounds.min.x;
|
||||
aabb_data[index].minY = bounds.min.y;
|
||||
aabb_data[index].minZ = bounds.min.z;
|
||||
aabb_data[index].maxX = bounds.max.x;
|
||||
aabb_data[index].maxY = bounds.max.y;
|
||||
aabb_data[index].maxZ = bounds.max.z;
|
||||
}
|
||||
}
|
||||
|
||||
/* Upload AABB data to GPU. */
|
||||
aabb_data.copy_to_device();
|
||||
|
||||
vector<device_ptr> aabb_ptrs;
|
||||
aabb_ptrs.reserve(num_motion_steps);
|
||||
for (size_t step = 0; step < num_motion_steps; ++step) {
|
||||
aabb_ptrs.push_back(aabb_data.device_pointer + step * num_points * sizeof(OptixAabb));
|
||||
}
|
||||
|
||||
/* Disable visibility test any-hit program, since it is already checked during
|
||||
* intersection. Those trace calls that require anyhit can force it with a ray flag.
|
||||
* For those, force a single any-hit call, so shadow record-all behavior works correctly. */
|
||||
unsigned int build_flags = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT |
|
||||
OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
|
||||
OptixBuildInput build_input = {};
|
||||
build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
|
||||
# if OPTIX_ABI_VERSION < 23
|
||||
build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
|
||||
build_input.aabbArray.numPrimitives = num_points;
|
||||
build_input.aabbArray.strideInBytes = sizeof(OptixAabb);
|
||||
build_input.aabbArray.flags = &build_flags;
|
||||
build_input.aabbArray.numSbtRecords = 1;
|
||||
build_input.aabbArray.primitiveIndexOffset = pointcloud->prim_offset;
|
||||
# else
|
||||
build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
|
||||
build_input.customPrimitiveArray.numPrimitives = num_points;
|
||||
build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb);
|
||||
build_input.customPrimitiveArray.flags = &build_flags;
|
||||
build_input.customPrimitiveArray.numSbtRecords = 1;
|
||||
build_input.customPrimitiveArray.primitiveIndexOffset = pointcloud->prim_offset;
|
||||
# endif
|
||||
|
||||
if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
|
||||
progress.set_error("Failed to build OptiX acceleration structure");
|
||||
}
|
||||
@@ -1569,22 +1422,9 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
|
||||
}
|
||||
}
|
||||
else if (ob->get_geometry()->geometry_type == Geometry::POINTCLOUD) {
|
||||
/* Use the hit group that has an intersection program for point clouds. */
|
||||
instance.sbtOffset = PG_HITD_POINTCLOUD - PG_HITD;
|
||||
|
||||
/* Also skip point clouds in local trace calls. */
|
||||
instance.visibilityMask |= 4;
|
||||
}
|
||||
|
||||
# if OPTIX_ABI_VERSION < 55
|
||||
/* Cannot disable any-hit program for thick curves, since it needs to filter out end-caps. */
|
||||
else
|
||||
# endif
|
||||
{
|
||||
else {
|
||||
/* Can disable __anyhit__kernel_optix_visibility_test by default (except for thick curves,
|
||||
* since it needs to filter out end-caps there).
|
||||
|
||||
* It is enabled where necessary (visibility mask exceeds 8 bits or the other any-hit
|
||||
* programs like __anyhit__kernel_optix_shadow_all_hit) via OPTIX_RAY_FLAG_ENFORCE_ANYHIT.
|
||||
*/
|
||||
@@ -1654,6 +1494,9 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
cuMemcpyHtoD(motion_transform_gpu, &motion_transform, motion_transform_size);
|
||||
delete[] reinterpret_cast<uint8_t *>(&motion_transform);
|
||||
|
||||
/* Disable instance transform if object uses motion transform already. */
|
||||
instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
|
||||
|
||||
/* Get traversable handle to motion transform. */
|
||||
optixConvertPointerToTraversableHandle(context,
|
||||
motion_transform_gpu,
|
||||
@@ -1667,6 +1510,10 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
/* Set transform matrix. */
|
||||
memcpy(instance.transform, &ob->get_tfm(), sizeof(instance.transform));
|
||||
}
|
||||
else {
|
||||
/* Disable instance transform if geometry already has it applied to vertex data. */
|
||||
instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -44,8 +44,6 @@ enum {
|
||||
PG_HITV, /* __VOLUME__ hit group. */
|
||||
PG_HITD_MOTION,
|
||||
PG_HITS_MOTION,
|
||||
PG_HITD_POINTCLOUD,
|
||||
PG_HITS_POINTCLOUD,
|
||||
PG_CALL_SVM_AO,
|
||||
PG_CALL_SVM_BEVEL,
|
||||
NUM_PROGRAM_GROUPS
|
||||
@@ -54,9 +52,9 @@ enum {
|
||||
static const int MISS_PROGRAM_GROUP_OFFSET = PG_MISS;
|
||||
static const int NUM_MIS_PROGRAM_GROUPS = 1;
|
||||
static const int HIT_PROGAM_GROUP_OFFSET = PG_HITD;
|
||||
static const int NUM_HIT_PROGRAM_GROUPS = 8;
|
||||
static const int NUM_HIT_PROGRAM_GROUPS = 6;
|
||||
static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
|
||||
static const int NUM_CALLABLE_PROGRAM_GROUPS = 2;
|
||||
static const int NUM_CALLABLE_PROGRAM_GROUPS = 3;
|
||||
|
||||
/* List of OptiX pipelines. */
|
||||
enum { PIP_SHADE_RAYTRACE, PIP_INTERSECT, NUM_PIPELINES };
|
||||
@@ -100,7 +98,8 @@ class OptiXDevice : public CUDADevice {
|
||||
/* OptiX denoiser state and scratch buffers, stored in a single memory buffer.
|
||||
* The memory layout goes as following: [denoiser state][scratch buffer]. */
|
||||
device_only_memory<unsigned char> state;
|
||||
OptixDenoiserSizes sizes = {};
|
||||
size_t scratch_offset = 0;
|
||||
size_t scratch_size = 0;
|
||||
|
||||
bool use_pass_albedo = false;
|
||||
bool use_pass_normal = false;
|
||||
|
@@ -47,9 +47,7 @@ static bool is_optix_specific_kernel(DeviceKernel kernel)
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
|
||||
}
|
||||
|
||||
bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
const int work_size,
|
||||
DeviceKernelArguments const &args)
|
||||
bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
|
||||
{
|
||||
if (!is_optix_specific_kernel(kernel)) {
|
||||
return CUDADeviceQueue::enqueue(kernel, work_size, args);
|
||||
@@ -71,7 +69,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
cuda_device_assert(
|
||||
cuda_device_,
|
||||
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, path_index_array),
|
||||
args.values[0], // &d_path_index
|
||||
args[0], // &d_path_index
|
||||
sizeof(device_ptr),
|
||||
cuda_stream_));
|
||||
|
||||
@@ -80,7 +78,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
cuda_device_assert(
|
||||
cuda_device_,
|
||||
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
|
||||
args.values[1], // &d_render_buffer
|
||||
args[1], // &d_render_buffer
|
||||
sizeof(device_ptr),
|
||||
cuda_stream_));
|
||||
}
|
||||
|
@@ -31,9 +31,7 @@ class OptiXDeviceQueue : public CUDADeviceQueue {
|
||||
|
||||
virtual void init_execution() override;
|
||||
|
||||
virtual bool enqueue(DeviceKernel kernel,
|
||||
const int work_size,
|
||||
DeviceKernelArguments const &args) override;
|
||||
virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -31,72 +31,6 @@ class device_memory;
|
||||
|
||||
struct KernelWorkTile;
|
||||
|
||||
/* Container for device kernel arguments with type correctness ensured by API. */
|
||||
struct DeviceKernelArguments {
|
||||
|
||||
enum Type {
|
||||
POINTER,
|
||||
INT32,
|
||||
FLOAT32,
|
||||
BOOLEAN,
|
||||
KERNEL_FILM_CONVERT,
|
||||
};
|
||||
|
||||
static const int MAX_ARGS = 16;
|
||||
Type types[MAX_ARGS];
|
||||
void *values[MAX_ARGS];
|
||||
size_t sizes[MAX_ARGS];
|
||||
size_t count = 0;
|
||||
|
||||
DeviceKernelArguments()
|
||||
{
|
||||
}
|
||||
|
||||
template<class T> DeviceKernelArguments(const T *arg)
|
||||
{
|
||||
add(arg);
|
||||
}
|
||||
|
||||
template<class T, class... Args> DeviceKernelArguments(const T *first, Args... args)
|
||||
{
|
||||
add(first);
|
||||
add(args...);
|
||||
}
|
||||
|
||||
void add(const KernelFilmConvert *value)
|
||||
{
|
||||
add(KERNEL_FILM_CONVERT, value, sizeof(KernelFilmConvert));
|
||||
}
|
||||
void add(const device_ptr *value)
|
||||
{
|
||||
add(POINTER, value, sizeof(device_ptr));
|
||||
}
|
||||
void add(const int32_t *value)
|
||||
{
|
||||
add(INT32, value, sizeof(int32_t));
|
||||
}
|
||||
void add(const float *value)
|
||||
{
|
||||
add(FLOAT32, value, sizeof(float));
|
||||
}
|
||||
void add(const bool *value)
|
||||
{
|
||||
add(BOOLEAN, value, 4);
|
||||
}
|
||||
void add(const Type type, const void *value, size_t size)
|
||||
{
|
||||
types[count] = type;
|
||||
values[count] = (void *)value;
|
||||
sizes[count] = size;
|
||||
count++;
|
||||
}
|
||||
template<typename T, typename... Args> void add(const T *first, Args... args)
|
||||
{
|
||||
add(first);
|
||||
add(args...);
|
||||
}
|
||||
};
|
||||
|
||||
/* Abstraction of a command queue for a device.
|
||||
* Provides API to schedule kernel execution in a specific queue with minimal possible overhead
|
||||
* from driver side.
|
||||
@@ -132,9 +66,7 @@ class DeviceQueue {
|
||||
* - int: pass pointer to the int
|
||||
* - device memory: pass pointer to device_memory.device_pointer
|
||||
* Return false if there was an error executing this or a previous kernel. */
|
||||
virtual bool enqueue(DeviceKernel kernel,
|
||||
const int work_size,
|
||||
DeviceKernelArguments const &args) = 0;
|
||||
virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) = 0;
|
||||
|
||||
/* Wait until all enqueued kernels have finished execution.
|
||||
* Return false if there was an error executing any of the enqueued kernels. */
|
||||
|
@@ -31,7 +31,7 @@ struct Node;
|
||||
struct NodeType;
|
||||
struct Transform;
|
||||
|
||||
/* NOTE: in the following macros we use "type const &" instead of "const type &"
|
||||
/* Note: in the following macros we use "type const &" instead of "const type &"
|
||||
* to avoid issues when pasting a pointer type. */
|
||||
#define NODE_SOCKET_API_BASE_METHODS(type_, name, string_name) \
|
||||
const SocketType *get_##name##_socket() const \
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user