Compare commits
102 Commits
tmp-vulkan
...
temp-virtu
Author | SHA1 | Date | |
---|---|---|---|
e1c1c65578 | |||
2154631e85 | |||
71b7e59e28 | |||
7106fe299e | |||
4b8e52bdbc | |||
462d3bde6e | |||
e354ff5b86 | |||
27da814444 | |||
493571c3c3 | |||
a6eb04cce4 | |||
139f69b3b9 | |||
8a1cc2f291 | |||
572fa82cc9 | |||
c8ab4dcce1 | |||
55a6af1c1e | |||
0827245a91 | |||
2f284f42b6 | |||
1131bf6ec6 | |||
2b0e890a55 | |||
c4d3380d2e | |||
05cef9da88 | |||
43a53f97c4 | |||
1d7a893d12 | |||
5e60ba93f7 | |||
65258dc98f | |||
d449e66c1d | |||
01fe332b9d | |||
d6b0f3a3a4 | |||
928b7b5340 | |||
aa22839aa1 | |||
78daf1b697 | |||
dcd9b21d07 | |||
42b350ed7f | |||
e79c7e088e | |||
99c5b788eb | |||
a2b5a74d49 | |||
84626dfa16 | |||
3d0abb3be9 | |||
9b6c13e66c | |||
7f4273d373 | |||
d1b8d6acd3 | |||
2267f19486 | |||
acd8874205 | |||
a69bd34fad | |||
2516fc953a | |||
50f93cbf52 | |||
8af541dcfd | |||
f8c0682a67 | |||
b40e753c81 | |||
2cff0676af | |||
b0444a347b | |||
becfc547b9 | |||
6a558a18f9 | |||
c066102f30 | |||
ba49545060 | |||
ca70c0521a | |||
4323b3f592 | |||
d1e6606d4d | |||
b64ad0b30a | |||
ed4b15ecd3 | |||
f79a673bf1 | |||
1438f16663 | |||
d613451c3a | |||
3f8ac0e5ef | |||
56bd1f46bd | |||
1e4cbec720 | |||
f9b669c588 | |||
1ac0a2db11 | |||
771cce22f1 | |||
c1a1046ed7 | |||
f430e7850a | |||
116eb09681 | |||
841d393c15 | |||
bc1acbcc5a | |||
cd49f68db6 | |||
5b51065347 | |||
e736fe67c9 | |||
1d20f60616 | |||
91b58c95ac | |||
955b01e359 | |||
1b3758f205 | |||
33870b21f3 | |||
e49f70745b | |||
c48a99542d | |||
68b5670268 | |||
dea268de72 | |||
399463c548 | |||
1d94de4ac3 | |||
0453361d20 | |||
b21a3ae6ad | |||
edb8ccf31f | |||
7af712c343 | |||
a9e0cb6ec8 | |||
5a7b30f8f9 | |||
024c07af84 | |||
c256298924 | |||
a11950531f | |||
ae01d1db98 | |||
f3a5b31196 | |||
8d1a5fcfea | |||
39c4103f9e | |||
4e03ac6b25 |
@@ -411,7 +411,6 @@ option(WITH_CYCLES "Enable Cycles Render Engine" ON)
|
||||
option(WITH_CYCLES_OSL "Build Cycles with OpenShadingLanguage support" ON)
|
||||
option(WITH_CYCLES_EMBREE "Build Cycles with Embree support" ON)
|
||||
option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON)
|
||||
option(WITH_CYCLES_DEBUG "Build Cycles with options useful for debugging (e.g., MIS)" OFF)
|
||||
|
||||
option(WITH_CYCLES_STANDALONE "Build Cycles standalone application" OFF)
|
||||
option(WITH_CYCLES_STANDALONE_GUI "Build Cycles standalone with GUI" OFF)
|
||||
@@ -441,11 +440,7 @@ mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
|
||||
mark_as_advanced(WITH_CUDA_DYNLOAD)
|
||||
|
||||
# AMD HIP
|
||||
if(WIN32)
|
||||
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
|
||||
else()
|
||||
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" OFF)
|
||||
endif()
|
||||
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" OFF)
|
||||
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
|
||||
set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 CACHE STRING "AMD HIP architectures to build binaries for")
|
||||
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
|
||||
@@ -514,13 +509,9 @@ if(UNIX AND NOT APPLE)
|
||||
endif()
|
||||
|
||||
|
||||
# Vulkan
|
||||
option(WITH_VULKAN "Enable Vulkan backend (Experimental)" OFF)
|
||||
option(WITH_VULKAN_SHADER_COMPILATION "Temporary flag to enable vulkan shader compilation needed to continue development during the migration of GLSL to Vulkan." OFF)
|
||||
|
||||
# OpenGL
|
||||
|
||||
option(WITH_OPENGL "When off limits visibility of the opengl headers to just bf_gpu (temporary option for development purposes)" ON)
|
||||
option(WITH_OPENGL "When off limits visibility of the opengl headers to just bf_gpu and gawain (temporary option for development purposes)" ON)
|
||||
option(WITH_GLEW_ES "Switches to experimental copy of GLEW that has support for OpenGL ES. (temporary option for development purposes)" OFF)
|
||||
option(WITH_GL_EGL "Use the EGL OpenGL system library instead of the platform specific OpenGL system library (CGL, glX, or WGL)" OFF)
|
||||
option(WITH_GL_PROFILE_ES20 "Support using OpenGL ES 2.0. (through either EGL or the AGL/WGL/XGL 'es20' profile)" OFF)
|
||||
@@ -530,7 +521,6 @@ mark_as_advanced(
|
||||
WITH_GLEW_ES
|
||||
WITH_GL_EGL
|
||||
WITH_GL_PROFILE_ES20
|
||||
WITH_VULKAN_SHADER_COMPILATION
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
@@ -652,7 +642,7 @@ if(WIN32)
|
||||
option(WITH_WINDOWS_PDB "Generate a pdb file for client side stacktraces" ON)
|
||||
mark_as_advanced(WITH_WINDOWS_PDB)
|
||||
|
||||
option(WITH_WINDOWS_STRIPPED_PDB "Use a stripped PDB file" ON)
|
||||
option(WITH_WINDOWS_STRIPPED_PDB "Use a stripped PDB file" On)
|
||||
mark_as_advanced(WITH_WINDOWS_STRIPPED_PDB)
|
||||
|
||||
endif()
|
||||
@@ -1075,7 +1065,7 @@ if(MSVC)
|
||||
add_definitions(-D__LITTLE_ENDIAN__)
|
||||
|
||||
# OSX-Note: as we do cross-compiling with specific set architecture,
|
||||
# endianness-detection and auto-setting is counterproductive
|
||||
# endianess-detection and auto-setting is counterproductive
|
||||
# so we just set endianness according CMAKE_OSX_ARCHITECTURES
|
||||
|
||||
elseif(CMAKE_OSX_ARCHITECTURES MATCHES i386 OR CMAKE_OSX_ARCHITECTURES MATCHES x86_64 OR CMAKE_OSX_ARCHITECTURES MATCHES arm64)
|
||||
@@ -1131,18 +1121,6 @@ if(WITH_OPENVDB)
|
||||
list(APPEND OPENVDB_LIBRARIES ${BOOST_LIBRARIES} ${TBB_LIBRARIES})
|
||||
endif()
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure Vulkan.
|
||||
|
||||
if(WITH_VULKAN)
|
||||
list(APPEND BLENDER_GL_LIBRARIES ${Vulkan_LIBRARY})
|
||||
|
||||
add_definitions(-DWITH_VULKAN)
|
||||
if(WITH_VULKAN_SHADER_COMPILATION)
|
||||
add_definitions(-DWITH_VULKAN_SHADER_COMPILATION)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure OpenGL.
|
||||
|
||||
@@ -1777,7 +1755,7 @@ endif()
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
# If C++17 is not available, downgrading to an earlier standard is NOT OK.
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
# Do not enable compiler specific language extensions.
|
||||
# Do not enable compiler specific language extentions.
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
|
||||
# Make MSVC properly report the value of the __cplusplus preprocessor macro
|
||||
|
@@ -51,7 +51,7 @@ Other Convenience Targets
|
||||
* config: Run cmake configuration tool to set build options.
|
||||
* deps: Build library dependencies (intended only for platform maintainers).
|
||||
|
||||
The existance of locally build dependencies overrides the pre-built dependencies from subversion.
|
||||
The existance of locally build dependancies overrides the pre-built dependencies from subversion.
|
||||
These must be manually removed from '../lib/' to go back to using the pre-compiled libraries.
|
||||
|
||||
Project Files
|
||||
|
@@ -17,7 +17,7 @@
|
||||
# ***** END GPL LICENSE BLOCK *****
|
||||
|
||||
########################################################################
|
||||
# Copy all generated files to the proper structure as blender prefers
|
||||
# Copy all generated files to the proper strucure as blender prefers
|
||||
########################################################################
|
||||
|
||||
if(NOT DEFINED HARVEST_TARGET)
|
||||
|
@@ -42,7 +42,6 @@ ExternalProject_Add(nanovdb
|
||||
URL_HASH ${NANOVDB_HASH_TYPE}=${NANOVDB_HASH}
|
||||
PREFIX ${BUILD_DIR}/nanovdb
|
||||
SOURCE_SUBDIR nanovdb
|
||||
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/nanovdb/src/nanovdb < ${PATCH_DIR}/nanovdb.diff
|
||||
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/nanovdb ${DEFAULT_CMAKE_FLAGS} ${NANOVDB_EXTRA_ARGS}
|
||||
INSTALL_DIR ${LIBDIR}/nanovdb
|
||||
)
|
||||
|
@@ -39,7 +39,7 @@ endif()
|
||||
set(DOWNLOAD_DIR "${CMAKE_CURRENT_BINARY_DIR}/downloads" CACHE STRING "Path for downloaded files")
|
||||
# This path must be hard-coded like this, so that the GNUmakefile knows where it is and can pass it to make_source_archive.py:
|
||||
set(PACKAGE_DIR "${CMAKE_CURRENT_BINARY_DIR}/packages")
|
||||
option(PACKAGE_USE_UPSTREAM_SOURCES "Use sources upstream to download the package sources, when OFF the blender mirror will be used" ON)
|
||||
option(PACKAGE_USE_UPSTREAM_SOURCES "Use soures upstream to download the package sources, when OFF the blender mirror will be used" ON)
|
||||
|
||||
file(TO_CMAKE_PATH ${DOWNLOAD_DIR} DOWNLOAD_DIR)
|
||||
file(TO_CMAKE_PATH ${PACKAGE_DIR} PACKAGE_DIR)
|
||||
|
@@ -24,7 +24,7 @@ if(MSVC)
|
||||
add_custom_command(
|
||||
OUTPUT ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
|
||||
COMMAND echo packaging python
|
||||
COMMAND echo this should output at ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
|
||||
COMMAND echo this should ouput at ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory ${PYTARGET}/libs
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${PYSRC}/libs/python${PYTHON_SHORT_VERSION_NO_DOTS}.lib ${PYTARGET}/libs/python${PYTHON_SHORT_VERSION_NO_DOTS}.lib
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${PYSRC}/python.exe ${PYTARGET}/bin/python.exe
|
||||
@@ -43,7 +43,7 @@ if(MSVC)
|
||||
add_custom_command(
|
||||
OUTPUT ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
|
||||
COMMAND echo packaging python
|
||||
COMMAND echo this should output at ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
|
||||
COMMAND echo this should ouput at ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory ${PYTARGET}/libs
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${PYSRC}/libs/python${PYTHON_SHORT_VERSION_NO_DOTS}${PYTHON_POSTFIX}.lib ${PYTARGET}/libs/python${PYTHON_SHORT_VERSION_NO_DOTS}${PYTHON_POSTFIX}.lib
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${PYSRC}/python${PYTHON_POSTFIX}.exe ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
|
||||
|
@@ -1826,7 +1826,7 @@ compile_OCIO() {
|
||||
# Force linking against static libs
|
||||
#rm -f $_inst/lib/*.so*
|
||||
|
||||
# Additional dependencies
|
||||
# Additional depencencies
|
||||
#cp ext/dist/lib/libtinyxml.a $_inst/lib
|
||||
#cp ext/dist/lib/libyaml-cpp.a $_inst/lib
|
||||
|
||||
|
@@ -1,374 +0,0 @@
|
||||
Index: nanovdb/nanovdb/NanoVDB.h
|
||||
===================================================================
|
||||
--- a/nanovdb/nanovdb/NanoVDB.h (revision 62751)
|
||||
+++ b/nanovdb/nanovdb/NanoVDB.h (working copy)
|
||||
@@ -152,8 +152,8 @@
|
||||
|
||||
#endif // __CUDACC_RTC__
|
||||
|
||||
-#ifdef __CUDACC__
|
||||
-// Only define __hostdev__ when using NVIDIA CUDA compiler
|
||||
+#if defined(__CUDACC__) || defined(__HIP__)
|
||||
+// Only define __hostdev__ when using NVIDIA CUDA or HIP compiler
|
||||
#define __hostdev__ __host__ __device__
|
||||
#else
|
||||
#define __hostdev__
|
||||
@@ -461,7 +461,7 @@
|
||||
/// Maximum floating-point values
|
||||
template<typename T>
|
||||
struct Maximum;
|
||||
-#ifdef __CUDA_ARCH__
|
||||
+#if defined(__CUDA_ARCH__) || defined(__HIP__)
|
||||
template<>
|
||||
struct Maximum<int>
|
||||
{
|
||||
@@ -1006,10 +1006,10 @@
|
||||
using Vec3i = Vec3<int>;
|
||||
|
||||
/// @brief Return a single precision floating-point vector of this coordinate
|
||||
-Vec3f Coord::asVec3s() const { return Vec3f(float(mVec[0]), float(mVec[1]), float(mVec[2])); }
|
||||
+inline __hostdev__ Vec3f Coord::asVec3s() const { return Vec3f(float(mVec[0]), float(mVec[1]), float(mVec[2])); }
|
||||
|
||||
/// @brief Return a double precision floating-point vector of this coordinate
|
||||
-Vec3d Coord::asVec3d() const { return Vec3d(double(mVec[0]), double(mVec[1]), double(mVec[2])); }
|
||||
+inline __hostdev__ Vec3d Coord::asVec3d() const { return Vec3d(double(mVec[0]), double(mVec[1]), double(mVec[2])); }
|
||||
|
||||
// ----------------------------> Vec4 <--------------------------------------
|
||||
|
||||
@@ -1820,7 +1820,7 @@
|
||||
}; // Map
|
||||
|
||||
template<typename Mat4T>
|
||||
-void Map::set(const Mat4T& mat, const Mat4T& invMat, double taper)
|
||||
+__hostdev__ void Map::set(const Mat4T& mat, const Mat4T& invMat, double taper)
|
||||
{
|
||||
float * mf = mMatF, *vf = mVecF;
|
||||
float* mif = mInvMatF;
|
||||
@@ -2170,7 +2170,7 @@
|
||||
}; // Class Grid
|
||||
|
||||
template<typename TreeT>
|
||||
-int Grid<TreeT>::findBlindDataForSemantic(GridBlindDataSemantic semantic) const
|
||||
+__hostdev__ int Grid<TreeT>::findBlindDataForSemantic(GridBlindDataSemantic semantic) const
|
||||
{
|
||||
for (uint32_t i = 0, n = blindDataCount(); i < n; ++i)
|
||||
if (blindMetaData(i).mSemantic == semantic)
|
||||
@@ -2328,7 +2328,7 @@
|
||||
}; // Tree class
|
||||
|
||||
template<typename RootT>
|
||||
-void Tree<RootT>::extrema(ValueType& min, ValueType& max) const
|
||||
+__hostdev__ void Tree<RootT>::extrema(ValueType& min, ValueType& max) const
|
||||
{
|
||||
min = this->root().valueMin();
|
||||
max = this->root().valueMax();
|
||||
@@ -2336,7 +2336,7 @@
|
||||
|
||||
template<typename RootT>
|
||||
template<typename NodeT>
|
||||
-const NodeT* Tree<RootT>::getNode(uint32_t i) const
|
||||
+__hostdev__ const NodeT* Tree<RootT>::getNode(uint32_t i) const
|
||||
{
|
||||
static_assert(is_same<TreeNodeT<NodeT::LEVEL>, NodeT>::value, "Tree::getNode: unvalid node type");
|
||||
NANOVDB_ASSERT(i < DataType::mCount[NodeT::LEVEL]);
|
||||
@@ -2345,7 +2345,7 @@
|
||||
|
||||
template<typename RootT>
|
||||
template<int LEVEL>
|
||||
-const typename TreeNode<Tree<RootT>, LEVEL>::type* Tree<RootT>::getNode(uint32_t i) const
|
||||
+__hostdev__ const typename TreeNode<Tree<RootT>, LEVEL>::type* Tree<RootT>::getNode(uint32_t i) const
|
||||
{
|
||||
NANOVDB_ASSERT(i < DataType::mCount[LEVEL]);
|
||||
return reinterpret_cast<const TreeNodeT<LEVEL>*>(reinterpret_cast<const uint8_t*>(this) + DataType::mBytes[LEVEL]) + i;
|
||||
@@ -2353,7 +2353,7 @@
|
||||
|
||||
template<typename RootT>
|
||||
template<typename NodeT>
|
||||
-NodeT* Tree<RootT>::getNode(uint32_t i)
|
||||
+__hostdev__ NodeT* Tree<RootT>::getNode(uint32_t i)
|
||||
{
|
||||
static_assert(is_same<TreeNodeT<NodeT::LEVEL>, NodeT>::value, "Tree::getNode: invalid node type");
|
||||
NANOVDB_ASSERT(i < DataType::mCount[NodeT::LEVEL]);
|
||||
@@ -2362,7 +2362,7 @@
|
||||
|
||||
template<typename RootT>
|
||||
template<int LEVEL>
|
||||
-typename TreeNode<Tree<RootT>, LEVEL>::type* Tree<RootT>::getNode(uint32_t i)
|
||||
+__hostdev__ typename TreeNode<Tree<RootT>, LEVEL>::type* Tree<RootT>::getNode(uint32_t i)
|
||||
{
|
||||
NANOVDB_ASSERT(i < DataType::mCount[LEVEL]);
|
||||
return reinterpret_cast<TreeNodeT<LEVEL>*>(reinterpret_cast<uint8_t*>(this) + DataType::mBytes[LEVEL]) + i;
|
||||
@@ -2370,7 +2370,7 @@
|
||||
|
||||
template<typename RootT>
|
||||
template<typename NodeT>
|
||||
-uint32_t Tree<RootT>::getNodeID(const NodeT& node) const
|
||||
+__hostdev__ uint32_t Tree<RootT>::getNodeID(const NodeT& node) const
|
||||
{
|
||||
static_assert(is_same<TreeNodeT<NodeT::LEVEL>, NodeT>::value, "Tree::getNodeID: invalid node type");
|
||||
const NodeT* first = reinterpret_cast<const NodeT*>(reinterpret_cast<const uint8_t*>(this) + DataType::mBytes[NodeT::LEVEL]);
|
||||
@@ -2380,7 +2380,7 @@
|
||||
|
||||
template<typename RootT>
|
||||
template<typename NodeT>
|
||||
-uint32_t Tree<RootT>::getLinearOffset(const NodeT& node) const
|
||||
+__hostdev__ uint32_t Tree<RootT>::getLinearOffset(const NodeT& node) const
|
||||
{
|
||||
return this->getNodeID(node) + DataType::mPFSum[NodeT::LEVEL];
|
||||
}
|
||||
@@ -3366,7 +3366,7 @@
|
||||
}; // LeafNode class
|
||||
|
||||
template<typename ValueT, typename CoordT, template<uint32_t> class MaskT, uint32_t LOG2DIM>
|
||||
-inline void LeafNode<ValueT, CoordT, MaskT, LOG2DIM>::updateBBox()
|
||||
+inline __hostdev__ void LeafNode<ValueT, CoordT, MaskT, LOG2DIM>::updateBBox()
|
||||
{
|
||||
static_assert(LOG2DIM == 3, "LeafNode::updateBBox: only supports LOGDIM = 3!");
|
||||
if (!this->isActive()) return;
|
||||
Index: nanovdb/nanovdb/util/SampleFromVoxels.h
|
||||
===================================================================
|
||||
--- a/nanovdb/nanovdb/util/SampleFromVoxels.h (revision 62751)
|
||||
+++ b/nanovdb/nanovdb/util/SampleFromVoxels.h (working copy)
|
||||
@@ -22,7 +22,7 @@
|
||||
#define NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED
|
||||
|
||||
// Only define __hostdev__ when compiling as NVIDIA CUDA
|
||||
-#ifdef __CUDACC__
|
||||
+#if defined(__CUDACC__) || defined(__HIP__)
|
||||
#define __hostdev__ __host__ __device__
|
||||
#else
|
||||
#include <cmath> // for floor
|
||||
@@ -136,7 +136,7 @@
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
template<typename Vec3T>
|
||||
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const Vec3T& xyz) const
|
||||
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const Vec3T& xyz) const
|
||||
{
|
||||
const CoordT ijk = Round<CoordT>(xyz);
|
||||
if (ijk != mPos) {
|
||||
@@ -147,7 +147,7 @@
|
||||
}
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const CoordT& ijk) const
|
||||
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const CoordT& ijk) const
|
||||
{
|
||||
if (ijk != mPos) {
|
||||
mPos = ijk;
|
||||
@@ -158,7 +158,7 @@
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
template<typename Vec3T>
|
||||
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, false>::operator()(const Vec3T& xyz) const
|
||||
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, false>::operator()(const Vec3T& xyz) const
|
||||
{
|
||||
return mAcc.getValue(Round<CoordT>(xyz));
|
||||
}
|
||||
@@ -195,7 +195,7 @@
|
||||
}; // TrilinearSamplerBase
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
-void TrilinearSampler<TreeOrAccT>::stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const
|
||||
+__hostdev__ void TrilinearSampler<TreeOrAccT>::stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const
|
||||
{
|
||||
v[0][0][0] = mAcc.getValue(ijk); // i, j, k
|
||||
|
||||
@@ -224,7 +224,7 @@
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
template<typename RealT, template<typename...> class Vec3T>
|
||||
-typename TreeOrAccT::ValueType TrilinearSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
|
||||
+__hostdev__ typename TreeOrAccT::ValueType TrilinearSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
|
||||
{
|
||||
#if 0
|
||||
auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a
|
||||
@@ -239,7 +239,7 @@
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
template<typename RealT, template<typename...> class Vec3T>
|
||||
-Vec3T<typename TreeOrAccT::ValueType> TrilinearSampler<TreeOrAccT>::gradient(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
|
||||
+__hostdev__ Vec3T<typename TreeOrAccT::ValueType> TrilinearSampler<TreeOrAccT>::gradient(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
|
||||
{
|
||||
static_assert(std::is_floating_point<ValueT>::value, "TrilinearSampler::gradient requires a floating-point type");
|
||||
#if 0
|
||||
@@ -270,7 +270,7 @@
|
||||
}
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
-bool TrilinearSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[2][2][2])
|
||||
+__hostdev__ bool TrilinearSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[2][2][2])
|
||||
{
|
||||
static_assert(std::is_floating_point<ValueT>::value, "TrilinearSampler::zeroCrossing requires a floating-point type");
|
||||
const bool less = v[0][0][0] < ValueT(0);
|
||||
@@ -363,7 +363,7 @@
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
template<typename RealT, template<typename...> class Vec3T>
|
||||
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(Vec3T<RealT> xyz) const
|
||||
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(Vec3T<RealT> xyz) const
|
||||
{
|
||||
this->cache(xyz);
|
||||
return BaseT::sample(xyz, mVal);
|
||||
@@ -370,7 +370,7 @@
|
||||
}
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(const CoordT &ijk) const
|
||||
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(const CoordT &ijk) const
|
||||
{
|
||||
return ijk == mPos ? mVal[0][0][0] : BaseT::mAcc.getValue(ijk);
|
||||
}
|
||||
@@ -377,7 +377,7 @@
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
template<typename RealT, template<typename...> class Vec3T>
|
||||
-Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, true>::gradient(Vec3T<RealT> xyz) const
|
||||
+__hostdev__ Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, true>::gradient(Vec3T<RealT> xyz) const
|
||||
{
|
||||
this->cache(xyz);
|
||||
return BaseT::gradient(xyz, mVal);
|
||||
@@ -393,7 +393,7 @@
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
template<typename RealT, template<typename...> class Vec3T>
|
||||
-void SampleFromVoxels<TreeOrAccT, 1, true>::cache(Vec3T<RealT>& xyz) const
|
||||
+__hostdev__ void SampleFromVoxels<TreeOrAccT, 1, true>::cache(Vec3T<RealT>& xyz) const
|
||||
{
|
||||
CoordT ijk = Floor<CoordT>(xyz);
|
||||
if (ijk != mPos) {
|
||||
@@ -406,7 +406,7 @@
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
template<typename RealT, template<typename...> class Vec3T>
|
||||
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
|
||||
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
|
||||
{
|
||||
ValueT val[2][2][2];
|
||||
CoordT ijk = Floor<CoordT>(xyz);
|
||||
@@ -418,7 +418,7 @@
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
template<typename RealT, template<typename...> class Vec3T>
|
||||
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
|
||||
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
|
||||
{
|
||||
auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); };
|
||||
|
||||
@@ -463,7 +463,7 @@
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
template<typename RealT, template<typename...> class Vec3T>
|
||||
-inline Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, false>::gradient(Vec3T<RealT> xyz) const
|
||||
+inline __hostdev__ Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, false>::gradient(Vec3T<RealT> xyz) const
|
||||
{
|
||||
ValueT val[2][2][2];
|
||||
CoordT ijk = Floor<CoordT>(xyz);
|
||||
@@ -473,7 +473,7 @@
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
template<typename RealT, template<typename...> class Vec3T>
|
||||
-bool SampleFromVoxels<TreeOrAccT, 1, false>::zeroCrossing(Vec3T<RealT> xyz) const
|
||||
+__hostdev__ bool SampleFromVoxels<TreeOrAccT, 1, false>::zeroCrossing(Vec3T<RealT> xyz) const
|
||||
{
|
||||
ValueT val[2][2][2];
|
||||
CoordT ijk = Floor<CoordT>(xyz);
|
||||
@@ -510,7 +510,7 @@
|
||||
}; // TriquadraticSamplerBase
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
-void TriquadraticSampler<TreeOrAccT>::stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const
|
||||
+__hostdev__ void TriquadraticSampler<TreeOrAccT>::stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const
|
||||
{
|
||||
CoordT p(ijk[0] - 1, 0, 0);
|
||||
for (int dx = 0; dx < 3; ++dx, ++p[0]) {
|
||||
@@ -526,7 +526,7 @@
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
template<typename RealT, template<typename...> class Vec3T>
|
||||
-typename TreeOrAccT::ValueType TriquadraticSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[3][3][3])
|
||||
+__hostdev__ typename TreeOrAccT::ValueType TriquadraticSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[3][3][3])
|
||||
{
|
||||
auto kernel = [](const ValueT* value, double weight)->ValueT {
|
||||
return weight * (weight * (0.5f * (value[0] + value[2]) - value[1]) +
|
||||
@@ -545,7 +545,7 @@
|
||||
}
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
-bool TriquadraticSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[3][3][3])
|
||||
+__hostdev__ bool TriquadraticSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[3][3][3])
|
||||
{
|
||||
static_assert(std::is_floating_point<ValueT>::value, "TrilinearSampler::zeroCrossing requires a floating-point type");
|
||||
const bool less = v[0][0][0] < ValueT(0);
|
||||
@@ -624,7 +624,7 @@
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
template<typename RealT, template<typename...> class Vec3T>
|
||||
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(Vec3T<RealT> xyz) const
|
||||
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(Vec3T<RealT> xyz) const
|
||||
{
|
||||
this->cache(xyz);
|
||||
return BaseT::sample(xyz, mVal);
|
||||
@@ -631,7 +631,7 @@
|
||||
}
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(const CoordT &ijk) const
|
||||
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(const CoordT &ijk) const
|
||||
{
|
||||
return ijk == mPos ? mVal[1][1][1] : BaseT::mAcc.getValue(ijk);
|
||||
}
|
||||
@@ -646,7 +646,7 @@
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
template<typename RealT, template<typename...> class Vec3T>
|
||||
-void SampleFromVoxels<TreeOrAccT, 2, true>::cache(Vec3T<RealT>& xyz) const
|
||||
+__hostdev__ void SampleFromVoxels<TreeOrAccT, 2, true>::cache(Vec3T<RealT>& xyz) const
|
||||
{
|
||||
CoordT ijk = Floor<CoordT>(xyz);
|
||||
if (ijk != mPos) {
|
||||
@@ -657,7 +657,7 @@
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
template<typename RealT, template<typename...> class Vec3T>
|
||||
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, false>::operator()(Vec3T<RealT> xyz) const
|
||||
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, false>::operator()(Vec3T<RealT> xyz) const
|
||||
{
|
||||
ValueT val[3][3][3];
|
||||
CoordT ijk = Floor<CoordT>(xyz);
|
||||
@@ -667,7 +667,7 @@
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
template<typename RealT, template<typename...> class Vec3T>
|
||||
-bool SampleFromVoxels<TreeOrAccT, 2, false>::zeroCrossing(Vec3T<RealT> xyz) const
|
||||
+__hostdev__ bool SampleFromVoxels<TreeOrAccT, 2, false>::zeroCrossing(Vec3T<RealT> xyz) const
|
||||
{
|
||||
ValueT val[3][3][3];
|
||||
CoordT ijk = Floor<CoordT>(xyz);
|
||||
@@ -710,7 +710,7 @@
|
||||
}; // TricubicSampler
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
-void TricubicSampler<TreeOrAccT>::stencil(const CoordT& ijk, ValueT (&C)[64]) const
|
||||
+__hostdev__ void TricubicSampler<TreeOrAccT>::stencil(const CoordT& ijk, ValueT (&C)[64]) const
|
||||
{
|
||||
auto fetch = [&](int i, int j, int k) -> ValueT& { return C[((i + 1) << 4) + ((j + 1) << 2) + k + 1]; };
|
||||
|
||||
@@ -929,7 +929,7 @@
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
template<typename RealT, template<typename...> class Vec3T>
|
||||
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 3, true>::operator()(Vec3T<RealT> xyz) const
|
||||
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 3, true>::operator()(Vec3T<RealT> xyz) const
|
||||
{
|
||||
this->cache(xyz);
|
||||
return BaseT::sample(xyz, mC);
|
||||
@@ -937,7 +937,7 @@
|
||||
|
||||
template<typename TreeOrAccT>
|
||||
template<typename RealT, template<typename...> class Vec3T>
|
||||
-void SampleFromVoxels<TreeOrAccT, 3, true>::cache(Vec3T<RealT>& xyz) const
|
||||
+__hostdev__ void SampleFromVoxels<TreeOrAccT, 3, true>::cache(Vec3T<RealT>& xyz) const
|
||||
{
|
||||
CoordT ijk = Floor<CoordT>(xyz);
|
||||
if (ijk != mPos) {
|
@@ -1,66 +0,0 @@
|
||||
# - Find SHADERC library
|
||||
# Find the native Haru includes and library
|
||||
# This module defines
|
||||
# SHADERC_INCLUDE_DIRS, where to find hpdf.h, set when
|
||||
# SHADERC_INCLUDE_DIR is found.
|
||||
# SHADERC_LIBRARIES, libraries to link against to use Haru.
|
||||
# SHADERC_ROOT_DIR, The base directory to search for Haru.
|
||||
# This can also be an environment variable.
|
||||
# SHADERC_FOUND, If false, do not try to use Haru.
|
||||
#
|
||||
# also defined, but not for general use are
|
||||
# SHADERC_LIBRARY, where to find the Haru library.
|
||||
|
||||
#=============================================================================
|
||||
# Copyright 2021 Blender Foundation.
|
||||
#
|
||||
# Distributed under the OSI-approved BSD 3-Clause License,
|
||||
# see accompanying file BSD-3-Clause-license.txt for details.
|
||||
#=============================================================================
|
||||
|
||||
# If SHADERC_ROOT_DIR was defined in the environment, use it.
|
||||
if(NOT SHADERC_ROOT_DIR AND NOT $ENV{SHADERC_ROOT_DIR} STREQUAL "")
|
||||
set(SHADERC_ROOT_DIR $ENV{SHADERC_ROOT_DIR})
|
||||
endif()
|
||||
|
||||
set(_shaderc_SEARCH_DIRS
|
||||
${SHADERC_ROOT_DIR}
|
||||
/opt/lib/haru
|
||||
)
|
||||
|
||||
find_path(SHADERC_INCLUDE_DIR
|
||||
NAMES
|
||||
shaderc.hpp
|
||||
HINTS
|
||||
${_shaderc_SEARCH_DIRS}
|
||||
PATH_SUFFIXES
|
||||
include/shaderc
|
||||
include
|
||||
)
|
||||
|
||||
find_library(SHADERC_LIBRARY
|
||||
NAMES
|
||||
shaderc_combined
|
||||
shaderc
|
||||
HINTS
|
||||
${_shaderc_SEARCH_DIRS}
|
||||
PATH_SUFFIXES
|
||||
lib64 lib
|
||||
)
|
||||
|
||||
# Handle the QUIETLY and REQUIRED arguments and set SHADERC_FOUND to TRUE if
|
||||
# all listed variables are TRUE.
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(ShaderC DEFAULT_MSG SHADERC_LIBRARY SHADERC_INCLUDE_DIR)
|
||||
|
||||
if(SHADERC_FOUND)
|
||||
set(SHADERC_LIBRARIES ${SHADERC_LIBRARY})
|
||||
set(SHADERC_INCLUDE_DIRS ${SHADERC_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
mark_as_advanced(
|
||||
SHADERC_INCLUDE_DIR
|
||||
SHADERC_LIBRARY
|
||||
)
|
||||
|
||||
unset(_shaderc_SEARCH_DIRS)
|
@@ -168,7 +168,7 @@ def function_parm_wash_tokens(parm):
|
||||
# if tokens[-1].kind == To
|
||||
# remove trailing char
|
||||
if tokens[-1].kind == TokenKind.PUNCTUATION:
|
||||
if tokens[-1].spelling in {",", ")", ";"}:
|
||||
if tokens[-1].spelling in (",", ")", ";"):
|
||||
tokens.pop()
|
||||
# else:
|
||||
# print(tokens[-1].spelling)
|
||||
@@ -179,7 +179,7 @@ def function_parm_wash_tokens(parm):
|
||||
t_spelling = t.spelling
|
||||
ok = True
|
||||
if t_kind == TokenKind.KEYWORD:
|
||||
if t_spelling in {"const", "restrict", "volatile"}:
|
||||
if t_spelling in ("const", "restrict", "volatile"):
|
||||
ok = False
|
||||
elif t_spelling.startswith("__"):
|
||||
ok = False # __restrict
|
||||
|
@@ -180,7 +180,7 @@ def create_nb_project_main():
|
||||
f.write(' </logicalFolder>\n')
|
||||
|
||||
f.write(' </logicalFolder>\n')
|
||||
# default, but this dir is in fact not in blender dir so we can ignore it
|
||||
# default, but this dir is infact not in blender dir so we can ignore it
|
||||
# f.write(' <sourceFolderFilter>^(nbproject)$</sourceFolderFilter>\n')
|
||||
f.write(r' <sourceFolderFilter>^(nbproject|__pycache__|.*\.py|.*\.html|.*\.blend)$</sourceFolderFilter>\n')
|
||||
|
||||
|
@@ -81,5 +81,4 @@ if(NOT APPLE)
|
||||
set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
|
||||
endif()
|
||||
|
@@ -529,7 +529,7 @@ function(SETUP_LIBDIRS)
|
||||
|
||||
# NOTE: For all new libraries, use absolute library paths.
|
||||
# This should eventually be phased out.
|
||||
# APPLE platform uses full paths for linking libraries, and avoids link_directories.
|
||||
# APPLE plaform uses full paths for linking libraries, and avoids link_directories.
|
||||
if(NOT MSVC AND NOT APPLE)
|
||||
link_directories(${JPEG_LIBPATH} ${PNG_LIBPATH} ${ZLIB_LIBPATH} ${FREETYPE_LIBPATH})
|
||||
|
||||
|
@@ -102,11 +102,6 @@ find_package_wrapper(ZLIB REQUIRED)
|
||||
find_package_wrapper(Zstd REQUIRED)
|
||||
find_package_wrapper(Freetype REQUIRED)
|
||||
|
||||
if(WITH_VULKAN)
|
||||
find_package_wrapper(Vulkan REQUIRED)
|
||||
find_package(ShaderC REQUIRED)
|
||||
endif()
|
||||
|
||||
if(WITH_PYTHON)
|
||||
# No way to set py35, remove for now.
|
||||
# find_package(PythonLibs)
|
||||
|
@@ -27,7 +27,7 @@ if(NOT MSVC)
|
||||
endif()
|
||||
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
set(MSVC_CLANG ON)
|
||||
set(MSVC_CLANG On)
|
||||
set(VC_TOOLS_DIR $ENV{VCToolsRedistDir} CACHE STRING "Location of the msvc redistributables")
|
||||
set(MSVC_REDIST_DIR ${VC_TOOLS_DIR})
|
||||
if(DEFINED MSVC_REDIST_DIR)
|
||||
@@ -53,7 +53,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
endif()
|
||||
if(WITH_WINDOWS_STRIPPED_PDB)
|
||||
message(WARNING "stripped pdb not supported with clang, disabling..")
|
||||
set(WITH_WINDOWS_STRIPPED_PDB OFF)
|
||||
set(WITH_WINDOWS_STRIPPED_PDB Off)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@@ -159,7 +159,7 @@ endif()
|
||||
if(WITH_COMPILER_ASAN AND MSVC AND NOT MSVC_CLANG)
|
||||
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 19.28.29828)
|
||||
#set a flag so we don't have to do this comparison all the time
|
||||
SET(MSVC_ASAN ON)
|
||||
SET(MSVC_ASAN On)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fsanitize=address")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /fsanitize=address")
|
||||
string(APPEND CMAKE_EXE_LINKER_FLAGS_DEBUG " /INCREMENTAL:NO")
|
||||
@@ -179,22 +179,22 @@ endif()
|
||||
|
||||
if(WITH_WINDOWS_SCCACHE AND CMAKE_VS_MSBUILD_COMMAND)
|
||||
message(WARNING "Disabling sccache, sccache is not supported with msbuild")
|
||||
set(WITH_WINDOWS_SCCACHE OFF)
|
||||
set(WITH_WINDOWS_SCCACHE Off)
|
||||
endif()
|
||||
|
||||
# Debug Symbol format
|
||||
# sccache # MSVC_ASAN # format # why
|
||||
# ON # ON # Z7 # sccache will only play nice with Z7
|
||||
# ON # OFF # Z7 # sccache will only play nice with Z7
|
||||
# OFF # ON # Zi # Asan will not play nice with Edit and Continue
|
||||
# OFF # OFF # ZI # Neither asan nor sscache is enabled Edit and Continue is available
|
||||
# On # On # Z7 # sccache will only play nice with Z7
|
||||
# On # Off # Z7 # sccache will only play nice with Z7
|
||||
# Off # On # Zi # Asan will not play nice with Edit and Continue
|
||||
# Off # Off # ZI # Neither asan nor sscache is enabled Edit and Continue is available
|
||||
|
||||
# Release Symbol format
|
||||
# sccache # MSVC_ASAN # format # why
|
||||
# ON # ON # Z7 # sccache will only play nice with Z7
|
||||
# ON # OFF # Z7 # sccache will only play nice with Z7
|
||||
# OFF # ON # Zi # Asan will not play nice with Edit and Continue
|
||||
# OFF # OFF # Zi # Edit and Continue disables some optimizations
|
||||
# On # On # Z7 # sccache will only play nice with Z7
|
||||
# On # Off # Z7 # sccache will only play nice with Z7
|
||||
# Off # On # Zi # Asan will not play nice with Edit and Continue
|
||||
# Off # Off # Zi # Edit and Continue disables some optimizations
|
||||
|
||||
|
||||
if(WITH_WINDOWS_SCCACHE)
|
||||
@@ -288,7 +288,7 @@ if(CMAKE_GENERATOR MATCHES "^Visual Studio.+" AND # Only supported in the VS IDE
|
||||
"EnableMicrosoftCodeAnalysis=false"
|
||||
"EnableClangTidyCodeAnalysis=true"
|
||||
)
|
||||
set(VS_CLANG_TIDY ON)
|
||||
set(VS_CLANG_TIDY On)
|
||||
endif()
|
||||
|
||||
# Mark libdir as system headers with a lower warn level, to resolve some warnings
|
||||
@@ -469,7 +469,7 @@ if(WITH_PYTHON)
|
||||
|
||||
set(PYTHON_INCLUDE_DIR ${LIBDIR}/python/${_PYTHON_VERSION_NO_DOTS}/include)
|
||||
set(PYTHON_NUMPY_INCLUDE_DIRS ${LIBDIR}/python/${_PYTHON_VERSION_NO_DOTS}/lib/site-packages/numpy/core/include)
|
||||
set(NUMPY_FOUND ON)
|
||||
set(NUMPY_FOUND On)
|
||||
unset(_PYTHON_VERSION_NO_DOTS)
|
||||
# uncached vars
|
||||
set(PYTHON_INCLUDE_DIRS "${PYTHON_INCLUDE_DIR}")
|
||||
@@ -853,18 +853,18 @@ if(WITH_GMP)
|
||||
set(GMP_INCLUDE_DIRS ${LIBDIR}/gmp/include)
|
||||
set(GMP_LIBRARIES ${LIBDIR}/gmp/lib/libgmp-10.lib optimized ${LIBDIR}/gmp/lib/libgmpxx.lib debug ${LIBDIR}/gmp/lib/libgmpxx_d.lib)
|
||||
set(GMP_ROOT_DIR ${LIBDIR}/gmp)
|
||||
set(GMP_FOUND ON)
|
||||
set(GMP_FOUND On)
|
||||
endif()
|
||||
|
||||
if(WITH_POTRACE)
|
||||
set(POTRACE_INCLUDE_DIRS ${LIBDIR}/potrace/include)
|
||||
set(POTRACE_LIBRARIES ${LIBDIR}/potrace/lib/potrace.lib)
|
||||
set(POTRACE_FOUND ON)
|
||||
set(POTRACE_FOUND On)
|
||||
endif()
|
||||
|
||||
if(WITH_HARU)
|
||||
if(EXISTS ${LIBDIR}/haru)
|
||||
set(HARU_FOUND ON)
|
||||
set(HARU_FOUND On)
|
||||
set(HARU_ROOT_DIR ${LIBDIR}/haru)
|
||||
set(HARU_INCLUDE_DIRS ${HARU_ROOT_DIR}/include)
|
||||
set(HARU_LIBRARIES ${HARU_ROOT_DIR}/lib/libhpdfs.lib)
|
||||
@@ -874,32 +874,5 @@ if(WITH_HARU)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_VULKAN)
|
||||
if(EXISTS ${LIBDIR}/vulkan)
|
||||
set(Vulkan_FOUND On)
|
||||
set(Vulkan_ROOT_DIR ${LIBDIR}/vulkan)
|
||||
set(Vulkan_INCLUDE_DIR ${Vulkan_ROOT_DIR}/include)
|
||||
set(Vulkan_INCLUDE_DIRS ${Vulkan_INCLUDE_DIR})
|
||||
set(Vulkan_LIBRARY ${Vulkan_ROOT_DIR}/lib/vulkan-1.lib)
|
||||
set(Vulkan_LIBRARIES ${Vulkan_LIBRARY})
|
||||
else()
|
||||
message(WARNING "vulkan was not found, disabling WITH_VULKAN")
|
||||
set(WITH_VULKAN OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_VULKAN)
|
||||
if(EXISTS ${LIBDIR}/shaderc)
|
||||
set(SHADERC_ROOT_DIR ${LIBDIR}/shaderc)
|
||||
set(SHADERC_INCLUDE_DIR ${SHADERC_ROOT_DIR}/include)
|
||||
set(SHADERC_INCLUDE_DIRS ${SHADERC_INCLUDE_DIR})
|
||||
set(SHADERC_LIBRARY optimized ${SHADERC_ROOT_DIR}/lib/shaderc_shared.lib debug ${SHADERC_ROOT_DIR}/lib/shaderc_shared_d.lib)
|
||||
set(SHADERC_LIBRARIES ${SHADERC_LIBRARY})
|
||||
else()
|
||||
message(WARNING "shaderc was not found, disabling WITH_VULKAN")
|
||||
set(WITH_VULKAN OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(ZSTD_INCLUDE_DIRS ${LIBDIR}/zstd/include)
|
||||
set(ZSTD_LIBRARIES ${LIBDIR}/zstd/lib/zstd_static.lib)
|
||||
|
@@ -27,7 +27,7 @@ if(WITH_WINDOWS_BUNDLE_CRT)
|
||||
# Install the CRT to the blender.crt Sub folder.
|
||||
install(FILES ${CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS} DESTINATION ./blender.crt COMPONENT Libraries)
|
||||
|
||||
# Generating the manifest is a relatively expensive operation since
|
||||
# Generating the manifest is a relativly expensive operation since
|
||||
# it is collecting an sha1 hash for every file required. so only do
|
||||
# this work when the libs have either changed or the manifest does
|
||||
# not exist yet.
|
||||
|
@@ -11,7 +11,7 @@ import queue
|
||||
|
||||
execution_queue = queue.Queue()
|
||||
|
||||
# This function can safely be called in another thread.
|
||||
# This function can savely be called in another thread.
|
||||
# The function will be executed when the timer runs the next time.
|
||||
def run_in_main_thread(function):
|
||||
execution_queue.put(function)
|
||||
|
@@ -42,13 +42,8 @@ class SimpleMouseOperator(bpy.types.Operator):
|
||||
self.y = event.mouse_y
|
||||
return self.execute(context)
|
||||
|
||||
# Only needed if you want to add into a dynamic menu
|
||||
def menu_func(self, context):
|
||||
self.layout.operator(SimpleMouseOperator.bl_idname, text="Simple Mouse Operator")
|
||||
|
||||
# Register and add to the view menu (required to also use F3 search "Simple Mouse Operator" for quick access)
|
||||
bpy.utils.register_class(SimpleMouseOperator)
|
||||
bpy.types.VIEW3D_MT_view.append(menu_func)
|
||||
|
||||
# Test call to the newly defined operator.
|
||||
# Here we call the operator and invoke it, meaning that the settings are taken
|
||||
|
@@ -43,7 +43,7 @@ def menu_func(self, context):
|
||||
self.layout.operator(ExportSomeData.bl_idname, text="Text Export Operator")
|
||||
|
||||
|
||||
# Register and add to the file selector (required to also use F3 search "Text Export Operator" for quick access)
|
||||
# Register and add to the file selector
|
||||
bpy.utils.register_class(ExportSomeData)
|
||||
bpy.types.TOPBAR_MT_file_export.append(menu_func)
|
||||
|
||||
|
@@ -27,14 +27,8 @@ class DialogOperator(bpy.types.Operator):
|
||||
wm = context.window_manager
|
||||
return wm.invoke_props_dialog(self)
|
||||
|
||||
# Only needed if you want to add into a dynamic menu
|
||||
def menu_func(self, context):
|
||||
self.layout.operator(DialogOperator.bl_idname, text="Dialog Operator")
|
||||
|
||||
|
||||
# Register and add to the object menu (required to also use F3 search "Dialog Operator" for quick access)
|
||||
bpy.utils.register_class(DialogOperator)
|
||||
bpy.types.VIEW3D_MT_object.append(menu_func)
|
||||
|
||||
# Test call.
|
||||
bpy.ops.object.dialog_operator('INVOKE_DEFAULT')
|
||||
|
@@ -41,13 +41,8 @@ class CustomDrawOperator(bpy.types.Operator):
|
||||
|
||||
col.prop(self, "my_string")
|
||||
|
||||
# Only needed if you want to add into a dynamic menu
|
||||
def menu_func(self, context):
|
||||
self.layout.operator(CustomDrawOperator.bl_idname, text="Custom Draw Operator")
|
||||
|
||||
# Register and add to the object menu (required to also use F3 search "Custom Draw Operator" for quick access)
|
||||
bpy.utils.register_class(CustomDrawOperator)
|
||||
bpy.types.VIEW3D_MT_object.append(menu_func)
|
||||
|
||||
# test call
|
||||
bpy.ops.object.custom_draw('INVOKE_DEFAULT')
|
||||
|
@@ -55,13 +55,8 @@ class ModalOperator(bpy.types.Operator):
|
||||
context.window_manager.modal_handler_add(self)
|
||||
return {'RUNNING_MODAL'}
|
||||
|
||||
# Only needed if you want to add into a dynamic menu
|
||||
def menu_func(self, context):
|
||||
self.layout.operator(ModalOperator.bl_idname, text="Modal Operator")
|
||||
|
||||
# Register and add to the object menu (required to also use F3 search "Modal Operator" for quick access)
|
||||
bpy.utils.register_class(ModalOperator)
|
||||
bpy.types.VIEW3D_MT_object.append(menu_func)
|
||||
|
||||
# test call
|
||||
bpy.ops.object.modal_operator('INVOKE_DEFAULT')
|
||||
|
@@ -31,13 +31,8 @@ class SearchEnumOperator(bpy.types.Operator):
|
||||
context.window_manager.invoke_search_popup(self)
|
||||
return {'RUNNING_MODAL'}
|
||||
|
||||
# Only needed if you want to add into a dynamic menu
|
||||
def menu_func(self, context):
|
||||
self.layout.operator(SearchEnumOperator.bl_idname, text="Search Enum Operator")
|
||||
|
||||
# Register and add to the object menu (required to also use F3 search "Search Enum Operator" for quick access)
|
||||
bpy.utils.register_class(SearchEnumOperator)
|
||||
bpy.types.VIEW3D_MT_object.append(menu_func)
|
||||
|
||||
# test call
|
||||
bpy.ops.object.search_enum_operator('INVOKE_DEFAULT')
|
||||
|
@@ -22,13 +22,8 @@ class HelloWorldOperator(bpy.types.Operator):
|
||||
print("Hello World")
|
||||
return {'FINISHED'}
|
||||
|
||||
# Only needed if you want to add into a dynamic menu
|
||||
def menu_func(self, context):
|
||||
self.layout.operator(HelloWorldOperator.bl_idname, text="Hello World Operator")
|
||||
|
||||
# Register and add to the view menu (required to also use F3 search "Hello World Operator" for quick access)
|
||||
bpy.utils.register_class(HelloWorldOperator)
|
||||
bpy.types.VIEW3D_MT_view.append(menu_func)
|
||||
|
||||
# test call to the newly defined operator
|
||||
bpy.ops.wm.hello_world()
|
||||
|
@@ -728,7 +728,7 @@ Abusing RNA property callbacks
|
||||
------------------------------
|
||||
|
||||
Python-defined RNA properties can have custom callbacks. Trying to perform complex operations
|
||||
from there, like calling an operator, may work, but is not officially recommended nor supported.
|
||||
from there, like calling an operator, may work, but is not officialy recommended nor supported.
|
||||
|
||||
Main reason is that those callback should be very fast, but additionally, it may for example
|
||||
create issues with undo/redo system (most operators store an history step, and editing an RNA
|
||||
|
@@ -2254,7 +2254,7 @@ def main():
|
||||
# First monkey patch to load in fake members.
|
||||
setup_monkey_patch()
|
||||
|
||||
# Perform changes to Blender itself.
|
||||
# Perform changes to Blender it's self.
|
||||
setup_data = setup_blender()
|
||||
|
||||
# eventually, create the dirs
|
||||
|
4
extern/CMakeLists.txt
vendored
4
extern/CMakeLists.txt
vendored
@@ -116,7 +116,3 @@ endif()
|
||||
if (WITH_COMPOSITOR)
|
||||
add_subdirectory(smaa_areatex)
|
||||
endif()
|
||||
|
||||
if(WITH_VULKAN)
|
||||
add_subdirectory(vulkan_memory_allocator)
|
||||
endif()
|
||||
|
42
extern/hipew/include/hipew.h
vendored
42
extern/hipew/include/hipew.h
vendored
@@ -804,29 +804,31 @@ typedef enum hipDeviceP2PAttr {
|
||||
} hipDeviceP2PAttr;
|
||||
|
||||
typedef struct HIP_MEMCPY3D {
|
||||
unsigned int srcXInBytes;
|
||||
unsigned int srcY;
|
||||
unsigned int srcZ;
|
||||
unsigned int srcLOD;
|
||||
size_t srcXInBytes;
|
||||
size_t srcY;
|
||||
size_t srcZ;
|
||||
size_t srcLOD;
|
||||
hipMemoryType srcMemoryType;
|
||||
const void* srcHost;
|
||||
hipDeviceptr_t srcDevice;
|
||||
hArray srcArray;
|
||||
unsigned int srcPitch;
|
||||
unsigned int srcHeight;
|
||||
unsigned int dstXInBytes;
|
||||
unsigned int dstY;
|
||||
unsigned int dstZ;
|
||||
unsigned int dstLOD;
|
||||
hArray * srcArray;
|
||||
void* reserved0;
|
||||
size_t srcPitch;
|
||||
size_t srcHeight;
|
||||
size_t dstXInBytes;
|
||||
size_t dstY;
|
||||
size_t dstZ;
|
||||
size_t dstLOD;
|
||||
hipMemoryType dstMemoryType;
|
||||
void* dstHost;
|
||||
hipDeviceptr_t dstDevice;
|
||||
hArray dstArray;
|
||||
unsigned int dstPitch;
|
||||
unsigned int dstHeight;
|
||||
unsigned int WidthInBytes;
|
||||
unsigned int Height;
|
||||
unsigned int Depth;
|
||||
hArray * dstArray;
|
||||
void* reserved1;
|
||||
size_t dstPitch;
|
||||
size_t dstHeight;
|
||||
size_t WidthInBytes;
|
||||
size_t Height;
|
||||
size_t Depth;
|
||||
} HIP_MEMCPY3D;
|
||||
|
||||
typedef struct HIP_MEMCPY3D_PEER_st {
|
||||
@@ -877,7 +879,7 @@ typedef struct HIP_RESOURCE_DESC_st {
|
||||
hipResourceType resType;
|
||||
union {
|
||||
struct {
|
||||
hArray h_Array;
|
||||
hArray * h_Array;
|
||||
} array;
|
||||
struct {
|
||||
hipMipmappedArray_t hMipmappedArray;
|
||||
@@ -1072,10 +1074,9 @@ typedef enum hiprtcResult {
|
||||
typedef hipError_t HIPAPI thipGetErrorName(hipError_t error, const char** pStr);
|
||||
typedef hipError_t HIPAPI thipInit(unsigned int Flags);
|
||||
typedef hipError_t HIPAPI thipDriverGetVersion(int* driverVersion);
|
||||
typedef hipError_t HIPAPI thipGetDevice(int* device);
|
||||
typedef hipError_t HIPAPI thipGetDevice(hipDevice_t* device, int ordinal);
|
||||
typedef hipError_t HIPAPI thipGetDeviceCount(int* count);
|
||||
typedef hipError_t HIPAPI thipGetDeviceProperties(hipDeviceProp_t* props, int deviceId);
|
||||
typedef hipError_t HIPAPI thipDeviceGet(hipDevice_t* device, int ordinal);
|
||||
typedef hipError_t HIPAPI thipDeviceGetName(char* name, int len, hipDevice_t dev);
|
||||
typedef hipError_t HIPAPI thipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
|
||||
typedef hipError_t HIPAPI thipDeviceComputeCapability(int* major, int* minor, hipDevice_t dev);
|
||||
@@ -1208,7 +1209,6 @@ extern thipDriverGetVersion *hipDriverGetVersion;
|
||||
extern thipGetDevice *hipGetDevice;
|
||||
extern thipGetDeviceCount *hipGetDeviceCount;
|
||||
extern thipGetDeviceProperties *hipGetDeviceProperties;
|
||||
extern thipDeviceGet* hipDeviceGet;
|
||||
extern thipDeviceGetName *hipDeviceGetName;
|
||||
extern thipDeviceGetAttribute *hipDeviceGetAttribute;
|
||||
extern thipDeviceComputeCapability *hipDeviceComputeCapability;
|
||||
|
2
extern/hipew/src/hipew.c
vendored
2
extern/hipew/src/hipew.c
vendored
@@ -71,7 +71,6 @@ thipDriverGetVersion *hipDriverGetVersion;
|
||||
thipGetDevice *hipGetDevice;
|
||||
thipGetDeviceCount *hipGetDeviceCount;
|
||||
thipGetDeviceProperties *hipGetDeviceProperties;
|
||||
thipDeviceGet* hipDeviceGet;
|
||||
thipDeviceGetName *hipDeviceGetName;
|
||||
thipDeviceGetAttribute *hipDeviceGetAttribute;
|
||||
thipDeviceComputeCapability *hipDeviceComputeCapability;
|
||||
@@ -256,7 +255,6 @@ static int hipewHipInit(void) {
|
||||
HIP_LIBRARY_FIND_CHECKED(hipGetDevice);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipGetDeviceProperties);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDeviceGet);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDeviceComputeCapability);
|
||||
|
42
extern/vulkan_memory_allocator/CMakeLists.txt
vendored
42
extern/vulkan_memory_allocator/CMakeLists.txt
vendored
@@ -1,42 +0,0 @@
|
||||
# ***** BEGIN GPL LICENSE BLOCK *****
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version 2
|
||||
# of the License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software Foundation,
|
||||
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
#
|
||||
# The Original Code is Copyright (C) 2012, Blender Foundation
|
||||
# All rights reserved.
|
||||
# ***** END GPL LICENSE BLOCK *****
|
||||
|
||||
|
||||
set(INC
|
||||
.
|
||||
)
|
||||
|
||||
set(INC_SYS
|
||||
${Vulkan_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
set(SRC
|
||||
vk_mem_alloc_impl.cc
|
||||
|
||||
vk_mem_alloc.h
|
||||
)
|
||||
|
||||
blender_add_lib(extern_vulkan_memory_allocator "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
target_compile_options(extern_vulkan_memory_allocator
|
||||
PRIVATE "-Wno-nullability-completeness"
|
||||
)
|
||||
endif()
|
19
extern/vulkan_memory_allocator/LICENSE.txt
vendored
19
extern/vulkan_memory_allocator/LICENSE.txt
vendored
@@ -1,19 +0,0 @@
|
||||
Copyright (c) 2017-2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
@@ -1,5 +0,0 @@
|
||||
Project: VulkanMemoryAllocator
|
||||
URL: https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator
|
||||
License: MIT
|
||||
Upstream version: 4b047fd
|
||||
Local modifications: None
|
134
extern/vulkan_memory_allocator/README.md
vendored
134
extern/vulkan_memory_allocator/README.md
vendored
@@ -1,134 +0,0 @@
|
||||
# Vulkan Memory Allocator
|
||||
|
||||
Easy to integrate Vulkan memory allocation library.
|
||||
|
||||
**Documentation:** See [Vulkan Memory Allocator](https://gpuopen-librariesandsdks.github.io/VulkanMemoryAllocator/html/) (generated from Doxygen-style comments in [src/vk_mem_alloc.h](src/vk_mem_alloc.h))
|
||||
|
||||
**License:** MIT. See [LICENSE.txt](LICENSE.txt)
|
||||
|
||||
**Changelog:** See [CHANGELOG.md](CHANGELOG.md)
|
||||
|
||||
**Product page:** [Vulkan Memory Allocator on GPUOpen](https://gpuopen.com/gaming-product/vulkan-memory-allocator/)
|
||||
|
||||
**Build status:**
|
||||
|
||||
- Windows: [](https://ci.appveyor.com/project/adam-sawicki-amd/vulkanmemoryallocator/branch/master)
|
||||
- Linux: [](https://travis-ci.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator)
|
||||
|
||||
# Problem
|
||||
|
||||
Memory allocation and resource (buffer and image) creation in Vulkan is difficult (compared to older graphics APIs, like D3D11 or OpenGL) for several reasons:
|
||||
|
||||
- It requires a lot of boilerplate code, just like everything else in Vulkan, because it is a low-level and high-performance API.
|
||||
- There is an additional level of indirection: `VkDeviceMemory` is allocated separately from creating `VkBuffer`/`VkImage` and they must be bound together.
|
||||
- Driver must be queried for supported memory heaps and memory types. Different IHVs provide different types of it.
|
||||
- It is recommended practice to allocate bigger chunks of memory and assign parts of them to particular resources.
|
||||
|
||||
# Features
|
||||
|
||||
This library can help game developers to manage memory allocations and resource creation by offering some higher-level functions:
|
||||
|
||||
1. Functions that help to choose correct and optimal memory type based on intended usage of the memory.
|
||||
- Required or preferred traits of the memory are expressed using a higher-level description compared to Vulkan flags.
|
||||
2. Functions that allocate memory blocks, reserve and return parts of them (`VkDeviceMemory` + offset + size) to the user.
|
||||
- Library keeps track of allocated memory blocks, used and unused ranges inside them, finds best matching unused ranges for new allocations, respects all the rules of alignment and buffer/image granularity.
|
||||
3. Functions that can create an image/buffer, allocate memory for it and bind them together - all in one call.
|
||||
|
||||
Additional features:
|
||||
|
||||
- Well-documented - description of all functions and structures provided, along with chapters that contain general description and example code.
|
||||
- Thread-safety: Library is designed to be used in multithreaded code. Access to a single device memory block referred by different buffers and textures (binding, mapping) is synchronized internally.
|
||||
- Configuration: Fill optional members of CreateInfo structure to provide custom CPU memory allocator, pointers to Vulkan functions and other parameters.
|
||||
- Customization: Predefine appropriate macros to provide your own implementation of all external facilities used by the library, from assert, mutex, and atomic, to vector and linked list.
|
||||
- Support for memory mapping, reference-counted internally. Support for persistently mapped memory: Just allocate with appropriate flag and you get access to mapped pointer.
|
||||
- Support for non-coherent memory. Functions that flush/invalidate memory. `nonCoherentAtomSize` is respected automatically.
|
||||
- Support for resource aliasing (overlap).
|
||||
- Support for sparse binding and sparse residency: Convenience functions that allocate or free multiple memory pages at once.
|
||||
- Custom memory pools: Create a pool with desired parameters (e.g. fixed or limited maximum size) and allocate memory out of it.
|
||||
- Linear allocator: Create a pool with linear algorithm and use it for much faster allocations and deallocations in free-at-once, stack, double stack, or ring buffer fashion.
|
||||
- Support for Vulkan 1.0, 1.1, 1.2.
|
||||
- Support for extensions (and equivalent functionality included in new Vulkan versions):
|
||||
- VK_EXT_memory_budget: Used internally if available to query for current usage and budget. If not available, it falls back to an estimation based on memory heap sizes.
|
||||
- VK_KHR_dedicated_allocation: Just enable it and it will be used automatically by the library.
|
||||
- VK_AMD_device_coherent_memory
|
||||
- VK_KHR_buffer_device_address
|
||||
- Defragmentation of GPU and CPU memory: Let the library move data around to free some memory blocks and make your allocations better compacted.
|
||||
- Lost allocations: Allocate memory with appropriate flags and let the library remove allocations that are not used for many frames to make room for new ones.
|
||||
- Statistics: Obtain detailed statistics about the amount of memory used, unused, number of allocated blocks, number of allocations etc. - globally, per memory heap, and per memory type.
|
||||
- Debug annotations: Associate string with name or opaque pointer to your own data with every allocation.
|
||||
- JSON dump: Obtain a string in JSON format with detailed map of internal state, including list of allocations and gaps between them.
|
||||
- Convert this JSON dump into a picture to visualize your memory. See [tools/VmaDumpVis](tools/VmaDumpVis/README.md).
|
||||
- Debugging incorrect memory usage: Enable initialization of all allocated memory with a bit pattern to detect usage of uninitialized or freed memory. Enable validation of a magic number before and after every allocation to detect out-of-bounds memory corruption.
|
||||
- Record and replay sequence of calls to library functions to a file to check correctness, measure performance, and gather statistics.
|
||||
|
||||
# Prerequisites
|
||||
|
||||
- Self-contained C++ library in single header file. No external dependencies other than standard C and C++ library and of course Vulkan. STL containers are not used by default.
|
||||
- Public interface in C, in same convention as Vulkan API. Implementation in C++.
|
||||
- Error handling implemented by returning `VkResult` error codes - same way as in Vulkan.
|
||||
- Interface documented using Doxygen-style comments.
|
||||
- Platform-independent, but developed and tested on Windows using Visual Studio. Continuous integration setup for Windows and Linux. Used also on Android, MacOS, and other platforms.
|
||||
|
||||
# Example
|
||||
|
||||
Basic usage of this library is very simple. Advanced features are optional. After you have created a global `VmaAllocator` object, the complete code needed to create a buffer may look like this:
|
||||
|
||||
```cpp
|
||||
VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
|
||||
bufferInfo.size = 65536;
|
||||
bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
||||
|
||||
VmaAllocationCreateInfo allocInfo = {};
|
||||
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||
|
||||
VkBuffer buffer;
|
||||
VmaAllocation allocation;
|
||||
vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr);
|
||||
```
|
||||
|
||||
With this one function call:
|
||||
|
||||
1. `VkBuffer` is created.
|
||||
2. `VkDeviceMemory` block is allocated if needed.
|
||||
3. An unused region of the memory block is bound to this buffer.
|
||||
|
||||
`VmaAllocation` is an object that represents memory assigned to this buffer. It can be queried for parameters like Vulkan memory handle and offset.
|
||||
|
||||
# Binaries
|
||||
|
||||
The release comes with precompiled binary executables for "VulkanSample" application which contains test suite and "VmaReplay" tool. They are compiled using Visual Studio 2019, so they require appropriate libraries to work, including "MSVCP140.dll", "VCRUNTIME140.dll", "VCRUNTIME140_1.dll". If their launch fails with error message telling about those files missing, please download and install [Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019](https://support.microsoft.com/en-us/help/2977003/the-latest-supported-visual-c-downloads), "x64" version.
|
||||
|
||||
# Read more
|
||||
|
||||
See **[Documentation](https://gpuopen-librariesandsdks.github.io/VulkanMemoryAllocator/html/)**.
|
||||
|
||||
# Software using this library
|
||||
|
||||
- **[Detroit: Become Human](https://gpuopen.com/learn/porting-detroit-3/)**
|
||||
- **[Vulkan Samples](https://github.com/LunarG/VulkanSamples)** - official Khronos Vulkan samples. License: Apache-style.
|
||||
- **[Anvil](https://github.com/GPUOpen-LibrariesAndSDKs/Anvil)** - cross-platform framework for Vulkan. License: MIT.
|
||||
- **[Filament](https://github.com/google/filament)** - physically based rendering engine for Android, Windows, Linux and macOS, from Google. Apache License 2.0.
|
||||
- **[Atypical Games - proprietary game engine](https://developer.samsung.com/galaxy-gamedev/gamedev-blog/infinitejet.html)**
|
||||
- **[Flax Engine](https://flaxengine.com/)**
|
||||
- **[Lightweight Java Game Library (LWJGL)](https://www.lwjgl.org/)** - includes binding of the library for Java. License: BSD.
|
||||
- **[PowerVR SDK](https://github.com/powervr-graphics/Native_SDK)** - C++ cross-platform 3D graphics SDK, from Imagination. License: MIT.
|
||||
- **[Skia](https://github.com/google/skia)** - complete 2D graphic library for drawing Text, Geometries, and Images, from Google.
|
||||
- **[The Forge](https://github.com/ConfettiFX/The-Forge)** - cross-platform rendering framework. Apache License 2.0.
|
||||
- **[VK9](https://github.com/disks86/VK9)** - Direct3D 9 compatibility layer using Vulkan. Zlib license.
|
||||
- **[vkDOOM3](https://github.com/DustinHLand/vkDOOM3)** - Vulkan port of GPL DOOM 3 BFG Edition. License: GNU GPL.
|
||||
- **[vkQuake2](https://github.com/kondrak/vkQuake2)** - vanilla Quake 2 with Vulkan support. License: GNU GPL.
|
||||
- **[Vulkan Best Practice for Mobile Developers](https://github.com/ARM-software/vulkan_best_practice_for_mobile_developers)** from ARM. License: MIT.
|
||||
- **[RPCS3](https://github.com/RPCS3/rpcs3)** - PlayStation 3 emulator/debugger. License: GNU GPLv2.
|
||||
|
||||
[Many other projects on GitHub](https://github.com/search?q=AMD_VULKAN_MEMORY_ALLOCATOR_H&type=Code) and some game development studios that use Vulkan in their games.
|
||||
|
||||
# See also
|
||||
|
||||
- **[D3D12 Memory Allocator](https://github.com/GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator)** - equivalent library for Direct3D 12. License: MIT.
|
||||
- **[Awesome Vulkan](https://github.com/vinjn/awesome-vulkan)** - a curated list of awesome Vulkan libraries, debuggers and resources.
|
||||
- **[VulkanMemoryAllocator-Hpp](https://github.com/malte-v/VulkanMemoryAllocator-Hpp)** - C++ binding for this library. License: CC0-1.0.
|
||||
- **[PyVMA](https://github.com/realitix/pyvma)** - Python wrapper for this library. Author: Jean-Sébastien B. (@realitix). License: Apache 2.0.
|
||||
- **[vk-mem](https://github.com/gwihlidal/vk-mem-rs)** - Rust binding for this library. Author: Graham Wihlidal. License: Apache 2.0 or MIT.
|
||||
- **[Haskell bindings](https://hackage.haskell.org/package/VulkanMemoryAllocator)**, **[github](https://github.com/expipiplus1/vulkan/tree/master/VulkanMemoryAllocator)** - Haskell bindings for this library. Author: Joe Hermaszewski (@expipiplus1). License BSD-3-Clause.
|
||||
- **[vma_sample_sdl](https://github.com/rextimmy/vma_sample_sdl)** - SDL port of the sample app of this library (with the goal of running it on multiple platforms, including MacOS). Author: @rextimmy. License: MIT.
|
||||
- **[vulkan-malloc](https://github.com/dylanede/vulkan-malloc)** - Vulkan memory allocation library for Rust. Based on version 1 of this library. Author: Dylan Ede (@dylanede). License: MIT / Apache 2.0.
|
19560
extern/vulkan_memory_allocator/vk_mem_alloc.h
vendored
19560
extern/vulkan_memory_allocator/vk_mem_alloc.h
vendored
File diff suppressed because it is too large
Load Diff
@@ -1,3 +0,0 @@
|
||||
#define VMA_IMPLEMENTATION
|
||||
|
||||
#include "vk_mem_alloc.h"
|
@@ -85,7 +85,3 @@ endif()
|
||||
if(UNIX AND NOT APPLE)
|
||||
add_subdirectory(libc_compat)
|
||||
endif()
|
||||
|
||||
if(WITH_VULKAN)
|
||||
add_subdirectory(shader_compiler)
|
||||
endif()
|
||||
|
@@ -226,9 +226,6 @@ add_definitions(
|
||||
-DCCL_NAMESPACE_END=}
|
||||
)
|
||||
|
||||
if(WITH_CYCLES_DEBUG)
|
||||
add_definitions(-DWITH_CYCLES_DEBUG)
|
||||
endif()
|
||||
if(WITH_CYCLES_STANDALONE_GUI)
|
||||
add_definitions(-DWITH_CYCLES_STANDALONE_GUI)
|
||||
endif()
|
||||
@@ -337,7 +334,7 @@ else()
|
||||
endif()
|
||||
|
||||
# Warnings
|
||||
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_cxxflag_float_conversion "-Werror=float-conversion")
|
||||
ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_cxxflag_double_promotion "-Werror=double-promotion")
|
||||
ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_no_error_unused_macros "-Wno-error=unused-macros")
|
||||
|
@@ -138,6 +138,11 @@ endif()
|
||||
|
||||
blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
|
||||
|
||||
# avoid link failure with clang 3.4 debug
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND NOT ${CMAKE_C_COMPILER_VERSION} VERSION_LESS '3.4')
|
||||
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -gline-tables-only")
|
||||
endif()
|
||||
|
||||
add_dependencies(bf_intern_cycles bf_rna)
|
||||
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${ADDON_FILES}" ${CYCLES_INSTALL_PATH})
|
||||
|
@@ -233,7 +233,6 @@ def list_render_passes(scene, srl):
|
||||
if crl.denoising_store_passes:
|
||||
yield ("Denoising Normal", "XYZ", 'VECTOR')
|
||||
yield ("Denoising Albedo", "RGB", 'COLOR')
|
||||
yield ("Denoising Depth", "Z", 'VALUE')
|
||||
|
||||
# Custom AOV passes.
|
||||
for aov in srl.aovs:
|
||||
|
@@ -40,10 +40,10 @@ class AddPresetIntegrator(AddPresetBase, Operator):
|
||||
"cycles.transparent_max_bounces",
|
||||
"cycles.caustics_reflective",
|
||||
"cycles.caustics_refractive",
|
||||
"cycles.blur_glossy",
|
||||
"cycles.use_fast_gi",
|
||||
"cycles.ao_bounces",
|
||||
"cycles.ao_bounces_render",
|
||||
"cycles.blur_glossy"
|
||||
"cycles.use_fast_gi"
|
||||
"cycles.ao_bounces"
|
||||
"cycles.ao_bounces_render"
|
||||
]
|
||||
|
||||
preset_subdir = "cycles/integrator"
|
||||
|
@@ -87,7 +87,7 @@ enum_use_layer_samples = (
|
||||
|
||||
enum_sampling_pattern = (
|
||||
('SOBOL', "Sobol", "Use Sobol random sampling pattern", 0),
|
||||
('PROGRESSIVE_MULTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern", 1),
|
||||
('PROGRESSIVE_MUTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern", 1),
|
||||
)
|
||||
|
||||
enum_volume_sampling = (
|
||||
@@ -218,12 +218,6 @@ enum_denoising_prefilter = (
|
||||
('ACCURATE', "Accurate", "Prefilter noisy guiding passes before denoising color. Improves quality when guiding passes are noisy using extra processing time", 3),
|
||||
)
|
||||
|
||||
enum_direct_light_sampling_type = (
|
||||
('MULTIPLE_IMPORTANCE_SAMPLING', "Multiple Importance Sampling", "Multiple importance sampling is used to combine direct light contributions from next-event estimation and forward path tracing", 0),
|
||||
('FORWARD_PATH_TRACING', "Forward Path Tracing", "Direct light contributions are only sampled using forward path tracing", 1),
|
||||
('NEXT_EVENT_ESTIMATION', "Next-Event Estimation", "Direct light contributions are only sampled using next-event estimation", 2),
|
||||
)
|
||||
|
||||
def update_render_passes(self, context):
|
||||
scene = context.scene
|
||||
view_layer = context.view_layer
|
||||
@@ -331,13 +325,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
default=1024,
|
||||
)
|
||||
|
||||
sample_offset: IntProperty(
|
||||
name="Sample Offset",
|
||||
description="Number of samples to skip when starting render",
|
||||
min=0, max=(1 << 24),
|
||||
default=0,
|
||||
)
|
||||
|
||||
time_limit: FloatProperty(
|
||||
name="Time Limit",
|
||||
description="Limit the render time (excluding synchronization time)."
|
||||
@@ -352,14 +339,14 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
name="Sampling Pattern",
|
||||
description="Random sampling pattern used by the integrator. When adaptive sampling is enabled, Progressive Multi-Jitter is always used instead of Sobol",
|
||||
items=enum_sampling_pattern,
|
||||
default='PROGRESSIVE_MULTI_JITTER',
|
||||
default='PROGRESSIVE_MUTI_JITTER',
|
||||
)
|
||||
|
||||
scrambling_distance: FloatProperty(
|
||||
name="Scrambling Distance",
|
||||
default=1.0,
|
||||
min=0.0, max=1.0,
|
||||
description="Reduce randomization between pixels to improve GPU rendering performance, at the cost of possible rendering artifacts if set too low. Only works when not using adaptive sampling",
|
||||
description="Lower values give faster rendering with GPU rendering and less noise with all devices at the cost of possible artifacts if set too low. Only works when not using adaptive sampling",
|
||||
)
|
||||
preview_scrambling_distance: BoolProperty(
|
||||
name="Scrambling Distance viewport",
|
||||
@@ -367,10 +354,10 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
description="Uses the Scrambling Distance value for the viewport. Faster but may flicker",
|
||||
)
|
||||
|
||||
auto_scrambling_distance: BoolProperty(
|
||||
name="Automatic Scrambling Distance",
|
||||
adaptive_scrambling_distance: BoolProperty(
|
||||
name="Adaptive Scrambling Distance",
|
||||
default=False,
|
||||
description="Automatically reduce the randomization between pixels to improve GPU rendering performance, at the cost of possible rendering artifacts. Only works when not using adaptive sampling",
|
||||
description="Uses a formula to adapt the scrambling distance strength based on the sample count",
|
||||
)
|
||||
|
||||
use_layer_samples: EnumProperty(
|
||||
@@ -428,13 +415,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
default=0,
|
||||
)
|
||||
|
||||
direct_light_sampling_type: EnumProperty(
|
||||
name="Direct Light Sampling Type",
|
||||
description="The type of strategy used for sampling direct light contributions",
|
||||
items=enum_direct_light_sampling_type,
|
||||
default='MULTIPLE_IMPORTANCE_SAMPLING',
|
||||
)
|
||||
|
||||
min_light_bounces: IntProperty(
|
||||
name="Min Light Bounces",
|
||||
description="Minimum number of light bounces. Setting this higher reduces noise in the first bounces, "
|
||||
@@ -790,8 +770,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
)
|
||||
|
||||
use_auto_tile: BoolProperty(
|
||||
name="Using Tiling",
|
||||
description="Render high resolution images in tiles to reduce memory usage, using the specified tile size. Tiles are cached to disk while rendering to save memory",
|
||||
name="Auto Tiles",
|
||||
description="Automatically render high resolution images in tiles to reduce memory usage, using the specified tile size. Tiles are cached to disk while rendering to save memory",
|
||||
default=True,
|
||||
)
|
||||
tile_size: IntProperty(
|
||||
@@ -1380,7 +1360,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
elif entry.type == 'CPU':
|
||||
cpu_devices.append(entry)
|
||||
# Extend all GPU devices with CPU.
|
||||
if len(devices) and compute_device_type != 'CPU':
|
||||
if compute_device_type != 'CPU':
|
||||
devices.extend(cpu_devices)
|
||||
return devices
|
||||
|
||||
@@ -1398,18 +1378,12 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
self.refresh_devices()
|
||||
return None
|
||||
|
||||
def get_compute_device_type(self):
|
||||
if self.compute_device_type == '':
|
||||
return 'NONE'
|
||||
return self.compute_device_type
|
||||
|
||||
def get_num_gpu_devices(self):
|
||||
import _cycles
|
||||
compute_device_type = self.get_compute_device_type()
|
||||
device_list = _cycles.available_devices(compute_device_type)
|
||||
device_list = _cycles.available_devices(self.compute_device_type)
|
||||
num = 0
|
||||
for device in device_list:
|
||||
if device[1] != compute_device_type:
|
||||
if device[1] != self.compute_device_type:
|
||||
continue
|
||||
for dev in self.devices:
|
||||
if dev.use and dev.id == device[2]:
|
||||
@@ -1439,9 +1413,9 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
col.label(text="and NVIDIA driver version 470 or newer", icon='BLANK1')
|
||||
elif device_type == 'HIP':
|
||||
import sys
|
||||
col.label(text="Requires discrete AMD GPU with RDNA architecture", icon='BLANK1')
|
||||
col.label(text="Requires discrete AMD GPU with ??? architecture", icon='BLANK1')
|
||||
if sys.platform[:3] == "win":
|
||||
col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
|
||||
col.label(text="and AMD driver version ??? or newer", icon='BLANK1')
|
||||
return
|
||||
|
||||
for device in devices:
|
||||
@@ -1451,16 +1425,15 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
row = layout.row()
|
||||
row.prop(self, "compute_device_type", expand=True)
|
||||
|
||||
compute_device_type = self.get_compute_device_type()
|
||||
if compute_device_type == 'NONE':
|
||||
if self.compute_device_type == 'NONE':
|
||||
return
|
||||
row = layout.row()
|
||||
devices = self.get_devices_for_type(compute_device_type)
|
||||
self._draw_devices(row, compute_device_type, devices)
|
||||
devices = self.get_devices_for_type(self.compute_device_type)
|
||||
self._draw_devices(row, self.compute_device_type, devices)
|
||||
|
||||
import _cycles
|
||||
has_peer_memory = 0
|
||||
for device in _cycles.available_devices(compute_device_type):
|
||||
for device in _cycles.available_devices(self.compute_device_type):
|
||||
if device[3] and self.find_existing_device_entry(device).use:
|
||||
has_peer_memory += 1
|
||||
if has_peer_memory > 1:
|
||||
|
@@ -290,18 +290,15 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
|
||||
col.active = not (cscene.use_adaptive_sampling and cscene.use_preview_adaptive_sampling)
|
||||
col.prop(cscene, "sampling_pattern", text="Pattern")
|
||||
|
||||
col = layout.column(align=True)
|
||||
col.prop(cscene, "sample_offset")
|
||||
|
||||
layout.separator()
|
||||
|
||||
heading = layout.column(align=True, heading="Scrambling Distance")
|
||||
heading.active = not (cscene.use_adaptive_sampling and cscene.use_preview_adaptive_sampling)
|
||||
heading.prop(cscene, "auto_scrambling_distance", text="Automatic")
|
||||
sub = heading.row()
|
||||
col = layout.column(align=True)
|
||||
col.active = not (cscene.use_adaptive_sampling and cscene.use_preview_adaptive_sampling)
|
||||
col.prop(cscene, "scrambling_distance", text="Scrambling Distance")
|
||||
col.prop(cscene, "adaptive_scrambling_distance", text="Adaptive")
|
||||
sub = col.row(align=True)
|
||||
sub.active = not cscene.use_preview_adaptive_sampling
|
||||
sub.prop(cscene, "preview_scrambling_distance", text="Viewport")
|
||||
heading.prop(cscene, "scrambling_distance", text="Multiplier")
|
||||
|
||||
layout.separator()
|
||||
|
||||
@@ -1054,7 +1051,7 @@ class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel):
|
||||
|
||||
|
||||
def has_geometry_visibility(ob):
|
||||
return ob and ((ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LIGHT', 'VOLUME', 'POINTCLOUD', 'HAIR'}) or
|
||||
return ob and ((ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LIGHT'}) or
|
||||
(ob.instance_type == 'COLLECTION' and ob.instance_collection))
|
||||
|
||||
|
||||
|
@@ -86,7 +86,7 @@ def do_versions(self):
|
||||
# Device might not currently be available so this can fail
|
||||
try:
|
||||
if system.legacy_compute_device_type == 1:
|
||||
prop.compute_device_type = 'NONE' # Was OpenCL
|
||||
prop.compute_device_type = 'OPENCL'
|
||||
elif system.legacy_compute_device_type == 2:
|
||||
prop.compute_device_type = 'CUDA'
|
||||
else:
|
||||
@@ -97,12 +97,6 @@ def do_versions(self):
|
||||
# Init device list for UI
|
||||
prop.get_devices(prop.compute_device_type)
|
||||
|
||||
if bpy.context.preferences.version <= (3, 0, 40):
|
||||
# Disable OpenCL device
|
||||
prop = bpy.context.preferences.addons[__package__].preferences
|
||||
if prop.is_property_set("compute_device_type") and prop['compute_device_type'] == 4:
|
||||
prop.compute_device_type = 'NONE'
|
||||
|
||||
# We don't modify startup file because it assumes to
|
||||
# have all the default values only.
|
||||
if not bpy.data.is_saved:
|
||||
@@ -243,7 +237,7 @@ def do_versions(self):
|
||||
cscene.use_preview_denoising = False
|
||||
if not cscene.is_property_set("sampling_pattern") or \
|
||||
cscene.get('sampling_pattern') >= 2:
|
||||
cscene.sampling_pattern = 'PROGRESSIVE_MULTI_JITTER'
|
||||
cscene.sampling_pattern = 'PROGRESSIVE_MUTI_JITTER'
|
||||
|
||||
# Removal of square samples.
|
||||
cscene = scene.cycles
|
||||
|
@@ -639,7 +639,7 @@ void BlenderSync::sync_camera_motion(
|
||||
/* TODO(sergey): De-duplicate calculation with camera sync. */
|
||||
float fov = 2.0f * atanf((0.5f * sensor_size) / bcam.lens / aspectratio);
|
||||
if (fov != cam->get_fov()) {
|
||||
VLOG(3) << "Camera " << b_ob.name() << " FOV change detected.";
|
||||
VLOG(1) << "Camera " << b_ob.name() << " FOV change detected.";
|
||||
if (motion_time == 0.0f) {
|
||||
cam->set_fov(fov);
|
||||
}
|
||||
|
@@ -199,7 +199,7 @@ static bool ObtainCacheParticleUV(Hair *hair,
|
||||
b_mesh->uv_layers.begin(l);
|
||||
|
||||
float2 uv = zero_float2();
|
||||
if (!b_mesh->uv_layers.empty())
|
||||
if (b_mesh->uv_layers.length())
|
||||
b_psys.uv_on_emitter(psmd, *b_pa, pa_no, uv_num, &uv.x);
|
||||
CData->curve_uv.push_back_slow(uv);
|
||||
|
||||
@@ -261,7 +261,7 @@ static bool ObtainCacheParticleVcol(Hair *hair,
|
||||
b_mesh->vertex_colors.begin(l);
|
||||
|
||||
float4 vcol = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
|
||||
if (!b_mesh->vertex_colors.empty())
|
||||
if (b_mesh->vertex_colors.length())
|
||||
b_psys.mcol_on_emitter(psmd, *b_pa, pa_no, vcol_num, &vcol.x);
|
||||
CData->curve_vcol.push_back_slow(vcol);
|
||||
|
||||
@@ -304,6 +304,10 @@ static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CDa
|
||||
}
|
||||
}
|
||||
|
||||
if (num_curves > 0) {
|
||||
VLOG(1) << "Exporting curve segments for mesh " << hair->name;
|
||||
}
|
||||
|
||||
hair->reserve_curves(hair->num_curves() + num_curves, hair->get_curve_keys().size() + num_keys);
|
||||
|
||||
num_keys = 0;
|
||||
@@ -352,7 +356,7 @@ static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CDa
|
||||
|
||||
/* check allocation */
|
||||
if ((hair->get_curve_keys().size() != num_keys) || (hair->num_curves() != num_curves)) {
|
||||
VLOG(1) << "Hair memory allocation failed, clearing data.";
|
||||
VLOG(1) << "Allocation failed, clearing data";
|
||||
hair->clear(true);
|
||||
}
|
||||
}
|
||||
@@ -408,11 +412,16 @@ static void export_hair_motion_validate_attribute(Hair *hair,
|
||||
if (num_motion_keys != num_keys || !have_motion) {
|
||||
/* No motion or hair "topology" changed, remove attributes again. */
|
||||
if (num_motion_keys != num_keys) {
|
||||
VLOG(1) << "Hair topology changed, removing motion attribute.";
|
||||
VLOG(1) << "Hair topology changed, removing attribute.";
|
||||
}
|
||||
else {
|
||||
VLOG(1) << "No motion, removing attribute.";
|
||||
}
|
||||
hair->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
}
|
||||
else if (motion_step > 0) {
|
||||
VLOG(1) << "Filling in new motion vertex position for motion_step " << motion_step;
|
||||
|
||||
/* Motion, fill up previous steps that we might have skipped because
|
||||
* they had no motion, but we need them anyway now. */
|
||||
for (int step = 0; step < motion_step; step++) {
|
||||
@@ -428,12 +437,16 @@ static void export_hair_motion_validate_attribute(Hair *hair,
|
||||
|
||||
static void ExportCurveSegmentsMotion(Hair *hair, ParticleCurveData *CData, int motion_step)
|
||||
{
|
||||
VLOG(1) << "Exporting curve motion segments for hair " << hair->name << ", motion step "
|
||||
<< motion_step;
|
||||
|
||||
/* find attribute */
|
||||
Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
bool new_attribute = false;
|
||||
|
||||
/* add new attribute if it doesn't exist already */
|
||||
if (!attr_mP) {
|
||||
VLOG(1) << "Creating new motion vertex position attribute";
|
||||
attr_mP = hair->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
new_attribute = true;
|
||||
}
|
||||
@@ -669,6 +682,10 @@ static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair)
|
||||
const int num_keys = b_hair.points.length();
|
||||
const int num_curves = b_hair.curves.length();
|
||||
|
||||
if (num_curves > 0) {
|
||||
VLOG(1) << "Exporting curve segments for hair " << hair->name;
|
||||
}
|
||||
|
||||
hair->reserve_curves(num_curves, num_keys);
|
||||
|
||||
/* Export curves and points. */
|
||||
@@ -726,11 +743,15 @@ static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair)
|
||||
|
||||
static void export_hair_curves_motion(Hair *hair, BL::Hair b_hair, int motion_step)
|
||||
{
|
||||
VLOG(1) << "Exporting curve motion segments for hair " << hair->name << ", motion step "
|
||||
<< motion_step;
|
||||
|
||||
/* Find or add attribute. */
|
||||
Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
bool new_attribute = false;
|
||||
|
||||
if (!attr_mP) {
|
||||
VLOG(1) << "Creating new motion vertex position attribute";
|
||||
attr_mP = hair->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
new_attribute = true;
|
||||
}
|
||||
|
@@ -334,7 +334,7 @@ bool BlenderDisplayDriver::update_begin(const Params ¶ms,
|
||||
|
||||
/* Update PBO dimensions if needed.
|
||||
*
|
||||
* NOTE: Allocate the PBO for the size which will fit the final render resolution (as in,
|
||||
* NOTE: Allocate the PBO for the the size which will fit the final render resolution (as in,
|
||||
* at a resolution divider 1). This way we don't need to recreate graphics interoperability
|
||||
* objects which are costly and which are tied to the specific underlying buffer size.
|
||||
* The downside of this approach is that when graphics interoperability is not used we are
|
||||
|
@@ -555,7 +555,7 @@ static void attr_create_vertex_color(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh,
|
||||
/* Create uv map attributes. */
|
||||
static void attr_create_uv_map(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh)
|
||||
{
|
||||
if (!b_mesh.uv_layers.empty()) {
|
||||
if (b_mesh.uv_layers.length() != 0) {
|
||||
for (BL::MeshUVLoopLayer &l : b_mesh.uv_layers) {
|
||||
const bool active_render = l.active_render();
|
||||
AttributeStandard uv_std = (active_render) ? ATTR_STD_UV : ATTR_STD_NONE;
|
||||
@@ -619,7 +619,7 @@ static void attr_create_uv_map(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh)
|
||||
|
||||
static void attr_create_subd_uv_map(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, bool subdivide_uvs)
|
||||
{
|
||||
if (!b_mesh.uv_layers.empty()) {
|
||||
if (b_mesh.uv_layers.length() != 0) {
|
||||
BL::Mesh::uv_layers_iterator l;
|
||||
int i = 0;
|
||||
|
||||
@@ -951,7 +951,7 @@ static void create_mesh(Scene *scene,
|
||||
N = attr_N->data_float3();
|
||||
|
||||
/* create generated coordinates from undeformed coordinates */
|
||||
const bool need_default_tangent = (subdivision == false) && (b_mesh.uv_layers.empty()) &&
|
||||
const bool need_default_tangent = (subdivision == false) && (b_mesh.uv_layers.length() == 0) &&
|
||||
(mesh->need_attribute(scene, ATTR_STD_UV_TANGENT));
|
||||
if (mesh->need_attribute(scene, ATTR_STD_GENERATED) || need_default_tangent) {
|
||||
Attribute *attr = attributes.add(ATTR_STD_GENERATED);
|
||||
|
@@ -62,46 +62,31 @@ bool BlenderSync::BKE_object_is_modified(BL::Object &b_ob)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool BlenderSync::object_is_geometry(BObjectInfo &b_ob_info)
|
||||
bool BlenderSync::object_is_geometry(BL::Object &b_ob)
|
||||
{
|
||||
BL::ID b_ob_data = b_ob_info.object_data;
|
||||
BL::ID b_ob_data = b_ob.data();
|
||||
|
||||
if (!b_ob_data) {
|
||||
return false;
|
||||
}
|
||||
|
||||
BL::Object::type_enum type = b_ob_info.iter_object.type();
|
||||
BL::Object::type_enum type = b_ob.type();
|
||||
|
||||
if (type == BL::Object::type_VOLUME || type == BL::Object::type_HAIR) {
|
||||
/* Will be exported attached to mesh. */
|
||||
return true;
|
||||
}
|
||||
else if (type == BL::Object::type_CURVE) {
|
||||
/* Skip exporting curves without faces, overhead can be
|
||||
* significant if there are many for path animation. */
|
||||
BL::Curve b_curve(b_ob_data);
|
||||
|
||||
/* Other object types that are not meshes but evaluate to meshes are presented to render engines
|
||||
* as separate instance objects. Metaballs and surface objects have not been affected by that
|
||||
* change yet. */
|
||||
if (type == BL::Object::type_SURFACE || type == BL::Object::type_META) {
|
||||
return true;
|
||||
return (b_curve.bevel_object() || b_curve.extrude() != 0.0f || b_curve.bevel_depth() != 0.0f ||
|
||||
b_curve.dimensions() == BL::Curve::dimensions_2D || b_ob.modifiers.length());
|
||||
}
|
||||
|
||||
return b_ob_data.is_a(&RNA_Mesh);
|
||||
}
|
||||
|
||||
bool BlenderSync::object_can_have_geometry(BL::Object &b_ob)
|
||||
{
|
||||
BL::Object::type_enum type = b_ob.type();
|
||||
switch (type) {
|
||||
case BL::Object::type_MESH:
|
||||
case BL::Object::type_CURVE:
|
||||
case BL::Object::type_SURFACE:
|
||||
case BL::Object::type_META:
|
||||
case BL::Object::type_FONT:
|
||||
case BL::Object::type_HAIR:
|
||||
case BL::Object::type_POINTCLOUD:
|
||||
case BL::Object::type_VOLUME:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
else {
|
||||
return (b_ob_data.is_a(&RNA_Mesh) || b_ob_data.is_a(&RNA_Curve) ||
|
||||
b_ob_data.is_a(&RNA_MetaBall));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -176,11 +161,6 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
|
||||
if (is_instance) {
|
||||
persistent_id_array = b_instance.persistent_id();
|
||||
persistent_id = persistent_id_array.data;
|
||||
if (!b_ob_info.is_real_object_data()) {
|
||||
/* Remember which object data the geometry is coming from, so that we can sync it when the
|
||||
* object has changed. */
|
||||
instance_geometries_by_object[b_ob_info.real_object.ptr.data].insert(b_ob_info.object_data);
|
||||
}
|
||||
}
|
||||
|
||||
/* light is handled separately */
|
||||
@@ -207,7 +187,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
|
||||
}
|
||||
|
||||
/* only interested in object that we can create meshes from */
|
||||
if (!object_is_geometry(b_ob_info)) {
|
||||
if (!object_is_geometry(b_ob)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -294,7 +274,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
|
||||
|
||||
object->set_visibility(visibility);
|
||||
|
||||
object->set_is_shadow_catcher(b_ob.is_shadow_catcher() || b_parent.is_shadow_catcher());
|
||||
object->set_is_shadow_catcher(b_ob.is_shadow_catcher());
|
||||
|
||||
float shadow_terminator_shading_offset = get_float(cobject, "shadow_terminator_offset");
|
||||
object->set_shadow_terminator_shading_offset(shadow_terminator_shading_offset);
|
||||
@@ -580,7 +560,6 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph,
|
||||
else {
|
||||
geometry_motion_synced.clear();
|
||||
}
|
||||
instance_geometries_by_object.clear();
|
||||
|
||||
/* initialize culling */
|
||||
BlenderObjectCulling culling(scene, b_scene);
|
||||
|
@@ -157,6 +157,8 @@ static PyObject *init_func(PyObject * /*self*/, PyObject *args)
|
||||
|
||||
DebugFlags().running_inside_blender = true;
|
||||
|
||||
VLOG(2) << "Debug flags initialized to:\n" << DebugFlags();
|
||||
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
@@ -883,6 +885,8 @@ static PyObject *debug_flags_update_func(PyObject * /*self*/, PyObject *args)
|
||||
|
||||
debug_flags_sync_from_scene(b_scene);
|
||||
|
||||
VLOG(2) << "Debug flags set to:\n" << DebugFlags();
|
||||
|
||||
debug_flags_set = true;
|
||||
|
||||
Py_RETURN_NONE;
|
||||
@@ -892,6 +896,7 @@ static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/
|
||||
{
|
||||
debug_flags_reset();
|
||||
if (debug_flags_set) {
|
||||
VLOG(2) << "Debug flags reset to:\n" << DebugFlags();
|
||||
debug_flags_set = false;
|
||||
}
|
||||
Py_RETURN_NONE;
|
||||
|
@@ -129,7 +129,7 @@ void BlenderSession::create_session()
|
||||
/* reset status/progress */
|
||||
last_status = "";
|
||||
last_error = "";
|
||||
last_progress = -1.0;
|
||||
last_progress = -1.0f;
|
||||
start_resize_time = 0.0;
|
||||
|
||||
/* create session */
|
||||
@@ -615,24 +615,6 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
|
||||
sync->sync_camera(b_render, b_camera_override, width, height, "");
|
||||
sync->sync_data(
|
||||
b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state);
|
||||
|
||||
/* Filtering settings for combined pass. */
|
||||
if (pass->get_type() == PASS_COMBINED) {
|
||||
Integrator *integrator = scene->integrator;
|
||||
integrator->set_use_direct_light((bake_filter & BL::BakeSettings::pass_filter_DIRECT) != 0);
|
||||
integrator->set_use_indirect_light((bake_filter & BL::BakeSettings::pass_filter_INDIRECT) !=
|
||||
0);
|
||||
integrator->set_use_diffuse((bake_filter & BL::BakeSettings::pass_filter_DIFFUSE) != 0);
|
||||
integrator->set_use_glossy((bake_filter & BL::BakeSettings::pass_filter_GLOSSY) != 0);
|
||||
integrator->set_use_transmission(
|
||||
(bake_filter & BL::BakeSettings::pass_filter_TRANSMISSION) != 0);
|
||||
integrator->set_use_emission((bake_filter & BL::BakeSettings::pass_filter_EMIT) != 0);
|
||||
}
|
||||
|
||||
/* Always use transparent background for baking. */
|
||||
scene->background->set_transparent(true);
|
||||
|
||||
/* Load built-in images from Blender. */
|
||||
builtin_images_load();
|
||||
}
|
||||
|
||||
@@ -859,7 +841,7 @@ void BlenderSession::get_status(string &status, string &substatus)
|
||||
session->progress.get_status(status, substatus);
|
||||
}
|
||||
|
||||
void BlenderSession::get_progress(double &progress, double &total_time, double &render_time)
|
||||
void BlenderSession::get_progress(float &progress, double &total_time, double &render_time)
|
||||
{
|
||||
session->progress.get_time(total_time, render_time);
|
||||
progress = session->progress.get_progress();
|
||||
@@ -867,10 +849,10 @@ void BlenderSession::get_progress(double &progress, double &total_time, double &
|
||||
|
||||
void BlenderSession::update_bake_progress()
|
||||
{
|
||||
double progress = session->progress.get_progress();
|
||||
float progress = session->progress.get_progress();
|
||||
|
||||
if (progress != last_progress) {
|
||||
b_engine.update_progress((float)progress);
|
||||
b_engine.update_progress(progress);
|
||||
last_progress = progress;
|
||||
}
|
||||
}
|
||||
@@ -879,7 +861,7 @@ void BlenderSession::update_status_progress()
|
||||
{
|
||||
string timestatus, status, substatus;
|
||||
string scene_status = "";
|
||||
double progress;
|
||||
float progress;
|
||||
double total_time, remaining_time = 0, render_time;
|
||||
float mem_used = (float)session->stats.mem_used / 1024.0f / 1024.0f;
|
||||
float mem_peak = (float)session->stats.mem_peak / 1024.0f / 1024.0f;
|
||||
@@ -923,7 +905,7 @@ void BlenderSession::update_status_progress()
|
||||
last_status_time = current_time;
|
||||
}
|
||||
if (progress != last_progress) {
|
||||
b_engine.update_progress((float)progress);
|
||||
b_engine.update_progress(progress);
|
||||
last_progress = progress;
|
||||
}
|
||||
|
||||
|
@@ -82,7 +82,7 @@ class BlenderSession {
|
||||
void tag_redraw();
|
||||
void tag_update();
|
||||
void get_status(string &status, string &substatus);
|
||||
void get_progress(double &progress, double &total_time, double &render_time);
|
||||
void get_progress(float &progress, double &total_time, double &render_time);
|
||||
void test_cancel();
|
||||
void update_status_progress();
|
||||
void update_bake_progress();
|
||||
@@ -108,7 +108,7 @@ class BlenderSession {
|
||||
|
||||
string last_status;
|
||||
string last_error;
|
||||
double last_progress;
|
||||
float last_progress;
|
||||
double last_status_time;
|
||||
|
||||
int width, height;
|
||||
|
@@ -162,19 +162,19 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d
|
||||
/* Object */
|
||||
else if (b_id.is_a(&RNA_Object)) {
|
||||
BL::Object b_ob(b_id);
|
||||
const bool can_have_geometry = object_can_have_geometry(b_ob);
|
||||
const bool is_light = !can_have_geometry && object_is_light(b_ob);
|
||||
const bool is_geometry = object_is_geometry(b_ob);
|
||||
const bool is_light = !is_geometry && object_is_light(b_ob);
|
||||
|
||||
if (b_ob.is_instancer() && b_update.is_updated_shading()) {
|
||||
/* Needed for e.g. object color updates on instancer. */
|
||||
object_map.set_recalc(b_ob);
|
||||
}
|
||||
|
||||
if (can_have_geometry || is_light) {
|
||||
if (is_geometry || is_light) {
|
||||
const bool updated_geometry = b_update.is_updated_geometry();
|
||||
|
||||
/* Geometry (mesh, hair, volume). */
|
||||
if (can_have_geometry) {
|
||||
if (is_geometry) {
|
||||
if (b_update.is_updated_transform() || b_update.is_updated_shading()) {
|
||||
object_map.set_recalc(b_ob);
|
||||
}
|
||||
@@ -183,15 +183,6 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d
|
||||
(object_subdivision_type(b_ob, preview, experimental) != Mesh::SUBDIVISION_NONE)) {
|
||||
BL::ID key = BKE_object_is_modified(b_ob) ? b_ob : b_ob.data();
|
||||
geometry_map.set_recalc(key);
|
||||
|
||||
/* Sync all contained geometry instances as well when the object changed.. */
|
||||
map<void *, set<BL::ID>>::const_iterator instance_geometries =
|
||||
instance_geometries_by_object.find(b_ob.ptr.data);
|
||||
if (instance_geometries != instance_geometries_by_object.end()) {
|
||||
for (BL::ID geometry : instance_geometries->second) {
|
||||
geometry_map.set_recalc(geometry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (updated_geometry) {
|
||||
@@ -365,8 +356,8 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
|
||||
|
||||
int samples = get_int(cscene, "samples");
|
||||
float scrambling_distance = get_float(cscene, "scrambling_distance");
|
||||
bool auto_scrambling_distance = get_boolean(cscene, "auto_scrambling_distance");
|
||||
if (auto_scrambling_distance) {
|
||||
bool adaptive_scrambling_distance = get_boolean(cscene, "adaptive_scrambling_distance");
|
||||
if (adaptive_scrambling_distance) {
|
||||
scrambling_distance *= 4.0f / sqrtf(samples);
|
||||
}
|
||||
|
||||
@@ -375,9 +366,7 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
|
||||
if ((preview && !preview_scrambling_distance) || use_adaptive_sampling)
|
||||
scrambling_distance = 1.0f;
|
||||
|
||||
if (scrambling_distance != 1.0f) {
|
||||
VLOG(3) << "Using scrambling distance: " << scrambling_distance;
|
||||
}
|
||||
VLOG(1) << "Used Scrambling Distance: " << scrambling_distance;
|
||||
integrator->set_scrambling_distance(scrambling_distance);
|
||||
|
||||
if (get_boolean(cscene, "use_fast_gi")) {
|
||||
@@ -392,12 +381,6 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
|
||||
integrator->set_ao_bounces(0);
|
||||
}
|
||||
|
||||
#ifdef WITH_CYCLES_DEBUG
|
||||
DirectLightSamplingType direct_light_sampling_type = (DirectLightSamplingType)get_enum(
|
||||
cscene, "direct_light_sampling_type", DIRECT_LIGHT_SAMPLING_NUM, DIRECT_LIGHT_SAMPLING_MIS);
|
||||
integrator->set_direct_light_sampling_type(direct_light_sampling_type);
|
||||
#endif
|
||||
|
||||
const DenoiseParams denoise_params = get_denoise_params(b_scene, b_view_layer, background);
|
||||
integrator->set_use_denoise(denoise_params.use);
|
||||
|
||||
@@ -841,25 +824,18 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
|
||||
/* samples */
|
||||
int samples = get_int(cscene, "samples");
|
||||
int preview_samples = get_int(cscene, "preview_samples");
|
||||
int sample_offset = get_int(cscene, "sample_offset");
|
||||
|
||||
if (background) {
|
||||
params.samples = samples;
|
||||
params.sample_offset = sample_offset;
|
||||
}
|
||||
else {
|
||||
params.samples = preview_samples;
|
||||
if (params.samples == 0) {
|
||||
if (params.samples == 0)
|
||||
params.samples = INT_MAX;
|
||||
}
|
||||
params.sample_offset = 0;
|
||||
}
|
||||
|
||||
/* Clamp sample offset. */
|
||||
params.sample_offset = clamp(params.sample_offset, 0, Integrator::MAX_SAMPLES);
|
||||
|
||||
/* Clamp samples. */
|
||||
params.samples = clamp(params.samples, 0, Integrator::MAX_SAMPLES - params.sample_offset);
|
||||
params.samples = min(params.samples, Integrator::MAX_SAMPLES);
|
||||
|
||||
/* Viewport Performance */
|
||||
params.pixel_size = b_engine.get_preview_pixel_size(b_scene);
|
||||
@@ -878,7 +854,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
|
||||
|
||||
/* Time limit. */
|
||||
if (background) {
|
||||
params.time_limit = (double)get_float(cscene, "time_limit");
|
||||
params.time_limit = get_float(cscene, "time_limit");
|
||||
}
|
||||
else {
|
||||
/* For the viewport it kind of makes more sense to think in terms of the noise floor, which is
|
||||
|
@@ -208,8 +208,7 @@ class BlenderSync {
|
||||
/* util */
|
||||
void find_shader(BL::ID &id, array<Node *> &used_shaders, Shader *default_shader);
|
||||
bool BKE_object_is_modified(BL::Object &b_ob);
|
||||
bool object_is_geometry(BObjectInfo &b_ob_info);
|
||||
bool object_can_have_geometry(BL::Object &b_ob);
|
||||
bool object_is_geometry(BL::Object &b_ob);
|
||||
bool object_is_light(BL::Object &b_ob);
|
||||
|
||||
/* variables */
|
||||
@@ -226,8 +225,6 @@ class BlenderSync {
|
||||
set<Geometry *> geometry_synced;
|
||||
set<Geometry *> geometry_motion_synced;
|
||||
set<Geometry *> geometry_motion_attribute_synced;
|
||||
/** Remember which geometries come from which objects to be able to sync them after changes. */
|
||||
map<void *, set<BL::ID>> instance_geometries_by_object;
|
||||
set<float> motion_times;
|
||||
void *world_map;
|
||||
bool world_recalc;
|
||||
|
@@ -303,7 +303,7 @@ static inline string image_user_file_path(BL::ImageUser &iuser,
|
||||
string filepath_str = string(filepath);
|
||||
if (load_tiled && ima.source() == BL::Image::source_TILED) {
|
||||
string udim;
|
||||
if (!ima.tiles.empty()) {
|
||||
if (ima.tiles.length() > 0) {
|
||||
udim = to_string(ima.tiles[0].number());
|
||||
}
|
||||
string_replace(filepath_str, udim, "<UDIM>");
|
||||
@@ -647,7 +647,7 @@ static inline Mesh::SubdivisionType object_subdivision_type(BL::Object &b_ob,
|
||||
{
|
||||
PointerRNA cobj = RNA_pointer_get(&b_ob.ptr, "cycles");
|
||||
|
||||
if (cobj.data && !b_ob.modifiers.empty() && experimental) {
|
||||
if (cobj.data && b_ob.modifiers.length() > 0 && experimental) {
|
||||
BL::Modifier mod = b_ob.modifiers[b_ob.modifiers.length() - 1];
|
||||
bool enabled = preview ? mod.show_viewport() : mod.show_render();
|
||||
|
||||
|
@@ -303,7 +303,7 @@ static void rtc_error_func(void *, enum RTCError, const char *str)
|
||||
VLOG(1) << str;
|
||||
}
|
||||
|
||||
static double progress_start_time = 0.0;
|
||||
static double progress_start_time = 0.0f;
|
||||
|
||||
static bool rtc_progress_func(void *user_ptr, const double n)
|
||||
{
|
||||
|
@@ -153,7 +153,7 @@ void BVHNode::update_time()
|
||||
namespace {
|
||||
|
||||
struct DumpTraversalContext {
|
||||
/* Descriptor of while where writing is happening. */
|
||||
/* Descriptor of wile where writing is happening. */
|
||||
FILE *stream;
|
||||
/* Unique identifier of the node current. */
|
||||
int id;
|
||||
|
@@ -178,7 +178,7 @@ class InnerNode : public BVHNode {
|
||||
reset_unused_children();
|
||||
}
|
||||
|
||||
/* NOTE: This function is only used during binary BVH builder, and it's
|
||||
/* NOTE: This function is only used during binary BVH builder, and it
|
||||
* supposed to be configured to have 2 children which will be filled-in in a
|
||||
* bit. But this is important to have children reset to NULL. */
|
||||
explicit InnerNode(const BoundBox &bounds) : BVHNode(bounds), num_children_(0)
|
||||
|
@@ -30,17 +30,15 @@ BVHOptiX::BVHOptiX(const BVHParams ¶ms_,
|
||||
: BVH(params_, geometry_, objects_),
|
||||
device(device),
|
||||
traversable_handle(0),
|
||||
as_data(make_unique<device_only_memory<char>>(
|
||||
device, params.top_level ? "optix tlas" : "optix blas", false)),
|
||||
motion_transform_data(
|
||||
make_unique<device_only_memory<char>>(device, "optix motion transform", false))
|
||||
as_data(device, params_.top_level ? "optix tlas" : "optix blas", false),
|
||||
motion_transform_data(device, "optix motion transform", false)
|
||||
{
|
||||
}
|
||||
|
||||
BVHOptiX::~BVHOptiX()
|
||||
{
|
||||
/* Acceleration structure memory is delayed freed on device, since deleting the
|
||||
* BVH may happen while still being used for rendering. */
|
||||
// Acceleration structure memory is delayed freed on device, since deleting the
|
||||
// BVH may happen while still being used for rendering.
|
||||
device->release_optix_bvh(this);
|
||||
}
|
||||
|
||||
|
@@ -25,16 +25,14 @@
|
||||
|
||||
# include "device/memory.h"
|
||||
|
||||
# include "util/unique_ptr.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class BVHOptiX : public BVH {
|
||||
public:
|
||||
Device *device;
|
||||
uint64_t traversable_handle;
|
||||
unique_ptr<device_only_memory<char>> as_data;
|
||||
unique_ptr<device_only_memory<char>> motion_transform_data;
|
||||
device_only_memory<char> as_data;
|
||||
device_only_memory<char> motion_transform_data;
|
||||
|
||||
protected:
|
||||
friend class BVH;
|
||||
|
@@ -88,7 +88,7 @@ endmacro()
|
||||
|
||||
function(cycles_link_directories)
|
||||
if(APPLE)
|
||||
# APPLE platform uses full paths for linking libraries, and avoids link_directories.
|
||||
# APPLE plaform uses full paths for linking libraries, and avoids link_directories.
|
||||
return()
|
||||
endif()
|
||||
|
||||
|
@@ -38,6 +38,7 @@ void device_cpu_info(vector<DeviceInfo> &devices)
|
||||
info.id = "CPU";
|
||||
info.num = 0;
|
||||
info.has_osl = true;
|
||||
info.has_half_images = true;
|
||||
info.has_nanovdb = true;
|
||||
info.has_profiling = true;
|
||||
if (openimagedenoise_supported()) {
|
||||
|
@@ -68,8 +68,8 @@ CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_
|
||||
{
|
||||
/* Pick any kernel, all of them are supposed to have same level of microarchitecture
|
||||
* optimization. */
|
||||
VLOG(1) << "Using " << get_cpu_kernels().integrator_init_from_camera.get_uarch_name()
|
||||
<< " CPU kernels.";
|
||||
VLOG(1) << "Will be using " << kernels.integrator_init_from_camera.get_uarch_name()
|
||||
<< " kernels.";
|
||||
|
||||
if (info.cpu_threads == 0) {
|
||||
info.cpu_threads = TaskScheduler::num_threads();
|
||||
@@ -93,6 +93,11 @@ CPUDevice::~CPUDevice()
|
||||
texture_info.free();
|
||||
}
|
||||
|
||||
bool CPUDevice::show_samples() const
|
||||
{
|
||||
return (info.cpu_threads == 1);
|
||||
}
|
||||
|
||||
BVHLayoutMask CPUDevice::get_bvh_layout_mask() const
|
||||
{
|
||||
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
|
||||
@@ -292,6 +297,11 @@ void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
Device::build_bvh(bvh, progress, refit);
|
||||
}
|
||||
|
||||
const CPUKernels *CPUDevice::get_cpu_kernels() const
|
||||
{
|
||||
return &kernels;
|
||||
}
|
||||
|
||||
void CPUDevice::get_cpu_kernel_thread_globals(
|
||||
vector<CPUKernelThreadGlobals> &kernel_thread_globals)
|
||||
{
|
||||
|
@@ -57,9 +57,13 @@ class CPUDevice : public Device {
|
||||
RTCDevice embree_device;
|
||||
#endif
|
||||
|
||||
CPUKernels kernels;
|
||||
|
||||
CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_);
|
||||
~CPUDevice();
|
||||
|
||||
virtual bool show_samples() const override;
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
|
||||
/* Returns true if the texture info was copied to the device (meaning, some more
|
||||
@@ -86,6 +90,7 @@ class CPUDevice : public Device {
|
||||
|
||||
void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
|
||||
|
||||
virtual const CPUKernels *get_cpu_kernels() const override;
|
||||
virtual void get_cpu_kernel_thread_globals(
|
||||
vector<CPUKernelThreadGlobals> &kernel_thread_globals) override;
|
||||
virtual void *get_cpu_osl_memory() override;
|
||||
|
@@ -26,9 +26,6 @@ CCL_NAMESPACE_BEGIN
|
||||
KERNEL_NAME_EVAL(cpu_avx, name), KERNEL_NAME_EVAL(cpu_avx2, name)
|
||||
|
||||
#define REGISTER_KERNEL(name) name(KERNEL_FUNCTIONS(name))
|
||||
#define REGISTER_KERNEL_FILM_CONVERT(name) \
|
||||
film_convert_##name(KERNEL_FUNCTIONS(film_convert_##name)), \
|
||||
film_convert_half_rgba_##name(KERNEL_FUNCTIONS(film_convert_half_rgba_##name))
|
||||
|
||||
CPUKernels::CPUKernels()
|
||||
: /* Integrator. */
|
||||
@@ -53,25 +50,11 @@ CPUKernels::CPUKernels()
|
||||
REGISTER_KERNEL(adaptive_sampling_filter_x),
|
||||
REGISTER_KERNEL(adaptive_sampling_filter_y),
|
||||
/* Cryptomatte. */
|
||||
REGISTER_KERNEL(cryptomatte_postprocess),
|
||||
/* Film Convert. */
|
||||
REGISTER_KERNEL_FILM_CONVERT(depth),
|
||||
REGISTER_KERNEL_FILM_CONVERT(mist),
|
||||
REGISTER_KERNEL_FILM_CONVERT(sample_count),
|
||||
REGISTER_KERNEL_FILM_CONVERT(float),
|
||||
REGISTER_KERNEL_FILM_CONVERT(light_path),
|
||||
REGISTER_KERNEL_FILM_CONVERT(float3),
|
||||
REGISTER_KERNEL_FILM_CONVERT(motion),
|
||||
REGISTER_KERNEL_FILM_CONVERT(cryptomatte),
|
||||
REGISTER_KERNEL_FILM_CONVERT(shadow_catcher),
|
||||
REGISTER_KERNEL_FILM_CONVERT(shadow_catcher_matte_with_shadow),
|
||||
REGISTER_KERNEL_FILM_CONVERT(combined),
|
||||
REGISTER_KERNEL_FILM_CONVERT(float4)
|
||||
REGISTER_KERNEL(cryptomatte_postprocess)
|
||||
{
|
||||
}
|
||||
|
||||
#undef REGISTER_KERNEL
|
||||
#undef REGISTER_KERNEL_FILM_CONVERT
|
||||
#undef KERNEL_FUNCTIONS
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -17,13 +17,11 @@
|
||||
#pragma once
|
||||
|
||||
#include "device/cpu/kernel_function.h"
|
||||
#include "util/half.h"
|
||||
#include "util/types.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
struct KernelGlobalsCPU;
|
||||
struct KernelFilmConvert;
|
||||
struct IntegratorStateCPU;
|
||||
struct TileInfo;
|
||||
|
||||
@@ -42,7 +40,7 @@ class CPUKernels {
|
||||
|
||||
IntegratorInitFunction integrator_init_from_camera;
|
||||
IntegratorInitFunction integrator_init_from_bake;
|
||||
IntegratorShadeFunction integrator_intersect_closest;
|
||||
IntegratorFunction integrator_intersect_closest;
|
||||
IntegratorFunction integrator_intersect_shadow;
|
||||
IntegratorFunction integrator_intersect_subsurface;
|
||||
IntegratorFunction integrator_intersect_volume_stack;
|
||||
@@ -104,41 +102,6 @@ class CPUKernels {
|
||||
|
||||
CryptomattePostprocessFunction cryptomatte_postprocess;
|
||||
|
||||
/* Film Convert. */
|
||||
using FilmConvertFunction = CPUKernelFunction<void (*)(const KernelFilmConvert *kfilm_convert,
|
||||
const float *buffer,
|
||||
float *pixel,
|
||||
const int width,
|
||||
const int buffer_stride,
|
||||
const int pixel_stride)>;
|
||||
using FilmConvertHalfRGBAFunction =
|
||||
CPUKernelFunction<void (*)(const KernelFilmConvert *kfilm_convert,
|
||||
const float *buffer,
|
||||
half4 *pixel,
|
||||
const int width,
|
||||
const int buffer_stride)>;
|
||||
|
||||
#define KERNEL_FILM_CONVERT_FUNCTION(name) \
|
||||
FilmConvertFunction film_convert_##name; \
|
||||
FilmConvertHalfRGBAFunction film_convert_half_rgba_##name;
|
||||
|
||||
KERNEL_FILM_CONVERT_FUNCTION(depth)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(mist)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(sample_count)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(float)
|
||||
|
||||
KERNEL_FILM_CONVERT_FUNCTION(light_path)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(float3)
|
||||
|
||||
KERNEL_FILM_CONVERT_FUNCTION(motion)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(cryptomatte)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(combined)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(float4)
|
||||
|
||||
#undef KERNEL_FILM_CONVERT_FUNCTION
|
||||
|
||||
CPUKernels();
|
||||
};
|
||||
|
||||
|
@@ -144,6 +144,7 @@ void device_cuda_info(vector<DeviceInfo> &devices)
|
||||
info.description = string(name);
|
||||
info.num = num;
|
||||
|
||||
info.has_half_images = (major >= 3);
|
||||
info.has_nanovdb = true;
|
||||
info.denoisers = 0;
|
||||
|
||||
|
@@ -46,6 +46,12 @@ bool CUDADevice::have_precompiled_kernels()
|
||||
return path_exists(cubins_path);
|
||||
}
|
||||
|
||||
bool CUDADevice::show_samples() const
|
||||
{
|
||||
/* The CUDADevice only processes one tile at a time, so showing samples is fine. */
|
||||
return true;
|
||||
}
|
||||
|
||||
BVHLayoutMask CUDADevice::get_bvh_layout_mask() const
|
||||
{
|
||||
return BVH_LAYOUT_BVH2;
|
||||
@@ -236,10 +242,6 @@ string CUDADevice::compile_kernel_get_common_cflags(const uint kernel_features)
|
||||
cflags += " -DWITH_NANOVDB";
|
||||
# endif
|
||||
|
||||
# ifdef WITH_CYCLES_DEBUG
|
||||
cflags += " -DWITH_CYCLES_DEBUG";
|
||||
# endif
|
||||
|
||||
return cflags;
|
||||
}
|
||||
|
||||
@@ -376,9 +378,7 @@ string CUDADevice::compile_kernel(const uint kernel_features,
|
||||
cubin.c_str(),
|
||||
common_cflags.c_str());
|
||||
|
||||
printf("Compiling %sCUDA kernel ...\n%s\n",
|
||||
(use_adaptive_compilation()) ? "adaptive " : "",
|
||||
command.c_str());
|
||||
printf("Compiling CUDA kernel ...\n%s\n", command.c_str());
|
||||
|
||||
# ifdef _WIN32
|
||||
command = "call " + command;
|
||||
@@ -405,15 +405,13 @@ string CUDADevice::compile_kernel(const uint kernel_features,
|
||||
|
||||
bool CUDADevice::load_kernels(const uint kernel_features)
|
||||
{
|
||||
/* TODO(sergey): Support kernels re-load for CUDA devices adaptive compile.
|
||||
/* TODO(sergey): Support kernels re-load for CUDA devices.
|
||||
*
|
||||
* Currently re-loading kernel will invalidate memory pointers,
|
||||
* causing problems in cuCtxSynchronize.
|
||||
*/
|
||||
if (cuModule) {
|
||||
if (use_adaptive_compilation()) {
|
||||
VLOG(1) << "Skipping CUDA kernel reload for adaptive compilation, not currently supported.";
|
||||
}
|
||||
VLOG(1) << "Skipping kernel reload, not currently supported.";
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -775,7 +773,6 @@ void CUDADevice::generic_free(device_memory &mem)
|
||||
if (mem.device_pointer) {
|
||||
CUDAContextScope scope(this);
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
|
||||
const CUDAMem &cmem = cuda_mem_map[&mem];
|
||||
|
||||
/* If cmem.use_mapped_host is true, reference counting is used
|
||||
@@ -930,6 +927,7 @@ void CUDADevice::tex_alloc(device_texture &mem)
|
||||
{
|
||||
CUDAContextScope scope(this);
|
||||
|
||||
/* General variables for both architectures */
|
||||
string bind_name = mem.name;
|
||||
size_t dsize = datatype_size(mem.data_type);
|
||||
size_t size = mem.memory_size();
|
||||
@@ -1092,6 +1090,7 @@ void CUDADevice::tex_alloc(device_texture &mem)
|
||||
|
||||
if (mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
|
||||
mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
|
||||
/* Kepler+, bindless textures. */
|
||||
CUDA_RESOURCE_DESC resDesc;
|
||||
memset(&resDesc, 0, sizeof(resDesc));
|
||||
|
||||
@@ -1142,7 +1141,6 @@ void CUDADevice::tex_free(device_texture &mem)
|
||||
if (mem.device_pointer) {
|
||||
CUDAContextScope scope(this);
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
|
||||
const CUDAMem &cmem = cuda_mem_map[&mem];
|
||||
|
||||
if (cmem.texobject) {
|
||||
|
@@ -76,6 +76,8 @@ class CUDADevice : public Device {
|
||||
|
||||
static bool have_precompiled_kernels();
|
||||
|
||||
virtual bool show_samples() const override;
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
|
||||
void set_error(const string &error) override;
|
||||
|
@@ -23,7 +23,6 @@
|
||||
#include "device/queue.h"
|
||||
|
||||
#include "device/cpu/device.h"
|
||||
#include "device/cpu/kernel.h"
|
||||
#include "device/cuda/device.h"
|
||||
#include "device/dummy/device.h"
|
||||
#include "device/hip/device.h"
|
||||
@@ -286,6 +285,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
|
||||
info.description = "Multi Device";
|
||||
info.num = 0;
|
||||
|
||||
info.has_half_images = true;
|
||||
info.has_nanovdb = true;
|
||||
info.has_osl = true;
|
||||
info.has_profiling = true;
|
||||
@@ -332,6 +332,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
|
||||
}
|
||||
|
||||
/* Accumulate device info. */
|
||||
info.has_half_images &= device.has_half_images;
|
||||
info.has_nanovdb &= device.has_nanovdb;
|
||||
info.has_osl &= device.has_osl;
|
||||
info.has_profiling &= device.has_profiling;
|
||||
@@ -362,11 +363,10 @@ unique_ptr<DeviceQueue> Device::gpu_queue_create()
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const CPUKernels &Device::get_cpu_kernels()
|
||||
const CPUKernels *Device::get_cpu_kernels() const
|
||||
{
|
||||
/* Initialize CPU kernels once and reuse. */
|
||||
static CPUKernels kernels;
|
||||
return kernels;
|
||||
LOG(FATAL) << "Device does not support CPU kernels.";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void Device::get_cpu_kernel_thread_globals(
|
||||
|
@@ -73,6 +73,7 @@ class DeviceInfo {
|
||||
int num;
|
||||
bool display_device; /* GPU is used as a display device. */
|
||||
bool has_nanovdb; /* Support NanoVDB volumes. */
|
||||
bool has_half_images; /* Support half-float textures. */
|
||||
bool has_osl; /* Support Open Shading Language. */
|
||||
bool has_profiling; /* Supports runtime collection of profiling info. */
|
||||
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
|
||||
@@ -89,6 +90,7 @@ class DeviceInfo {
|
||||
num = 0;
|
||||
cpu_threads = 0;
|
||||
display_device = false;
|
||||
has_half_images = false;
|
||||
has_nanovdb = false;
|
||||
has_osl = false;
|
||||
has_profiling = false;
|
||||
@@ -149,6 +151,10 @@ class Device {
|
||||
fprintf(stderr, "%s\n", error.c_str());
|
||||
fflush(stderr);
|
||||
}
|
||||
virtual bool show_samples() const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const = 0;
|
||||
|
||||
/* statistics */
|
||||
@@ -174,7 +180,7 @@ class Device {
|
||||
* These may not be used on GPU or multi-devices. */
|
||||
|
||||
/* Get CPU kernel functions for native instruction set. */
|
||||
static const CPUKernels &get_cpu_kernels();
|
||||
virtual const CPUKernels *get_cpu_kernels() const;
|
||||
/* Get kernel globals to pass to kernels. */
|
||||
virtual void get_cpu_kernel_thread_globals(
|
||||
vector<CPUKernelThreadGlobals> & /*kernel_thread_globals*/);
|
||||
|
@@ -131,9 +131,9 @@ void device_hip_info(vector<DeviceInfo> &devices)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!hipSupportsDevice(num)) {
|
||||
continue;
|
||||
}
|
||||
int major;
|
||||
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, num);
|
||||
// TODO : (Arya) What is the last major version we are supporting?
|
||||
|
||||
DeviceInfo info;
|
||||
|
||||
@@ -141,6 +141,7 @@ void device_hip_info(vector<DeviceInfo> &devices)
|
||||
info.description = string(name);
|
||||
info.num = num;
|
||||
|
||||
info.has_half_images = (major >= 3);
|
||||
info.has_nanovdb = true;
|
||||
info.denoisers = 0;
|
||||
|
||||
|
@@ -47,6 +47,12 @@ bool HIPDevice::have_precompiled_kernels()
|
||||
return path_exists(fatbins_path);
|
||||
}
|
||||
|
||||
bool HIPDevice::show_samples() const
|
||||
{
|
||||
/* The HIPDevice only processes one tile at a time, so showing samples is fine. */
|
||||
return true;
|
||||
}
|
||||
|
||||
BVHLayoutMask HIPDevice::get_bvh_layout_mask() const
|
||||
{
|
||||
return BVH_LAYOUT_BVH2;
|
||||
@@ -93,7 +99,7 @@ HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
}
|
||||
|
||||
/* Setup device and context. */
|
||||
result = hipDeviceGet(&hipDevice, hipDevId);
|
||||
result = hipGetDevice(&hipDevice, hipDevId);
|
||||
if (result != hipSuccess) {
|
||||
set_error(string_printf("Failed to get HIP device handle from ordinal (%s)",
|
||||
hipewErrorString(result)));
|
||||
@@ -140,18 +146,12 @@ HIPDevice::~HIPDevice()
|
||||
|
||||
bool HIPDevice::support_device(const uint /*kernel_features*/)
|
||||
{
|
||||
if (hipSupportsDevice(hipDevId)) {
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
/* We only support Navi and above. */
|
||||
hipDeviceProp_t props;
|
||||
hipGetDeviceProperties(&props, hipDevId);
|
||||
int major, minor;
|
||||
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
|
||||
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
|
||||
|
||||
set_error(string_printf("HIP backend requires AMD RDNA graphics card or up, but found %s.",
|
||||
props.name));
|
||||
return false;
|
||||
}
|
||||
// TODO : (Arya) What versions do we plan to support?
|
||||
return true;
|
||||
}
|
||||
|
||||
bool HIPDevice::check_peer_access(Device *peer_device)
|
||||
@@ -216,6 +216,7 @@ string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
|
||||
const string include_path = source_path;
|
||||
string cflags = string_printf(
|
||||
"-m%d "
|
||||
"--ptxas-options=\"-v\" "
|
||||
"--use_fast_math "
|
||||
"-DHIPCC "
|
||||
"-I\"%s\"",
|
||||
@@ -227,7 +228,10 @@ string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
|
||||
return cflags;
|
||||
}
|
||||
|
||||
string HIPDevice::compile_kernel(const uint kernel_features, const char *name, const char *base)
|
||||
string HIPDevice::compile_kernel(const uint kernel_features,
|
||||
const char *name,
|
||||
const char *base,
|
||||
bool force_ptx)
|
||||
{
|
||||
/* Compute kernel name. */
|
||||
int major, minor;
|
||||
@@ -236,20 +240,35 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c
|
||||
hipDeviceProp_t props;
|
||||
hipGetDeviceProperties(&props, hipDevId);
|
||||
|
||||
/* gcnArchName can contain tokens after the arch name with features, ie.
|
||||
* `gfx1010:sramecc-:xnack-` so we tokenize it to get the first part. */
|
||||
char *arch = strtok(props.gcnArchName, ":");
|
||||
if (arch == NULL) {
|
||||
arch = props.gcnArchName;
|
||||
}
|
||||
|
||||
/* Attempt to use kernel provided with Blender. */
|
||||
if (!use_adaptive_compilation()) {
|
||||
const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, arch));
|
||||
VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
|
||||
if (path_exists(fatbin)) {
|
||||
VLOG(1) << "Using precompiled kernel.";
|
||||
return fatbin;
|
||||
if (!force_ptx) {
|
||||
const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, props.gcnArchName));
|
||||
VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
|
||||
if (path_exists(fatbin)) {
|
||||
VLOG(1) << "Using precompiled kernel.";
|
||||
return fatbin;
|
||||
}
|
||||
}
|
||||
|
||||
/* The driver can JIT-compile PTX generated for older generations, so find the closest one. */
|
||||
int ptx_major = major, ptx_minor = minor;
|
||||
while (ptx_major >= 3) {
|
||||
const string ptx = path_get(
|
||||
string_printf("lib/%s_compute_%d%d.ptx", name, ptx_major, ptx_minor));
|
||||
VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
|
||||
if (path_exists(ptx)) {
|
||||
VLOG(1) << "Using precompiled kernel.";
|
||||
return ptx;
|
||||
}
|
||||
|
||||
if (ptx_minor > 0) {
|
||||
ptx_minor--;
|
||||
}
|
||||
else {
|
||||
ptx_major--;
|
||||
ptx_minor = 9;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -273,10 +292,12 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c
|
||||
# ifdef _DEBUG
|
||||
options.append(" -save-temps");
|
||||
# endif
|
||||
options.append(" --amdgpu-target=").append(arch);
|
||||
options.append(" --amdgpu-target=").append(props.gcnArchName);
|
||||
|
||||
const string include_path = source_path;
|
||||
const string fatbin_file = string_printf("cycles_%s_%s_%s", name, arch, kernel_md5.c_str());
|
||||
const char *const kernel_arch = props.gcnArchName;
|
||||
const string fatbin_file = string_printf(
|
||||
"cycles_%s_%s_%s", name, kernel_arch, kernel_md5.c_str());
|
||||
const string fatbin = path_cache_get(path_join("kernels", fatbin_file));
|
||||
VLOG(1) << "Testing for locally compiled kernel " << fatbin << ".";
|
||||
if (path_exists(fatbin)) {
|
||||
@@ -286,9 +307,9 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c
|
||||
|
||||
# ifdef _WIN32
|
||||
if (!use_adaptive_compilation() && have_precompiled_kernels()) {
|
||||
if (!hipSupportsDevice(hipDevId)) {
|
||||
if (major < 3) {
|
||||
set_error(
|
||||
string_printf("HIP backend requires compute capability 10.1 or up, but found %d.%d. "
|
||||
string_printf("HIP backend requires compute capability 3.0 or up, but found %d.%d. "
|
||||
"Your GPU is not supported.",
|
||||
major,
|
||||
minor));
|
||||
@@ -339,9 +360,7 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c
|
||||
source_path.c_str(),
|
||||
fatbin.c_str());
|
||||
|
||||
printf("Compiling %sHIP kernel ...\n%s\n",
|
||||
(use_adaptive_compilation()) ? "adaptive " : "",
|
||||
command.c_str());
|
||||
printf("Compiling HIP kernel ...\n%s\n", command.c_str());
|
||||
|
||||
# ifdef _WIN32
|
||||
command = "call " + command;
|
||||
@@ -368,14 +387,13 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c
|
||||
|
||||
bool HIPDevice::load_kernels(const uint kernel_features)
|
||||
{
|
||||
/* TODO(sergey): Support kernels re-load for HIP devices adaptive compile.
|
||||
/* TODO(sergey): Support kernels re-load for HIP devices.
|
||||
*
|
||||
* Currently re-loading kernels will invalidate memory pointers.
|
||||
* Currently re-loading kernel will invalidate memory pointers,
|
||||
* causing problems in hipCtxSynchronize.
|
||||
*/
|
||||
if (hipModule) {
|
||||
if (use_adaptive_compilation()) {
|
||||
VLOG(1) << "Skipping HIP kernel reload for adaptive compilation, not currently supported.";
|
||||
}
|
||||
VLOG(1) << "Skipping kernel reload, not currently supported.";
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -384,9 +402,8 @@ bool HIPDevice::load_kernels(const uint kernel_features)
|
||||
return false;
|
||||
|
||||
/* check if GPU is supported */
|
||||
if (!support_device(kernel_features)) {
|
||||
if (!support_device(kernel_features))
|
||||
return false;
|
||||
}
|
||||
|
||||
/* get kernel */
|
||||
const char *kernel_name = "kernel";
|
||||
@@ -738,7 +755,6 @@ void HIPDevice::generic_free(device_memory &mem)
|
||||
if (mem.device_pointer) {
|
||||
HIPContextScope scope(this);
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
|
||||
const HIPMem &cmem = hip_mem_map[&mem];
|
||||
|
||||
/* If cmem.use_mapped_host is true, reference counting is used
|
||||
@@ -892,6 +908,7 @@ void HIPDevice::tex_alloc(device_texture &mem)
|
||||
{
|
||||
HIPContextScope scope(this);
|
||||
|
||||
/* General variables for both architectures */
|
||||
string bind_name = mem.name;
|
||||
size_t dsize = datatype_size(mem.data_type);
|
||||
size_t size = mem.memory_size();
|
||||
@@ -981,16 +998,16 @@ void HIPDevice::tex_alloc(device_texture &mem)
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
|
||||
hip_assert(hipArray3DCreate((hArray *)&array_3d, &desc));
|
||||
hip_assert(hipArray3DCreate(&array_3d, &desc));
|
||||
|
||||
if (!array_3d) {
|
||||
return;
|
||||
}
|
||||
|
||||
HIP_MEMCPY3D param;
|
||||
memset(¶m, 0, sizeof(HIP_MEMCPY3D));
|
||||
memset(¶m, 0, sizeof(param));
|
||||
param.dstMemoryType = hipMemoryTypeArray;
|
||||
param.dstArray = array_3d;
|
||||
param.dstArray = &array_3d;
|
||||
param.srcMemoryType = hipMemoryTypeHost;
|
||||
param.srcHost = mem.host_pointer;
|
||||
param.srcPitch = src_pitch;
|
||||
@@ -1056,13 +1073,13 @@ void HIPDevice::tex_alloc(device_texture &mem)
|
||||
|
||||
if (mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
|
||||
mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
|
||||
/* Bindless textures. */
|
||||
/* Kepler+, bindless textures. */
|
||||
hipResourceDesc resDesc;
|
||||
memset(&resDesc, 0, sizeof(resDesc));
|
||||
|
||||
if (array_3d) {
|
||||
resDesc.resType = hipResourceTypeArray;
|
||||
resDesc.res.array.h_Array = array_3d;
|
||||
resDesc.res.array.h_Array = &array_3d;
|
||||
resDesc.flags = 0;
|
||||
}
|
||||
else if (mem.data_height > 0) {
|
||||
@@ -1107,7 +1124,6 @@ void HIPDevice::tex_free(device_texture &mem)
|
||||
if (mem.device_pointer) {
|
||||
HIPContextScope scope(this);
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
|
||||
const HIPMem &cmem = hip_mem_map[&mem];
|
||||
|
||||
if (cmem.texobject) {
|
||||
@@ -1148,8 +1164,6 @@ bool HIPDevice::should_use_graphics_interop()
|
||||
* possible, but from the empiric measurements it can be considerably slower than using naive
|
||||
* pixels copy. */
|
||||
|
||||
/* Disable graphics interop for now, because of driver bug in 21.40. See T92972 */
|
||||
# if 0
|
||||
HIPContextScope scope(this);
|
||||
|
||||
int num_all_devices = 0;
|
||||
@@ -1168,7 +1182,6 @@ bool HIPDevice::should_use_graphics_interop()
|
||||
return true;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@@ -75,6 +75,8 @@ class HIPDevice : public Device {
|
||||
|
||||
static bool have_precompiled_kernels();
|
||||
|
||||
virtual bool show_samples() const override;
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
|
||||
void set_error(const string &error) override;
|
||||
@@ -91,7 +93,10 @@ class HIPDevice : public Device {
|
||||
|
||||
virtual string compile_kernel_get_common_cflags(const uint kernel_features);
|
||||
|
||||
string compile_kernel(const uint kernel_features, const char *name, const char *base = "hip");
|
||||
string compile_kernel(const uint kernel_features,
|
||||
const char *name,
|
||||
const char *base = "hip",
|
||||
bool force_ptx = false);
|
||||
|
||||
virtual bool load_kernels(const uint kernel_features) override;
|
||||
void reserve_local_memory(const uint kernel_features);
|
||||
|
@@ -48,7 +48,7 @@ class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop {
|
||||
HIPDeviceQueue *queue_ = nullptr;
|
||||
HIPDevice *device_ = nullptr;
|
||||
|
||||
/* OpenGL PBO which is currently registered as the destination for the HIP buffer. */
|
||||
/* OpenGL PBO which is currently registered as the destination for the CUDA buffer. */
|
||||
uint opengl_pbo_id_ = 0;
|
||||
/* Buffer area in pixels of the corresponding PBO. */
|
||||
int64_t buffer_area_ = 0;
|
||||
|
@@ -58,15 +58,6 @@ const char *hipewCompilerPath();
|
||||
int hipewCompilerVersion();
|
||||
# endif /* WITH_HIP_DYNLOAD */
|
||||
|
||||
static inline bool hipSupportsDevice(const int hipDevId)
|
||||
{
|
||||
int major, minor;
|
||||
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
|
||||
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
|
||||
|
||||
return (major > 10) || (major == 10 && minor >= 1);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_HIP */
|
||||
|
@@ -23,7 +23,7 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
device_memory::device_memory(Device *device, const char *name, MemoryType type)
|
||||
: data_type(device_type_traits<uchar>::data_type),
|
||||
data_elements(device_type_traits<uchar>::num_elements),
|
||||
data_elements(device_type_traits<uchar>::num_elements_cpu),
|
||||
data_size(0),
|
||||
device_size(0),
|
||||
data_width(0),
|
||||
@@ -44,6 +44,45 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type)
|
||||
{
|
||||
}
|
||||
|
||||
device_memory::device_memory(device_memory &&other) noexcept
|
||||
: data_type(other.data_type),
|
||||
data_elements(other.data_elements),
|
||||
data_size(other.data_size),
|
||||
device_size(other.device_size),
|
||||
data_width(other.data_width),
|
||||
data_height(other.data_height),
|
||||
data_depth(other.data_depth),
|
||||
type(other.type),
|
||||
name(other.name),
|
||||
device(other.device),
|
||||
device_pointer(other.device_pointer),
|
||||
host_pointer(other.host_pointer),
|
||||
shared_pointer(other.shared_pointer),
|
||||
shared_counter(other.shared_counter),
|
||||
original_device_ptr(other.original_device_ptr),
|
||||
original_device_size(other.original_device_size),
|
||||
original_device(other.original_device),
|
||||
need_realloc_(other.need_realloc_),
|
||||
modified(other.modified)
|
||||
{
|
||||
other.data_elements = 0;
|
||||
other.data_size = 0;
|
||||
other.device_size = 0;
|
||||
other.data_width = 0;
|
||||
other.data_height = 0;
|
||||
other.data_depth = 0;
|
||||
other.device = 0;
|
||||
other.device_pointer = 0;
|
||||
other.host_pointer = 0;
|
||||
other.shared_pointer = 0;
|
||||
other.shared_counter = 0;
|
||||
other.original_device_ptr = 0;
|
||||
other.original_device_size = 0;
|
||||
other.original_device = 0;
|
||||
other.need_realloc_ = false;
|
||||
other.modified = false;
|
||||
}
|
||||
|
||||
device_memory::~device_memory()
|
||||
{
|
||||
assert(shared_pointer == 0);
|
||||
|
@@ -81,140 +81,155 @@ static constexpr size_t datatype_size(DataType datatype)
|
||||
|
||||
template<typename T> struct device_type_traits {
|
||||
static const DataType data_type = TYPE_UNKNOWN;
|
||||
static const size_t num_elements = sizeof(T);
|
||||
static const size_t num_elements_cpu = sizeof(T);
|
||||
static const size_t num_elements_gpu = sizeof(T);
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uchar> {
|
||||
static const DataType data_type = TYPE_UCHAR;
|
||||
static const size_t num_elements = 1;
|
||||
static_assert(sizeof(uchar) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static_assert(sizeof(uchar) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uchar2> {
|
||||
static const DataType data_type = TYPE_UCHAR;
|
||||
static const size_t num_elements = 2;
|
||||
static_assert(sizeof(uchar2) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 2;
|
||||
static const size_t num_elements_gpu = 2;
|
||||
static_assert(sizeof(uchar2) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uchar3> {
|
||||
static const DataType data_type = TYPE_UCHAR;
|
||||
static const size_t num_elements = 3;
|
||||
static_assert(sizeof(uchar3) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 3;
|
||||
static const size_t num_elements_gpu = 3;
|
||||
static_assert(sizeof(uchar3) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uchar4> {
|
||||
static const DataType data_type = TYPE_UCHAR;
|
||||
static const size_t num_elements = 4;
|
||||
static_assert(sizeof(uchar4) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 4;
|
||||
static_assert(sizeof(uchar4) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint> {
|
||||
static const DataType data_type = TYPE_UINT;
|
||||
static const size_t num_elements = 1;
|
||||
static_assert(sizeof(uint) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static_assert(sizeof(uint) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint2> {
|
||||
static const DataType data_type = TYPE_UINT;
|
||||
static const size_t num_elements = 2;
|
||||
static_assert(sizeof(uint2) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 2;
|
||||
static const size_t num_elements_gpu = 2;
|
||||
static_assert(sizeof(uint2) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint3> {
|
||||
static const DataType data_type = TYPE_UINT;
|
||||
static const size_t num_elements = 3;
|
||||
static_assert(sizeof(uint3) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 3;
|
||||
static const size_t num_elements_gpu = 3;
|
||||
static_assert(sizeof(uint3) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint4> {
|
||||
static const DataType data_type = TYPE_UINT;
|
||||
static const size_t num_elements = 4;
|
||||
static_assert(sizeof(uint4) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 4;
|
||||
static_assert(sizeof(uint4) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<int> {
|
||||
static const DataType data_type = TYPE_INT;
|
||||
static const size_t num_elements = 1;
|
||||
static_assert(sizeof(int) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static_assert(sizeof(int) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<int2> {
|
||||
static const DataType data_type = TYPE_INT;
|
||||
static const size_t num_elements = 2;
|
||||
static_assert(sizeof(int2) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 2;
|
||||
static const size_t num_elements_gpu = 2;
|
||||
static_assert(sizeof(int2) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<int3> {
|
||||
static const DataType data_type = TYPE_INT;
|
||||
static const size_t num_elements = 4;
|
||||
static_assert(sizeof(int3) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 3;
|
||||
static_assert(sizeof(int3) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<int4> {
|
||||
static const DataType data_type = TYPE_INT;
|
||||
static const size_t num_elements = 4;
|
||||
static_assert(sizeof(int4) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 4;
|
||||
static_assert(sizeof(int4) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<float> {
|
||||
static const DataType data_type = TYPE_FLOAT;
|
||||
static const size_t num_elements = 1;
|
||||
static_assert(sizeof(float) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static_assert(sizeof(float) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<float2> {
|
||||
static const DataType data_type = TYPE_FLOAT;
|
||||
static const size_t num_elements = 2;
|
||||
static_assert(sizeof(float2) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 2;
|
||||
static const size_t num_elements_gpu = 2;
|
||||
static_assert(sizeof(float2) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<float3> {
|
||||
/* float3 has different size depending on the device, can't use it for interchanging
|
||||
* memory between CPU and GPU.
|
||||
*
|
||||
* Leave body empty to trigger a compile error if used. */
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<packed_float3> {
|
||||
static const DataType data_type = TYPE_FLOAT;
|
||||
static const size_t num_elements = 3;
|
||||
static_assert(sizeof(packed_float3) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 3;
|
||||
static_assert(sizeof(float3) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<float4> {
|
||||
static const DataType data_type = TYPE_FLOAT;
|
||||
static const size_t num_elements = 4;
|
||||
static_assert(sizeof(float4) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 4;
|
||||
static_assert(sizeof(float4) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<half> {
|
||||
static const DataType data_type = TYPE_HALF;
|
||||
static const size_t num_elements = 1;
|
||||
static_assert(sizeof(half) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static_assert(sizeof(half) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<ushort4> {
|
||||
static const DataType data_type = TYPE_UINT16;
|
||||
static const size_t num_elements = 4;
|
||||
static_assert(sizeof(ushort4) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 4;
|
||||
static_assert(sizeof(ushort4) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint16_t> {
|
||||
static const DataType data_type = TYPE_UINT16;
|
||||
static const size_t num_elements = 1;
|
||||
static_assert(sizeof(uint16_t) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static_assert(sizeof(uint16_t) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<half4> {
|
||||
static const DataType data_type = TYPE_HALF;
|
||||
static const size_t num_elements = 4;
|
||||
static_assert(sizeof(half4) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 4;
|
||||
static_assert(sizeof(half4) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint64_t> {
|
||||
static const DataType data_type = TYPE_UINT64;
|
||||
static const size_t num_elements = 1;
|
||||
static_assert(sizeof(uint64_t) == num_elements * datatype_size(data_type));
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static_assert(sizeof(uint64_t) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
/* Device Memory
|
||||
@@ -266,16 +281,11 @@ class device_memory {
|
||||
|
||||
/* Only create through subclasses. */
|
||||
device_memory(Device *device, const char *name, MemoryType type);
|
||||
device_memory(device_memory &&other) noexcept;
|
||||
|
||||
/* No copying and allowed.
|
||||
*
|
||||
* This is because device implementation might need to register device memory in an allocation
|
||||
* map of some sort and use pointer as a key to identify blocks. Moving data from one place to
|
||||
* another bypassing device allocation routines will make those maps hard to maintain. */
|
||||
/* No copying allowed. */
|
||||
device_memory(const device_memory &) = delete;
|
||||
device_memory(device_memory &&other) noexcept = delete;
|
||||
device_memory &operator=(const device_memory &) = delete;
|
||||
device_memory &operator=(device_memory &&) = delete;
|
||||
|
||||
/* Host allocation on the device. All host_pointer memory should be
|
||||
* allocated with these functions, for devices that support using
|
||||
@@ -310,7 +320,9 @@ template<typename T> class device_only_memory : public device_memory {
|
||||
: device_memory(device, name, allow_host_memory_fallback ? MEM_READ_WRITE : MEM_DEVICE_ONLY)
|
||||
{
|
||||
data_type = device_type_traits<T>::data_type;
|
||||
data_elements = max(device_type_traits<T>::num_elements, 1);
|
||||
data_elements = max(device_is_cpu() ? device_type_traits<T>::num_elements_cpu :
|
||||
device_type_traits<T>::num_elements_gpu,
|
||||
1);
|
||||
}
|
||||
|
||||
device_only_memory(device_only_memory &&other) noexcept : device_memory(std::move(other))
|
||||
@@ -366,11 +378,15 @@ template<typename T> class device_only_memory : public device_memory {
|
||||
|
||||
template<typename T> class device_vector : public device_memory {
|
||||
public:
|
||||
/* Can only use this for types that have the same size on CPU and GPU. */
|
||||
static_assert(device_type_traits<T>::num_elements_cpu ==
|
||||
device_type_traits<T>::num_elements_gpu);
|
||||
|
||||
device_vector(Device *device, const char *name, MemoryType type)
|
||||
: device_memory(device, name, type)
|
||||
{
|
||||
data_type = device_type_traits<T>::data_type;
|
||||
data_elements = device_type_traits<T>::num_elements;
|
||||
data_elements = device_type_traits<T>::num_elements_cpu;
|
||||
modified = true;
|
||||
need_realloc_ = true;
|
||||
|
||||
|
@@ -109,6 +109,14 @@ class MultiDevice : public Device {
|
||||
return error_msg;
|
||||
}
|
||||
|
||||
virtual bool show_samples() const override
|
||||
{
|
||||
if (devices.size() > 1) {
|
||||
return false;
|
||||
}
|
||||
return devices.front().device->show_samples();
|
||||
}
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override
|
||||
{
|
||||
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
|
||||
|
@@ -48,6 +48,14 @@ OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
|
||||
{
|
||||
}
|
||||
|
||||
OptiXDevice::Denoiser::~Denoiser()
|
||||
{
|
||||
const CUDAContextScope scope(device);
|
||||
if (optix_denoiser != nullptr) {
|
||||
optixDenoiserDestroy(optix_denoiser);
|
||||
}
|
||||
}
|
||||
|
||||
OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
: CUDADevice(info, stats, profiler),
|
||||
sbt_data(this, "__sbt", MEM_READ_ONLY),
|
||||
@@ -83,7 +91,6 @@ OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
||||
};
|
||||
# endif
|
||||
if (DebugFlags().optix.use_debug) {
|
||||
VLOG(1) << "Using OptiX debug mode.";
|
||||
options.validationMode = OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL;
|
||||
}
|
||||
optix_assert(optixDeviceContextCreate(cuContext, &options, &context));
|
||||
@@ -125,11 +132,6 @@ OptiXDevice::~OptiXDevice()
|
||||
}
|
||||
}
|
||||
|
||||
/* Make sure denoiser is destroyed before device context! */
|
||||
if (denoiser_.optix_denoiser != nullptr) {
|
||||
optixDenoiserDestroy(denoiser_.optix_denoiser);
|
||||
}
|
||||
|
||||
optixDeviceContextDestroy(context);
|
||||
}
|
||||
|
||||
@@ -881,31 +883,27 @@ bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
|
||||
optix_assert(optixDenoiserComputeMemoryResources(
|
||||
denoiser_.optix_denoiser, buffer_params.width, buffer_params.height, &sizes));
|
||||
|
||||
/* Denoiser is invoked on whole images only, so no overlap needed (would be used for tiling). */
|
||||
denoiser_.scratch_size = sizes.withoutOverlapScratchSizeInBytes;
|
||||
denoiser_.scratch_size = sizes.withOverlapScratchSizeInBytes;
|
||||
denoiser_.scratch_offset = sizes.stateSizeInBytes;
|
||||
|
||||
/* Allocate denoiser state if tile size has changed since last setup. */
|
||||
denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size);
|
||||
|
||||
/* Initialize denoiser state for the current tile size. */
|
||||
const OptixResult result = optixDenoiserSetup(
|
||||
denoiser_.optix_denoiser,
|
||||
0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
|
||||
on a stream that is not the default stream */
|
||||
buffer_params.width,
|
||||
buffer_params.height,
|
||||
denoiser_.state.device_pointer,
|
||||
denoiser_.scratch_offset,
|
||||
denoiser_.state.device_pointer + denoiser_.scratch_offset,
|
||||
denoiser_.scratch_size);
|
||||
const OptixResult result = optixDenoiserSetup(denoiser_.optix_denoiser,
|
||||
denoiser_.queue.stream(),
|
||||
buffer_params.width,
|
||||
buffer_params.height,
|
||||
denoiser_.state.device_pointer,
|
||||
denoiser_.scratch_offset,
|
||||
denoiser_.state.device_pointer +
|
||||
denoiser_.scratch_offset,
|
||||
denoiser_.scratch_size);
|
||||
if (result != OPTIX_SUCCESS) {
|
||||
set_error("Failed to set up OptiX denoiser");
|
||||
return false;
|
||||
}
|
||||
|
||||
cuda_assert(cuCtxSynchronize());
|
||||
|
||||
denoiser_.is_configured = true;
|
||||
denoiser_.configured_size.x = buffer_params.width;
|
||||
denoiser_.configured_size.y = buffer_params.height;
|
||||
@@ -940,6 +938,8 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
|
||||
color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
|
||||
}
|
||||
|
||||
device_vector<float> fake_albedo(this, "fake_albedo", MEM_READ_WRITE);
|
||||
|
||||
/* Optional albedo and color passes. */
|
||||
if (context.num_input_passes > 1) {
|
||||
const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
|
||||
@@ -970,7 +970,6 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
|
||||
|
||||
/* Finally run denoising. */
|
||||
OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
|
||||
|
||||
OptixDenoiserLayer image_layers = {};
|
||||
image_layers.input = color_layer;
|
||||
image_layers.output = output_layer;
|
||||
@@ -1032,7 +1031,7 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
|
||||
return false;
|
||||
}
|
||||
|
||||
device_only_memory<char> &out_data = *bvh->as_data;
|
||||
device_only_memory<char> &out_data = bvh->as_data;
|
||||
if (operation == OPTIX_BUILD_OPERATION_BUILD) {
|
||||
assert(out_data.device == this);
|
||||
out_data.alloc_to_device(sizes.outputSizeInBytes);
|
||||
@@ -1123,7 +1122,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
operation = OPTIX_BUILD_OPERATION_UPDATE;
|
||||
}
|
||||
else {
|
||||
bvh_optix->as_data->free();
|
||||
bvh_optix->as_data.free();
|
||||
bvh_optix->traversable_handle = 0;
|
||||
}
|
||||
|
||||
@@ -1344,9 +1343,9 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
unsigned int num_instances = 0;
|
||||
unsigned int max_num_instances = 0xFFFFFFFF;
|
||||
|
||||
bvh_optix->as_data->free();
|
||||
bvh_optix->as_data.free();
|
||||
bvh_optix->traversable_handle = 0;
|
||||
bvh_optix->motion_transform_data->free();
|
||||
bvh_optix->motion_transform_data.free();
|
||||
|
||||
optixDeviceContextGetProperty(context,
|
||||
OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID,
|
||||
@@ -1379,8 +1378,8 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
}
|
||||
}
|
||||
|
||||
assert(bvh_optix->motion_transform_data->device == this);
|
||||
bvh_optix->motion_transform_data->alloc_to_device(total_motion_transform_size);
|
||||
assert(bvh_optix->motion_transform_data.device == this);
|
||||
bvh_optix->motion_transform_data.alloc_to_device(total_motion_transform_size);
|
||||
}
|
||||
|
||||
for (Object *ob : bvh->objects) {
|
||||
@@ -1441,7 +1440,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
|
||||
motion_transform_offset = align_up(motion_transform_offset,
|
||||
OPTIX_TRANSFORM_BYTE_ALIGNMENT);
|
||||
CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data->device_pointer +
|
||||
CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data.device_pointer +
|
||||
motion_transform_offset;
|
||||
motion_transform_offset += motion_transform_size;
|
||||
|
||||
|
@@ -23,7 +23,6 @@
|
||||
# include "device/optix/queue.h"
|
||||
# include "device/optix/util.h"
|
||||
# include "kernel/types.h"
|
||||
# include "util/unique_ptr.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -77,12 +76,13 @@ class OptiXDevice : public CUDADevice {
|
||||
device_only_memory<KernelParamsOptiX> launch_params;
|
||||
OptixTraversableHandle tlas_handle = 0;
|
||||
|
||||
vector<unique_ptr<device_only_memory<char>>> delayed_free_bvh_memory;
|
||||
vector<device_only_memory<char>> delayed_free_bvh_memory;
|
||||
thread_mutex delayed_free_bvh_mutex;
|
||||
|
||||
class Denoiser {
|
||||
public:
|
||||
explicit Denoiser(OptiXDevice *device);
|
||||
~Denoiser();
|
||||
|
||||
OptiXDevice *device;
|
||||
OptiXDeviceQueue queue;
|
||||
|
@@ -73,8 +73,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *a
|
||||
sizeof(device_ptr),
|
||||
cuda_stream_));
|
||||
|
||||
if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
|
||||
if (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
|
||||
cuda_device_assert(
|
||||
cuda_device_,
|
||||
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
|
||||
|
@@ -3,7 +3,7 @@ This program uses code from various sources, the default license is Apache 2.0
|
||||
for all code, with the following exceptions.
|
||||
|
||||
Modified BSD License
|
||||
* Code adapted from Open Shading Language
|
||||
* Code adapated from Open Shading Language
|
||||
* Sobol direction vectors
|
||||
* Matrix inversion code from OpenEXR
|
||||
* MD5 Hash code
|
||||
|
@@ -29,14 +29,23 @@ unique_ptr<Denoiser> Denoiser::create(Device *path_trace_device, const DenoisePa
|
||||
{
|
||||
DCHECK(params.use);
|
||||
|
||||
if (params.type == DENOISER_OPTIX && Device::available_devices(DEVICE_MASK_OPTIX).size()) {
|
||||
return make_unique<OptiXDenoiser>(path_trace_device, params);
|
||||
switch (params.type) {
|
||||
case DENOISER_OPTIX:
|
||||
return make_unique<OptiXDenoiser>(path_trace_device, params);
|
||||
|
||||
case DENOISER_OPENIMAGEDENOISE:
|
||||
return make_unique<OIDNDenoiser>(path_trace_device, params);
|
||||
|
||||
case DENOISER_NUM:
|
||||
case DENOISER_NONE:
|
||||
case DENOISER_ALL:
|
||||
/* pass */
|
||||
break;
|
||||
}
|
||||
|
||||
/* Always fallback to OIDN. */
|
||||
DenoiseParams oidn_params = params;
|
||||
oidn_params.type = DENOISER_OPENIMAGEDENOISE;
|
||||
return make_unique<OIDNDenoiser>(path_trace_device, oidn_params);
|
||||
LOG(FATAL) << "Unhandled denoiser type " << params.type << ", should never happen.";
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Denoiser::Denoiser(Device *path_trace_device, const DenoiseParams &params)
|
||||
|
@@ -138,6 +138,10 @@ bool PassAccessor::get_render_tile_pixels(const RenderBuffers *render_buffers,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (pass_access_info_.offset == PASS_UNUSED) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const PassType type = pass_access_info_.type;
|
||||
const PassMode mode = pass_access_info_.mode;
|
||||
const PassInfo pass_info = Pass::get_info(type, pass_access_info_.include_albedo);
|
||||
|
@@ -14,12 +14,9 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "device/device.h"
|
||||
|
||||
#include "integrator/pass_accessor_cpu.h"
|
||||
|
||||
#include "session/buffers.h"
|
||||
|
||||
#include "util/log.h"
|
||||
#include "util/tbb.h"
|
||||
|
||||
@@ -36,16 +33,70 @@ CCL_NAMESPACE_BEGIN
|
||||
* Kernel processing.
|
||||
*/
|
||||
|
||||
template<typename Processor>
|
||||
inline void PassAccessorCPU::run_get_pass_kernel_processor(const RenderBuffers *render_buffers,
|
||||
const BufferParams &buffer_params,
|
||||
const Destination &destination,
|
||||
const Processor &processor) const
|
||||
{
|
||||
KernelFilmConvert kfilm_convert;
|
||||
init_kernel_film_convert(&kfilm_convert, buffer_params, destination);
|
||||
|
||||
if (destination.pixels) {
|
||||
/* NOTE: No overlays are applied since they are not used for final renders.
|
||||
* Can be supported via some sort of specialization to avoid code duplication. */
|
||||
|
||||
run_get_pass_kernel_processor_float(
|
||||
&kfilm_convert, render_buffers, buffer_params, destination, processor);
|
||||
}
|
||||
|
||||
if (destination.pixels_half_rgba) {
|
||||
/* TODO(sergey): Consider adding specialization to avoid per-pixel overlay check. */
|
||||
|
||||
if (destination.num_components == 1) {
|
||||
run_get_pass_kernel_processor_half_rgba(&kfilm_convert,
|
||||
render_buffers,
|
||||
buffer_params,
|
||||
destination,
|
||||
[&processor](const KernelFilmConvert *kfilm_convert,
|
||||
ccl_global const float *buffer,
|
||||
float *pixel_rgba) {
|
||||
float pixel;
|
||||
processor(kfilm_convert, buffer, &pixel);
|
||||
|
||||
pixel_rgba[0] = pixel;
|
||||
pixel_rgba[1] = pixel;
|
||||
pixel_rgba[2] = pixel;
|
||||
pixel_rgba[3] = 1.0f;
|
||||
});
|
||||
}
|
||||
else if (destination.num_components == 3) {
|
||||
run_get_pass_kernel_processor_half_rgba(&kfilm_convert,
|
||||
render_buffers,
|
||||
buffer_params,
|
||||
destination,
|
||||
[&processor](const KernelFilmConvert *kfilm_convert,
|
||||
ccl_global const float *buffer,
|
||||
float *pixel_rgba) {
|
||||
processor(kfilm_convert, buffer, pixel_rgba);
|
||||
pixel_rgba[3] = 1.0f;
|
||||
});
|
||||
}
|
||||
else if (destination.num_components == 4) {
|
||||
run_get_pass_kernel_processor_half_rgba(
|
||||
&kfilm_convert, render_buffers, buffer_params, destination, processor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Processor>
|
||||
inline void PassAccessorCPU::run_get_pass_kernel_processor_float(
|
||||
const KernelFilmConvert *kfilm_convert,
|
||||
const RenderBuffers *render_buffers,
|
||||
const BufferParams &buffer_params,
|
||||
const Destination &destination,
|
||||
const CPUKernels::FilmConvertFunction func) const
|
||||
const Processor &processor) const
|
||||
{
|
||||
/* NOTE: No overlays are applied since they are not used for final renders.
|
||||
* Can be supported via some sort of specialization to avoid code duplication. */
|
||||
|
||||
DCHECK_EQ(destination.stride, 0) << "Custom stride for float destination is not implemented.";
|
||||
|
||||
const int64_t pass_stride = buffer_params.pass_stride;
|
||||
@@ -61,16 +112,21 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_float(
|
||||
const float *buffer = window_data + y * buffer_row_stride;
|
||||
float *pixel = destination.pixels +
|
||||
(y * buffer_params.width + destination.offset) * pixel_stride;
|
||||
func(kfilm_convert, buffer, pixel, buffer_params.window_width, pass_stride, pixel_stride);
|
||||
|
||||
for (int64_t x = 0; x < buffer_params.window_width;
|
||||
++x, buffer += pass_stride, pixel += pixel_stride) {
|
||||
processor(kfilm_convert, buffer, pixel);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template<typename Processor>
|
||||
inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
|
||||
const KernelFilmConvert *kfilm_convert,
|
||||
const RenderBuffers *render_buffers,
|
||||
const BufferParams &buffer_params,
|
||||
const Destination &destination,
|
||||
const CPUKernels::FilmConvertHalfRGBAFunction func) const
|
||||
const Processor &processor) const
|
||||
{
|
||||
const int64_t pass_stride = buffer_params.pass_stride;
|
||||
const int64_t buffer_row_stride = buffer_params.stride * buffer_params.pass_stride;
|
||||
@@ -85,7 +141,16 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
|
||||
tbb::parallel_for(0, buffer_params.window_height, [&](int64_t y) {
|
||||
const float *buffer = window_data + y * buffer_row_stride;
|
||||
half4 *pixel = dst_start + y * destination_stride;
|
||||
func(kfilm_convert, buffer, pixel, buffer_params.window_width, pass_stride);
|
||||
for (int64_t x = 0; x < buffer_params.window_width; ++x, buffer += pass_stride, ++pixel) {
|
||||
|
||||
float pixel_rgba[4];
|
||||
processor(kfilm_convert, buffer, pixel_rgba);
|
||||
|
||||
film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel_rgba);
|
||||
|
||||
*pixel = float4_to_half4_display(
|
||||
make_float4(pixel_rgba[0], pixel_rgba[1], pixel_rgba[2], pixel_rgba[3]));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -98,25 +163,8 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
|
||||
const BufferParams &buffer_params, \
|
||||
const Destination &destination) const \
|
||||
{ \
|
||||
const CPUKernels &kernels = Device::get_cpu_kernels(); \
|
||||
KernelFilmConvert kfilm_convert; \
|
||||
init_kernel_film_convert(&kfilm_convert, buffer_params, destination); \
|
||||
\
|
||||
if (destination.pixels) { \
|
||||
run_get_pass_kernel_processor_float(&kfilm_convert, \
|
||||
render_buffers, \
|
||||
buffer_params, \
|
||||
destination, \
|
||||
kernels.film_convert_##pass); \
|
||||
} \
|
||||
\
|
||||
if (destination.pixels_half_rgba) { \
|
||||
run_get_pass_kernel_processor_half_rgba(&kfilm_convert, \
|
||||
render_buffers, \
|
||||
buffer_params, \
|
||||
destination, \
|
||||
kernels.film_convert_half_rgba_##pass); \
|
||||
} \
|
||||
run_get_pass_kernel_processor( \
|
||||
render_buffers, buffer_params, destination, film_get_pass_pixel_##pass); \
|
||||
}
|
||||
|
||||
/* Float (scalar) passes. */
|
||||
|
@@ -16,8 +16,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "device/cpu/kernel.h"
|
||||
|
||||
#include "integrator/pass_accessor.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
@@ -30,19 +28,25 @@ class PassAccessorCPU : public PassAccessor {
|
||||
using PassAccessor::PassAccessor;
|
||||
|
||||
protected:
|
||||
inline void run_get_pass_kernel_processor_float(
|
||||
const KernelFilmConvert *kfilm_convert,
|
||||
const RenderBuffers *render_buffers,
|
||||
const BufferParams &buffer_params,
|
||||
const Destination &destination,
|
||||
const CPUKernels::FilmConvertFunction func) const;
|
||||
template<typename Processor>
|
||||
inline void run_get_pass_kernel_processor(const RenderBuffers *render_buffers,
|
||||
const BufferParams &buffer_params,
|
||||
const Destination &destination,
|
||||
const Processor &processor) const;
|
||||
|
||||
inline void run_get_pass_kernel_processor_half_rgba(
|
||||
const KernelFilmConvert *kfilm_convert,
|
||||
const RenderBuffers *render_buffers,
|
||||
const BufferParams &buffer_params,
|
||||
const Destination &destination,
|
||||
const CPUKernels::FilmConvertHalfRGBAFunction func) const;
|
||||
template<typename Processor>
|
||||
inline void run_get_pass_kernel_processor_float(const KernelFilmConvert *kfilm_convert,
|
||||
const RenderBuffers *render_buffers,
|
||||
const BufferParams &buffer_params,
|
||||
const Destination &destination,
|
||||
const Processor &processor) const;
|
||||
|
||||
template<typename Processor>
|
||||
inline void run_get_pass_kernel_processor_half_rgba(const KernelFilmConvert *kfilm_convert,
|
||||
const RenderBuffers *render_buffers,
|
||||
const BufferParams &buffer_params,
|
||||
const Destination &destination,
|
||||
const Processor &processor) const;
|
||||
|
||||
#define DECLARE_PASS_ACCESSOR(pass) \
|
||||
virtual void get_pass_##pass(const RenderBuffers *render_buffers, \
|
||||
|
@@ -296,13 +296,13 @@ static BufferParams scale_buffer_params(const BufferParams &params, int resoluti
|
||||
|
||||
scaled_params.window_x = params.window_x / resolution_divider;
|
||||
scaled_params.window_y = params.window_y / resolution_divider;
|
||||
scaled_params.window_width = max(1, params.window_width / resolution_divider);
|
||||
scaled_params.window_height = max(1, params.window_height / resolution_divider);
|
||||
scaled_params.window_width = params.window_width / resolution_divider;
|
||||
scaled_params.window_height = params.window_height / resolution_divider;
|
||||
|
||||
scaled_params.full_x = params.full_x / resolution_divider;
|
||||
scaled_params.full_y = params.full_y / resolution_divider;
|
||||
scaled_params.full_width = max(1, params.full_width / resolution_divider);
|
||||
scaled_params.full_height = max(1, params.full_height / resolution_divider);
|
||||
scaled_params.full_width = params.full_width / resolution_divider;
|
||||
scaled_params.full_height = params.full_height / resolution_divider;
|
||||
|
||||
scaled_params.update_offset_stride();
|
||||
|
||||
@@ -380,10 +380,7 @@ void PathTrace::path_trace(RenderWork &render_work)
|
||||
PathTraceWork *path_trace_work = path_trace_works_[i].get();
|
||||
|
||||
PathTraceWork::RenderStatistics statistics;
|
||||
path_trace_work->render_samples(statistics,
|
||||
render_work.path_trace.start_sample,
|
||||
num_samples,
|
||||
render_work.path_trace.sample_offset);
|
||||
path_trace_work->render_samples(statistics, render_work.path_trace.start_sample, num_samples);
|
||||
|
||||
const double work_time = time_dt() - work_start_time;
|
||||
work_balance_infos_[i].time_spent += work_time;
|
||||
@@ -850,11 +847,9 @@ void PathTrace::progress_update_if_needed(const RenderWork &render_work)
|
||||
{
|
||||
if (progress_ != nullptr) {
|
||||
const int2 tile_size = get_render_tile_size();
|
||||
const uint64_t num_samples_added = uint64_t(tile_size.x) * tile_size.y *
|
||||
render_work.path_trace.num_samples;
|
||||
const int num_samples_added = tile_size.x * tile_size.y * render_work.path_trace.num_samples;
|
||||
const int current_sample = render_work.path_trace.start_sample +
|
||||
render_work.path_trace.num_samples -
|
||||
render_work.path_trace.sample_offset;
|
||||
render_work.path_trace.num_samples;
|
||||
progress_->add_samples(num_samples_added, current_sample);
|
||||
}
|
||||
|
||||
|
@@ -76,7 +76,7 @@ class PathTraceDisplay {
|
||||
|
||||
/* Copy buffer of rendered pixels of a given size into a given position of the texture.
|
||||
*
|
||||
* This function does not acquire a lock. The reason for this is to allow use of this function
|
||||
* This function does not acquire a lock. The reason for this is is to allow use of this function
|
||||
* for partial updates from different devices. In this case the caller will acquire the lock
|
||||
* once, update all the slices and release
|
||||
* the lock once. This will ensure that draw() will never use partially updated texture. */
|
||||
|
@@ -75,10 +75,7 @@ class PathTraceWork {
|
||||
|
||||
/* Render given number of samples as a synchronous blocking call.
|
||||
* The samples are added to the render buffer associated with this work. */
|
||||
virtual void render_samples(RenderStatistics &statistics,
|
||||
int start_sample,
|
||||
int samples_num,
|
||||
int sample_offset) = 0;
|
||||
virtual void render_samples(RenderStatistics &statistics, int start_sample, int samples_num) = 0;
|
||||
|
||||
/* Copy render result from this work to the corresponding place of the GPU display.
|
||||
*
|
||||
|
@@ -58,7 +58,7 @@ PathTraceWorkCPU::PathTraceWorkCPU(Device *device,
|
||||
DeviceScene *device_scene,
|
||||
bool *cancel_requested_flag)
|
||||
: PathTraceWork(device, film, device_scene, cancel_requested_flag),
|
||||
kernels_(Device::get_cpu_kernels())
|
||||
kernels_(*(device->get_cpu_kernels()))
|
||||
{
|
||||
DCHECK_EQ(device->info.type, DEVICE_CPU);
|
||||
}
|
||||
@@ -71,17 +71,14 @@ void PathTraceWorkCPU::init_execution()
|
||||
|
||||
void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
|
||||
int start_sample,
|
||||
int samples_num,
|
||||
int sample_offset)
|
||||
int samples_num)
|
||||
{
|
||||
const int64_t image_width = effective_buffer_params_.width;
|
||||
const int64_t image_height = effective_buffer_params_.height;
|
||||
const int64_t total_pixels_num = image_width * image_height;
|
||||
|
||||
if (device_->profiler.active()) {
|
||||
for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
|
||||
kernel_globals.start_profiling();
|
||||
}
|
||||
for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
|
||||
kernel_globals.start_profiling();
|
||||
}
|
||||
|
||||
tbb::task_arena local_arena = local_tbb_arena_create(device_);
|
||||
@@ -100,7 +97,6 @@ void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
|
||||
work_tile.w = 1;
|
||||
work_tile.h = 1;
|
||||
work_tile.start_sample = start_sample;
|
||||
work_tile.sample_offset = sample_offset;
|
||||
work_tile.num_samples = 1;
|
||||
work_tile.offset = effective_buffer_params_.offset;
|
||||
work_tile.stride = effective_buffer_params_.stride;
|
||||
@@ -110,10 +106,9 @@ void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
|
||||
render_samples_full_pipeline(kernel_globals, work_tile, samples_num);
|
||||
});
|
||||
});
|
||||
if (device_->profiler.active()) {
|
||||
for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
|
||||
kernel_globals.stop_profiling();
|
||||
}
|
||||
|
||||
for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
|
||||
kernel_globals.stop_profiling();
|
||||
}
|
||||
|
||||
statistics.occupancy = 1.0f;
|
||||
|
@@ -48,8 +48,7 @@ class PathTraceWorkCPU : public PathTraceWork {
|
||||
|
||||
virtual void render_samples(RenderStatistics &statistics,
|
||||
int start_sample,
|
||||
int samples_num,
|
||||
int sample_offset) override;
|
||||
int samples_num) override;
|
||||
|
||||
virtual void copy_to_display(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
|
@@ -250,8 +250,7 @@ void PathTraceWorkGPU::init_execution()
|
||||
|
||||
void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
|
||||
int start_sample,
|
||||
int samples_num,
|
||||
int sample_offset)
|
||||
int samples_num)
|
||||
{
|
||||
/* Limit number of states for the tile and rely on a greedy scheduling of tiles. This allows to
|
||||
* add more work (because tiles are smaller, so there is higher chance that more paths will
|
||||
@@ -262,7 +261,6 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
|
||||
work_tile_scheduler_.reset(effective_buffer_params_,
|
||||
start_sample,
|
||||
samples_num,
|
||||
sample_offset,
|
||||
device_scene_->data.integrator.scrambling_distance);
|
||||
|
||||
enqueue_reset();
|
||||
@@ -439,15 +437,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
|
||||
DCHECK_LE(work_size, max_num_paths_);
|
||||
|
||||
switch (kernel) {
|
||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: {
|
||||
/* Closest ray intersection kernels with integrator state and render buffer. */
|
||||
void *d_render_buffer = (void *)buffers_->buffer.device_pointer;
|
||||
void *args[] = {&d_path_index, &d_render_buffer, const_cast<int *>(&work_size)};
|
||||
|
||||
queue_->enqueue(kernel, work_size, args);
|
||||
break;
|
||||
}
|
||||
|
||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
|
||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
|
||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
|
||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: {
|
||||
@@ -817,10 +807,10 @@ bool PathTraceWorkGPU::should_use_graphics_interop()
|
||||
interop_use_ = device->should_use_graphics_interop();
|
||||
|
||||
if (interop_use_) {
|
||||
VLOG(2) << "Using graphics interop GPU display update.";
|
||||
VLOG(2) << "Will be using graphics interop GPU display update.";
|
||||
}
|
||||
else {
|
||||
VLOG(2) << "Using naive GPU display update.";
|
||||
VLOG(2) << "Will be using naive GPU display update.";
|
||||
}
|
||||
|
||||
interop_use_checked_ = true;
|
||||
|
@@ -46,8 +46,7 @@ class PathTraceWorkGPU : public PathTraceWork {
|
||||
|
||||
virtual void render_samples(RenderStatistics &statistics,
|
||||
int start_sample,
|
||||
int samples_num,
|
||||
int sample_offset) override;
|
||||
int samples_num) override;
|
||||
|
||||
virtual void copy_to_display(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
|
@@ -88,16 +88,6 @@ int RenderScheduler::get_num_samples() const
|
||||
return num_samples_;
|
||||
}
|
||||
|
||||
void RenderScheduler::set_sample_offset(int sample_offset)
|
||||
{
|
||||
sample_offset_ = sample_offset;
|
||||
}
|
||||
|
||||
int RenderScheduler::get_sample_offset() const
|
||||
{
|
||||
return sample_offset_;
|
||||
}
|
||||
|
||||
void RenderScheduler::set_time_limit(double time_limit)
|
||||
{
|
||||
time_limit_ = time_limit;
|
||||
@@ -120,15 +110,13 @@ int RenderScheduler::get_num_rendered_samples() const
|
||||
return state_.num_rendered_samples;
|
||||
}
|
||||
|
||||
void RenderScheduler::reset(const BufferParams &buffer_params, int num_samples, int sample_offset)
|
||||
void RenderScheduler::reset(const BufferParams &buffer_params, int num_samples)
|
||||
{
|
||||
buffer_params_ = buffer_params;
|
||||
|
||||
update_start_resolution_divider();
|
||||
|
||||
set_num_samples(num_samples);
|
||||
set_start_sample(sample_offset);
|
||||
set_sample_offset(sample_offset);
|
||||
|
||||
/* In background mode never do lower resolution render preview, as it is not really supported
|
||||
* by the software. */
|
||||
@@ -183,7 +171,7 @@ void RenderScheduler::reset(const BufferParams &buffer_params, int num_samples,
|
||||
|
||||
void RenderScheduler::reset_for_next_tile()
|
||||
{
|
||||
reset(buffer_params_, num_samples_, sample_offset_);
|
||||
reset(buffer_params_, num_samples_);
|
||||
}
|
||||
|
||||
bool RenderScheduler::render_work_reschedule_on_converge(RenderWork &render_work)
|
||||
@@ -329,7 +317,6 @@ RenderWork RenderScheduler::get_render_work()
|
||||
|
||||
render_work.path_trace.start_sample = get_start_sample_to_path_trace();
|
||||
render_work.path_trace.num_samples = get_num_samples_to_path_trace();
|
||||
render_work.path_trace.sample_offset = get_sample_offset();
|
||||
|
||||
render_work.init_render_buffers = (render_work.path_trace.start_sample == get_start_sample());
|
||||
|
||||
@@ -840,26 +827,6 @@ int RenderScheduler::get_num_samples_to_path_trace() const
|
||||
num_samples_to_occupy = lround(state_.occupancy_num_samples * 0.7f / state_.occupancy);
|
||||
}
|
||||
|
||||
/* When time limit is used clamp the calculated number of samples to keep occupancy.
|
||||
* This is because time limit causes the last render iteration to happen with less number of
|
||||
* samples, which conflicts with the occupancy (lower number of samples causes lower
|
||||
* occupancy, also the calculation is based on number of previously rendered samples).
|
||||
*
|
||||
* When time limit is not used the number of samples per render iteration is either increasing
|
||||
* or stays the same, so there is no need to clamp number of samples calculated for occupancy.
|
||||
*/
|
||||
if (time_limit_ != 0.0 && state_.start_render_time != 0.0) {
|
||||
const double remaining_render_time = max(
|
||||
0.0, time_limit_ - (time_dt() - state_.start_render_time));
|
||||
const double time_per_sample_average = path_trace_time_.get_average();
|
||||
const double predicted_render_time = num_samples_to_occupy * time_per_sample_average;
|
||||
|
||||
if (predicted_render_time > remaining_render_time) {
|
||||
num_samples_to_occupy = lround(num_samples_to_occupy *
|
||||
(remaining_render_time / predicted_render_time));
|
||||
}
|
||||
}
|
||||
|
||||
num_samples_to_render = max(num_samples_to_render,
|
||||
min(num_samples_to_occupy, max_num_samples_to_render));
|
||||
}
|
||||
|
@@ -39,7 +39,6 @@ class RenderWork {
|
||||
struct {
|
||||
int start_sample = 0;
|
||||
int num_samples = 0;
|
||||
int sample_offset = 0;
|
||||
} path_trace;
|
||||
|
||||
struct {
|
||||
@@ -126,9 +125,6 @@ class RenderScheduler {
|
||||
void set_num_samples(int num_samples);
|
||||
int get_num_samples() const;
|
||||
|
||||
void set_sample_offset(int sample_offset);
|
||||
int get_sample_offset() const;
|
||||
|
||||
/* Time limit for the path tracing tasks, in minutes.
|
||||
* Zero disables the limit. */
|
||||
void set_time_limit(double time_limit);
|
||||
@@ -154,7 +150,7 @@ class RenderScheduler {
|
||||
|
||||
/* Reset scheduler, indicating that rendering will happen from scratch.
|
||||
* Resets current rendered state, as well as scheduling information. */
|
||||
void reset(const BufferParams &buffer_params, int num_samples, int sample_offset);
|
||||
void reset(const BufferParams &buffer_params, int num_samples);
|
||||
|
||||
/* Reset scheduler upon switching to a next tile.
|
||||
* Will keep the same number of samples and full-frame render parameters, but will reset progress
|
||||
@@ -423,8 +419,6 @@ class RenderScheduler {
|
||||
int start_sample_ = 0;
|
||||
int num_samples_ = 0;
|
||||
|
||||
int sample_offset_ = 0;
|
||||
|
||||
/* Limit in seconds for how long path tracing is allowed to happen.
|
||||
* Zero means no limit is applied. */
|
||||
double time_limit_ = 0.0;
|
||||
|
@@ -96,7 +96,7 @@ bool ShaderEval::eval_cpu(Device *device,
|
||||
device->get_cpu_kernel_thread_globals(kernel_thread_globals);
|
||||
|
||||
/* Find required kernel function. */
|
||||
const CPUKernels &kernels = Device::get_cpu_kernels();
|
||||
const CPUKernels &kernels = *(device->get_cpu_kernels());
|
||||
|
||||
/* Simple parallel_for over all work items. */
|
||||
KernelShaderEvalInput *input_data = input.data();
|
||||
|
@@ -36,7 +36,6 @@ void WorkTileScheduler::set_max_num_path_states(int max_num_path_states)
|
||||
void WorkTileScheduler::reset(const BufferParams &buffer_params,
|
||||
int sample_start,
|
||||
int samples_num,
|
||||
int sample_offset,
|
||||
float scrambling_distance)
|
||||
{
|
||||
/* Image buffer parameters. */
|
||||
@@ -52,7 +51,6 @@ void WorkTileScheduler::reset(const BufferParams &buffer_params,
|
||||
/* Samples parameters. */
|
||||
sample_start_ = sample_start;
|
||||
samples_num_ = samples_num;
|
||||
sample_offset_ = sample_offset;
|
||||
|
||||
/* Initialize new scheduling. */
|
||||
reset_scheduler_state();
|
||||
@@ -113,7 +111,6 @@ bool WorkTileScheduler::get_work(KernelWorkTile *work_tile_, const int max_work_
|
||||
work_tile.h = tile_size_.height;
|
||||
work_tile.start_sample = sample_start_ + start_sample;
|
||||
work_tile.num_samples = min(tile_size_.num_samples, samples_num_ - start_sample);
|
||||
work_tile.sample_offset = sample_offset_;
|
||||
work_tile.offset = offset_;
|
||||
work_tile.stride = stride_;
|
||||
|
||||
|
@@ -41,7 +41,6 @@ class WorkTileScheduler {
|
||||
void reset(const BufferParams &buffer_params,
|
||||
int sample_start,
|
||||
int samples_num,
|
||||
int sample_offset,
|
||||
float scrambling_distance);
|
||||
|
||||
/* Get work for a device.
|
||||
@@ -80,7 +79,6 @@ class WorkTileScheduler {
|
||||
* (splitting into a smaller work tiles). */
|
||||
int sample_start_ = 0;
|
||||
int samples_num_ = 0;
|
||||
int sample_offset_ = 0;
|
||||
|
||||
/* Tile size which be scheduled for rendering. */
|
||||
TileSize tile_size_;
|
||||
|
@@ -39,10 +39,6 @@ set(SRC_KERNEL_DEVICE_HIP
|
||||
device/hip/kernel.cpp
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_DEVICE_METAL
|
||||
device/metal/kernel.metal
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_DEVICE_OPTIX
|
||||
device/optix/kernel.cu
|
||||
device/optix/kernel_shader_raytrace.cu
|
||||
@@ -83,13 +79,6 @@ set(SRC_KERNEL_DEVICE_OPTIX_HEADERS
|
||||
device/optix/globals.h
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_DEVICE_METAL_HEADERS
|
||||
device/metal/compat.h
|
||||
device/metal/context_begin.h
|
||||
device/metal/context_end.h
|
||||
device/metal/globals.h
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_CLOSURE_HEADERS
|
||||
closure/alloc.h
|
||||
closure/bsdf.h
|
||||
@@ -273,7 +262,6 @@ set(SRC_KERNEL_UTIL_HEADERS
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_TYPES_HEADERS
|
||||
tables.h
|
||||
textures.h
|
||||
types.h
|
||||
)
|
||||
@@ -411,8 +399,12 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
-I ${CMAKE_CURRENT_SOURCE_DIR}/..
|
||||
-I ${CMAKE_CURRENT_SOURCE_DIR}/device/cuda
|
||||
--use_fast_math
|
||||
-o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_file}
|
||||
-Wno-deprecated-gpu-targets)
|
||||
-o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_file})
|
||||
|
||||
if(${experimental})
|
||||
set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__)
|
||||
set(name ${name}_experimental)
|
||||
endif()
|
||||
|
||||
if(WITH_NANOVDB)
|
||||
set(cuda_flags ${cuda_flags}
|
||||
@@ -420,10 +412,6 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
-I "${NANOVDB_INCLUDE_DIR}")
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_DEBUG)
|
||||
set(cuda_flags ${cuda_flags} -D WITH_CYCLES_DEBUG)
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_CUBIN_COMPILER)
|
||||
string(SUBSTRING ${arch} 3 -1 CUDA_ARCH)
|
||||
|
||||
@@ -572,14 +560,13 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
||||
-ffast-math
|
||||
-o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})
|
||||
|
||||
if(WITH_NANOVDB)
|
||||
set(hip_flags ${hip_flags}
|
||||
-D WITH_NANOVDB
|
||||
-I "${NANOVDB_INCLUDE_DIR}")
|
||||
if(${experimental})
|
||||
set(hip_flags ${hip_flags} -D __KERNEL_EXPERIMENTAL__)
|
||||
set(name ${name}_experimental)
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_DEBUG)
|
||||
set(hip_flags ${hip_flags} -D WITH_CYCLES_DEBUG)
|
||||
set(hip_flags ${hip_flags} -D __KERNEL_DEBUG__)
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
@@ -620,10 +607,6 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
|
||||
-I "${NANOVDB_INCLUDE_DIR}")
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_DEBUG)
|
||||
set(cuda_flags ${cuda_flags} -D WITH_CYCLES_DEBUG)
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_CUBIN_COMPILER)
|
||||
# Needed to find libnvrtc-builtins.so. Can't do it from inside
|
||||
# cycles_cubin_cc since the env variable is read before main()
|
||||
@@ -712,7 +695,7 @@ if(WITH_COMPILER_ASAN)
|
||||
string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -fno-sanitize=all")
|
||||
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-sanitize=vptr")
|
||||
elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
# With OSL, Cycles disables rtti in some modules, which then breaks at linking
|
||||
# With OSL, Cycles disables rtti in some modules, wich then breaks at linking
|
||||
# when trying to use vptr sanitizer (included into 'undefined' general option).
|
||||
string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -fno-sanitize=vptr")
|
||||
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-sanitize=vptr")
|
||||
@@ -740,14 +723,12 @@ cycles_add_library(cycles_kernel "${LIB}"
|
||||
${SRC_KERNEL_DEVICE_CUDA}
|
||||
${SRC_KERNEL_DEVICE_HIP}
|
||||
${SRC_KERNEL_DEVICE_OPTIX}
|
||||
${SRC_KERNEL_DEVICE_METAL}
|
||||
${SRC_KERNEL_HEADERS}
|
||||
${SRC_KERNEL_DEVICE_CPU_HEADERS}
|
||||
${SRC_KERNEL_DEVICE_GPU_HEADERS}
|
||||
${SRC_KERNEL_DEVICE_CUDA_HEADERS}
|
||||
${SRC_KERNEL_DEVICE_HIP_HEADERS}
|
||||
${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
|
||||
${SRC_KERNEL_DEVICE_METAL_HEADERS}
|
||||
)
|
||||
|
||||
source_group("bake" FILES ${SRC_KERNEL_BAKE_HEADERS})
|
||||
@@ -759,7 +740,6 @@ source_group("device\\cuda" FILES ${SRC_KERNEL_DEVICE_CUDA} ${SRC_KERNEL_DEVICE_
|
||||
source_group("device\\gpu" FILES ${SRC_KERNEL_DEVICE_GPU_HEADERS})
|
||||
source_group("device\\hip" FILES ${SRC_KERNEL_DEVICE_HIP} ${SRC_KERNEL_DEVICE_HIP_HEADERS})
|
||||
source_group("device\\optix" FILES ${SRC_KERNEL_DEVICE_OPTIX} ${SRC_KERNEL_DEVICE_OPTIX_HEADERS})
|
||||
source_group("device\\metal" FILES ${SRC_KERNEL_DEVICE_METAL} ${SRC_KERNEL_DEVICE_METAL_HEADERS})
|
||||
source_group("film" FILES ${SRC_KERNEL_FILM_HEADERS})
|
||||
source_group("geom" FILES ${SRC_KERNEL_GEOM_HEADERS})
|
||||
source_group("integrator" FILES ${SRC_KERNEL_INTEGRATOR_HEADERS})
|
||||
@@ -792,8 +772,6 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP}" ${CYCLES_
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_METAL}" ${CYCLES_INSTALL_PATH}/source/kernel/device/metal)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_METAL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/metal)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_FILM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/film)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/geom)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_INTEGRATOR_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/integrator)
|
||||
|
@@ -97,7 +97,7 @@ ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection *
|
||||
swapped = false;
|
||||
for (int j = 0; j < num_hits - 1; ++j) {
|
||||
if (hits[j].t > hits[j + 1].t) {
|
||||
Intersection tmp_hit = hits[j];
|
||||
struct Intersection tmp_hit = hits[j];
|
||||
float3 tmp_Ng = Ng[j];
|
||||
hits[j] = hits[j + 1];
|
||||
Ng[j] = Ng[j + 1];
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user