Compare commits

..

102 Commits

Author SHA1 Message Date
e1c1c65578 cleanup 2021-10-31 14:28:01 +01:00
2154631e85 cleanups 2021-10-31 14:20:04 +01:00
71b7e59e28 remove print 2021-10-31 14:09:16 +01:00
7106fe299e cleanup 2021-10-31 14:06:48 +01:00
4b8e52bdbc add timeit include 2021-10-31 13:50:05 +01:00
462d3bde6e fix 2021-10-31 13:36:24 +01:00
e354ff5b86 cleanup 2021-10-31 13:27:43 +01:00
27da814444 add test 2021-10-31 13:08:01 +01:00
493571c3c3 add some noexcepts 2021-10-31 13:02:49 +01:00
a6eb04cce4 cleanups 2021-10-31 12:39:46 +01:00
139f69b3b9 Merge branch 'master' into virtual-array-value-type 2021-10-31 12:33:45 +01:00
8a1cc2f291 Merge branch 'master' into virtual-array-value-type 2021-10-30 23:04:24 +02:00
572fa82cc9 cleanup 2021-10-30 23:04:04 +02:00
c8ab4dcce1 cleanup 2021-10-30 23:02:05 +02:00
55a6af1c1e cleanup 2021-10-30 22:52:28 +02:00
0827245a91 cleanup 2021-10-30 22:49:26 +02:00
2f284f42b6 progress 2021-10-30 22:38:35 +02:00
1131bf6ec6 progress 2021-10-30 22:32:48 +02:00
2b0e890a55 comments 2021-10-30 22:20:04 +02:00
c4d3380d2e progress 2021-10-30 21:36:11 +02:00
05cef9da88 cleanup 2021-10-30 21:20:15 +02:00
43a53f97c4 cleanup tests 2021-10-30 21:17:24 +02:00
1d7a893d12 cleanup 2021-10-30 21:07:33 +02:00
5e60ba93f7 progress 2021-10-30 21:05:56 +02:00
65258dc98f progress 2021-10-30 21:02:49 +02:00
d449e66c1d cleanup 2021-10-30 20:59:26 +02:00
01fe332b9d progress 2021-10-30 20:54:00 +02:00
d6b0f3a3a4 progress 2021-10-30 20:42:37 +02:00
928b7b5340 cleanup 2021-10-30 20:35:26 +02:00
aa22839aa1 progress 2021-10-30 20:31:53 +02:00
78daf1b697 progress 2021-10-30 20:24:20 +02:00
dcd9b21d07 progress 2021-10-30 20:21:48 +02:00
42b350ed7f progress 2021-10-30 20:14:43 +02:00
e79c7e088e progress 2021-10-30 19:59:35 +02:00
99c5b788eb progress 2021-10-30 19:55:53 +02:00
a2b5a74d49 progress 2021-10-30 19:46:17 +02:00
84626dfa16 progress 2021-10-30 19:40:58 +02:00
3d0abb3be9 progress 2021-10-30 19:31:19 +02:00
9b6c13e66c progress 2021-10-30 19:10:44 +02:00
7f4273d373 progress 2021-10-30 18:50:28 +02:00
d1b8d6acd3 progress 2021-10-30 18:39:34 +02:00
2267f19486 progress 2021-10-30 18:18:27 +02:00
acd8874205 progress 2021-10-30 18:13:52 +02:00
a69bd34fad fix 2021-10-30 16:56:05 +02:00
2516fc953a Merge branch 'blender-v3.0-release' 2021-10-30 16:54:06 +02:00
50f93cbf52 cleanup 2021-10-25 00:35:58 +02:00
8af541dcfd Geometry Nodes: Refactor virtual array system.
Differential Revision: https://developer.blender.org/D12986
2021-10-25 00:16:40 +02:00
f8c0682a67 progress 2021-10-24 23:58:16 +02:00
b40e753c81 fix 2021-10-24 23:20:32 +02:00
2cff0676af cleanup 2021-10-24 23:13:25 +02:00
b0444a347b cleanup 2021-10-24 23:11:47 +02:00
becfc547b9 cleanup 2021-10-24 23:08:22 +02:00
6a558a18f9 progress 2021-10-24 23:02:51 +02:00
c066102f30 progress 2021-10-24 22:55:05 +02:00
ba49545060 progress 2021-10-24 22:50:52 +02:00
ca70c0521a progress 2021-10-24 22:44:54 +02:00
4323b3f592 progress 2021-10-24 22:28:43 +02:00
d1e6606d4d progress 2021-10-24 22:10:47 +02:00
b64ad0b30a cleanup 2021-10-24 22:00:32 +02:00
ed4b15ecd3 cleanup 2021-10-24 21:57:56 +02:00
f79a673bf1 cleanup 2021-10-24 21:44:08 +02:00
1438f16663 fix 2021-10-24 21:09:58 +02:00
d613451c3a Merge branch 'master' into virtual-array-value-type 2021-10-24 20:56:50 +02:00
3f8ac0e5ef Merge branch 'master' into virtual-array-value-type 2021-10-24 17:27:01 +02:00
56bd1f46bd fixes 2021-10-24 17:20:54 +02:00
1e4cbec720 fix 2021-10-24 17:09:24 +02:00
f9b669c588 fixes 2021-10-24 16:45:02 +02:00
1ac0a2db11 cleanup 2021-10-24 16:38:23 +02:00
771cce22f1 cleanup 2021-10-24 16:34:50 +02:00
c1a1046ed7 cleanup 2021-10-24 16:29:33 +02:00
f430e7850a cleanup 2021-10-24 16:24:58 +02:00
116eb09681 fix 2021-10-24 16:23:35 +02:00
841d393c15 cleanup 2021-10-24 16:23:17 +02:00
bc1acbcc5a cleanup 2021-10-24 16:15:11 +02:00
cd49f68db6 cleanup 2021-10-24 16:08:02 +02:00
5b51065347 cleanup 2021-10-24 16:05:15 +02:00
e736fe67c9 cleanup 2021-10-24 15:59:12 +02:00
1d20f60616 fixes 2021-10-24 15:27:21 +02:00
91b58c95ac fixes 2021-10-24 14:57:51 +02:00
955b01e359 fix assignment operators 2021-10-24 13:59:19 +02:00
1b3758f205 cleanup 2021-10-24 13:59:11 +02:00
33870b21f3 fix output attribute 2021-10-24 13:05:45 +02:00
e49f70745b fix dst fields 2021-10-24 12:56:11 +02:00
c48a99542d compiling works again 2021-10-24 02:26:08 +02:00
68b5670268 more renaming 2021-10-23 22:50:37 +02:00
dea268de72 progress 2021-10-23 22:42:30 +02:00
399463c548 progress 2021-10-23 22:35:12 +02:00
1d94de4ac3 progress 2021-10-23 20:22:49 +02:00
0453361d20 progress 2021-10-23 20:11:19 +02:00
b21a3ae6ad rename to *Impl 2021-10-23 19:15:29 +02:00
edb8ccf31f support converting VMutableArray to VArray 2021-10-23 19:14:14 +02:00
7af712c343 add mutable virtual array 2021-10-23 19:06:53 +02:00
a9e0cb6ec8 rename to VMutableArrayImpl 2021-10-23 18:53:34 +02:00
5a7b30f8f9 cleanup 2021-10-23 18:52:44 +02:00
024c07af84 more virtual array tests 2021-10-23 18:10:11 +02:00
c256298924 rename VArray to VArrayImpl 2021-10-23 16:48:22 +02:00
a11950531f add comments 2021-10-23 16:36:28 +02:00
ae01d1db98 add extra type info 2021-10-23 16:08:20 +02:00
f3a5b31196 fix assign any 2021-10-23 15:54:56 +02:00
8d1a5fcfea add tests 2021-10-23 15:45:49 +02:00
39c4103f9e move methods 2021-10-23 15:22:28 +02:00
4e03ac6b25 initial Any type 2021-10-23 14:50:11 +02:00
1144 changed files with 13588 additions and 25950 deletions

View File

@@ -12,8 +12,6 @@ Checks: >
-readability-avoid-const-params-in-decls,
-readability-simplify-boolean-expr,
-readability-make-member-function-const,
-readability-suspicious-call-argument,
-readability-redundant-member-init,
-readability-misleading-indentation,
@@ -27,8 +25,6 @@ Checks: >
-bugprone-branch-clone,
-bugprone-macro-parentheses,
-bugprone-reserved-identifier,
-bugprone-easily-swappable-parameters,
-bugprone-implicit-widening-of-multiplication-result,
-bugprone-sizeof-expression,
-bugprone-integer-division,
@@ -44,8 +40,7 @@ Checks: >
-modernize-pass-by-value,
# Cannot be enabled yet, because using raw string literals in tests breaks
# the windows compiler currently.
-modernize-raw-string-literal,
-modernize-return-braced-init-list
-modernize-raw-string-literal
CheckOptions:
- key: modernize-use-default-member-init.UseAssignment

View File

@@ -187,13 +187,6 @@ mark_as_advanced(CPACK_OVERRIDE_PACKAGENAME)
mark_as_advanced(BUILDINFO_OVERRIDE_DATE)
mark_as_advanced(BUILDINFO_OVERRIDE_TIME)
if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.16")
option(WITH_UNITY_BUILD "Enable unity build for modules that support it to improve compile times" ON)
mark_as_advanced(WITH_UNITY_BUILD)
else()
set(WITH_UNITY_BUILD OFF)
endif()
option(WITH_IK_ITASC "Enable ITASC IK solver (only disable for development & for incompatible C++ compilers)" ON)
option(WITH_IK_SOLVER "Enable Legacy IK solver (only disable for development)" ON)
option(WITH_FFTW3 "Enable FFTW3 support (Used for smoke, ocean sim, and audio effects)" ON)
@@ -418,7 +411,6 @@ option(WITH_CYCLES "Enable Cycles Render Engine" ON)
option(WITH_CYCLES_OSL "Build Cycles with OpenShadingLanguage support" ON)
option(WITH_CYCLES_EMBREE "Build Cycles with Embree support" ON)
option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON)
option(WITH_CYCLES_DEBUG "Build Cycles with options useful for debugging (e.g., MIS)" OFF)
option(WITH_CYCLES_STANDALONE "Build Cycles standalone application" OFF)
option(WITH_CYCLES_STANDALONE_GUI "Build Cycles standalone with GUI" OFF)
@@ -448,11 +440,7 @@ mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
mark_as_advanced(WITH_CUDA_DYNLOAD)
# AMD HIP
if(WIN32)
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
else()
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" OFF)
endif()
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" OFF)
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 CACHE STRING "AMD HIP architectures to build binaries for")
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
@@ -502,8 +490,7 @@ endif()
# This should be turned off when Blender enter beta/rc/release
if("${BLENDER_VERSION_CYCLE}" STREQUAL "release" OR
"${BLENDER_VERSION_CYCLE}" STREQUAL "rc" OR
"${BLENDER_VERSION_CYCLE}" STREQUAL "beta")
"${BLENDER_VERSION_CYCLE}" STREQUAL "rc")
set(WITH_EXPERIMENTAL_FEATURES OFF)
else()
set(WITH_EXPERIMENTAL_FEATURES ON)
@@ -655,7 +642,7 @@ if(WIN32)
option(WITH_WINDOWS_PDB "Generate a pdb file for client side stacktraces" ON)
mark_as_advanced(WITH_WINDOWS_PDB)
option(WITH_WINDOWS_STRIPPED_PDB "Use a stripped PDB file" ON)
option(WITH_WINDOWS_STRIPPED_PDB "Use a stripped PDB file" On)
mark_as_advanced(WITH_WINDOWS_STRIPPED_PDB)
endif()
@@ -1078,7 +1065,7 @@ if(MSVC)
add_definitions(-D__LITTLE_ENDIAN__)
# OSX-Note: as we do cross-compiling with specific set architecture,
# endianness-detection and auto-setting is counterproductive
# endianess-detection and auto-setting is counterproductive
# so we just set endianness according CMAKE_OSX_ARCHITECTURES
elseif(CMAKE_OSX_ARCHITECTURES MATCHES i386 OR CMAKE_OSX_ARCHITECTURES MATCHES x86_64 OR CMAKE_OSX_ARCHITECTURES MATCHES arm64)
@@ -1768,7 +1755,7 @@ endif()
set(CMAKE_CXX_STANDARD 17)
# If C++17 is not available, downgrading to an earlier standard is NOT OK.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Do not enable compiler specific language extensions.
# Do not enable compiler specific language extentions.
set(CMAKE_CXX_EXTENSIONS OFF)
# Make MSVC properly report the value of the __cplusplus preprocessor macro

View File

@@ -51,7 +51,7 @@ Other Convenience Targets
* config: Run cmake configuration tool to set build options.
* deps: Build library dependencies (intended only for platform maintainers).
The existance of locally build dependencies overrides the pre-built dependencies from subversion.
The existance of locally build dependancies overrides the pre-built dependencies from subversion.
These must be manually removed from '../lib/' to go back to using the pre-compiled libraries.
Project Files

View File

@@ -17,7 +17,7 @@
# ***** END GPL LICENSE BLOCK *****
########################################################################
# Copy all generated files to the proper structure as blender prefers
# Copy all generated files to the proper strucure as blender prefers
########################################################################
if(NOT DEFINED HARVEST_TARGET)

View File

@@ -42,7 +42,6 @@ ExternalProject_Add(nanovdb
URL_HASH ${NANOVDB_HASH_TYPE}=${NANOVDB_HASH}
PREFIX ${BUILD_DIR}/nanovdb
SOURCE_SUBDIR nanovdb
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/nanovdb/src/nanovdb < ${PATCH_DIR}/nanovdb.diff
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/nanovdb ${DEFAULT_CMAKE_FLAGS} ${NANOVDB_EXTRA_ARGS}
INSTALL_DIR ${LIBDIR}/nanovdb
)

View File

@@ -39,7 +39,7 @@ endif()
set(DOWNLOAD_DIR "${CMAKE_CURRENT_BINARY_DIR}/downloads" CACHE STRING "Path for downloaded files")
# This path must be hard-coded like this, so that the GNUmakefile knows where it is and can pass it to make_source_archive.py:
set(PACKAGE_DIR "${CMAKE_CURRENT_BINARY_DIR}/packages")
option(PACKAGE_USE_UPSTREAM_SOURCES "Use sources upstream to download the package sources, when OFF the blender mirror will be used" ON)
option(PACKAGE_USE_UPSTREAM_SOURCES "Use soures upstream to download the package sources, when OFF the blender mirror will be used" ON)
file(TO_CMAKE_PATH ${DOWNLOAD_DIR} DOWNLOAD_DIR)
file(TO_CMAKE_PATH ${PACKAGE_DIR} PACKAGE_DIR)

View File

@@ -24,7 +24,7 @@ if(MSVC)
add_custom_command(
OUTPUT ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
COMMAND echo packaging python
COMMAND echo this should output at ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
COMMAND echo this should ouput at ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
COMMAND ${CMAKE_COMMAND} -E make_directory ${PYTARGET}/libs
COMMAND ${CMAKE_COMMAND} -E copy ${PYSRC}/libs/python${PYTHON_SHORT_VERSION_NO_DOTS}.lib ${PYTARGET}/libs/python${PYTHON_SHORT_VERSION_NO_DOTS}.lib
COMMAND ${CMAKE_COMMAND} -E copy ${PYSRC}/python.exe ${PYTARGET}/bin/python.exe
@@ -43,7 +43,7 @@ if(MSVC)
add_custom_command(
OUTPUT ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
COMMAND echo packaging python
COMMAND echo this should output at ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
COMMAND echo this should ouput at ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe
COMMAND ${CMAKE_COMMAND} -E make_directory ${PYTARGET}/libs
COMMAND ${CMAKE_COMMAND} -E copy ${PYSRC}/libs/python${PYTHON_SHORT_VERSION_NO_DOTS}${PYTHON_POSTFIX}.lib ${PYTARGET}/libs/python${PYTHON_SHORT_VERSION_NO_DOTS}${PYTHON_POSTFIX}.lib
COMMAND ${CMAKE_COMMAND} -E copy ${PYSRC}/python${PYTHON_POSTFIX}.exe ${PYTARGET}/bin/python${PYTHON_POSTFIX}.exe

View File

@@ -1826,7 +1826,7 @@ compile_OCIO() {
# Force linking against static libs
#rm -f $_inst/lib/*.so*
# Additional dependencies
# Additional depencencies
#cp ext/dist/lib/libtinyxml.a $_inst/lib
#cp ext/dist/lib/libyaml-cpp.a $_inst/lib

View File

@@ -1,374 +0,0 @@
Index: nanovdb/nanovdb/NanoVDB.h
===================================================================
--- a/nanovdb/nanovdb/NanoVDB.h (revision 62751)
+++ b/nanovdb/nanovdb/NanoVDB.h (working copy)
@@ -152,8 +152,8 @@
#endif // __CUDACC_RTC__
-#ifdef __CUDACC__
-// Only define __hostdev__ when using NVIDIA CUDA compiler
+#if defined(__CUDACC__) || defined(__HIP__)
+// Only define __hostdev__ when using NVIDIA CUDA or HIP compiler
#define __hostdev__ __host__ __device__
#else
#define __hostdev__
@@ -461,7 +461,7 @@
/// Maximum floating-point values
template<typename T>
struct Maximum;
-#ifdef __CUDA_ARCH__
+#if defined(__CUDA_ARCH__) || defined(__HIP__)
template<>
struct Maximum<int>
{
@@ -1006,10 +1006,10 @@
using Vec3i = Vec3<int>;
/// @brief Return a single precision floating-point vector of this coordinate
-Vec3f Coord::asVec3s() const { return Vec3f(float(mVec[0]), float(mVec[1]), float(mVec[2])); }
+inline __hostdev__ Vec3f Coord::asVec3s() const { return Vec3f(float(mVec[0]), float(mVec[1]), float(mVec[2])); }
/// @brief Return a double precision floating-point vector of this coordinate
-Vec3d Coord::asVec3d() const { return Vec3d(double(mVec[0]), double(mVec[1]), double(mVec[2])); }
+inline __hostdev__ Vec3d Coord::asVec3d() const { return Vec3d(double(mVec[0]), double(mVec[1]), double(mVec[2])); }
// ----------------------------> Vec4 <--------------------------------------
@@ -1820,7 +1820,7 @@
}; // Map
template<typename Mat4T>
-void Map::set(const Mat4T& mat, const Mat4T& invMat, double taper)
+__hostdev__ void Map::set(const Mat4T& mat, const Mat4T& invMat, double taper)
{
float * mf = mMatF, *vf = mVecF;
float* mif = mInvMatF;
@@ -2170,7 +2170,7 @@
}; // Class Grid
template<typename TreeT>
-int Grid<TreeT>::findBlindDataForSemantic(GridBlindDataSemantic semantic) const
+__hostdev__ int Grid<TreeT>::findBlindDataForSemantic(GridBlindDataSemantic semantic) const
{
for (uint32_t i = 0, n = blindDataCount(); i < n; ++i)
if (blindMetaData(i).mSemantic == semantic)
@@ -2328,7 +2328,7 @@
}; // Tree class
template<typename RootT>
-void Tree<RootT>::extrema(ValueType& min, ValueType& max) const
+__hostdev__ void Tree<RootT>::extrema(ValueType& min, ValueType& max) const
{
min = this->root().valueMin();
max = this->root().valueMax();
@@ -2336,7 +2336,7 @@
template<typename RootT>
template<typename NodeT>
-const NodeT* Tree<RootT>::getNode(uint32_t i) const
+__hostdev__ const NodeT* Tree<RootT>::getNode(uint32_t i) const
{
static_assert(is_same<TreeNodeT<NodeT::LEVEL>, NodeT>::value, "Tree::getNode: unvalid node type");
NANOVDB_ASSERT(i < DataType::mCount[NodeT::LEVEL]);
@@ -2345,7 +2345,7 @@
template<typename RootT>
template<int LEVEL>
-const typename TreeNode<Tree<RootT>, LEVEL>::type* Tree<RootT>::getNode(uint32_t i) const
+__hostdev__ const typename TreeNode<Tree<RootT>, LEVEL>::type* Tree<RootT>::getNode(uint32_t i) const
{
NANOVDB_ASSERT(i < DataType::mCount[LEVEL]);
return reinterpret_cast<const TreeNodeT<LEVEL>*>(reinterpret_cast<const uint8_t*>(this) + DataType::mBytes[LEVEL]) + i;
@@ -2353,7 +2353,7 @@
template<typename RootT>
template<typename NodeT>
-NodeT* Tree<RootT>::getNode(uint32_t i)
+__hostdev__ NodeT* Tree<RootT>::getNode(uint32_t i)
{
static_assert(is_same<TreeNodeT<NodeT::LEVEL>, NodeT>::value, "Tree::getNode: invalid node type");
NANOVDB_ASSERT(i < DataType::mCount[NodeT::LEVEL]);
@@ -2362,7 +2362,7 @@
template<typename RootT>
template<int LEVEL>
-typename TreeNode<Tree<RootT>, LEVEL>::type* Tree<RootT>::getNode(uint32_t i)
+__hostdev__ typename TreeNode<Tree<RootT>, LEVEL>::type* Tree<RootT>::getNode(uint32_t i)
{
NANOVDB_ASSERT(i < DataType::mCount[LEVEL]);
return reinterpret_cast<TreeNodeT<LEVEL>*>(reinterpret_cast<uint8_t*>(this) + DataType::mBytes[LEVEL]) + i;
@@ -2370,7 +2370,7 @@
template<typename RootT>
template<typename NodeT>
-uint32_t Tree<RootT>::getNodeID(const NodeT& node) const
+__hostdev__ uint32_t Tree<RootT>::getNodeID(const NodeT& node) const
{
static_assert(is_same<TreeNodeT<NodeT::LEVEL>, NodeT>::value, "Tree::getNodeID: invalid node type");
const NodeT* first = reinterpret_cast<const NodeT*>(reinterpret_cast<const uint8_t*>(this) + DataType::mBytes[NodeT::LEVEL]);
@@ -2380,7 +2380,7 @@
template<typename RootT>
template<typename NodeT>
-uint32_t Tree<RootT>::getLinearOffset(const NodeT& node) const
+__hostdev__ uint32_t Tree<RootT>::getLinearOffset(const NodeT& node) const
{
return this->getNodeID(node) + DataType::mPFSum[NodeT::LEVEL];
}
@@ -3366,7 +3366,7 @@
}; // LeafNode class
template<typename ValueT, typename CoordT, template<uint32_t> class MaskT, uint32_t LOG2DIM>
-inline void LeafNode<ValueT, CoordT, MaskT, LOG2DIM>::updateBBox()
+inline __hostdev__ void LeafNode<ValueT, CoordT, MaskT, LOG2DIM>::updateBBox()
{
static_assert(LOG2DIM == 3, "LeafNode::updateBBox: only supports LOGDIM = 3!");
if (!this->isActive()) return;
Index: nanovdb/nanovdb/util/SampleFromVoxels.h
===================================================================
--- a/nanovdb/nanovdb/util/SampleFromVoxels.h (revision 62751)
+++ b/nanovdb/nanovdb/util/SampleFromVoxels.h (working copy)
@@ -22,7 +22,7 @@
#define NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED
// Only define __hostdev__ when compiling as NVIDIA CUDA
-#ifdef __CUDACC__
+#if defined(__CUDACC__) || defined(__HIP__)
#define __hostdev__ __host__ __device__
#else
#include <cmath> // for floor
@@ -136,7 +136,7 @@
template<typename TreeOrAccT>
template<typename Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const Vec3T& xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const Vec3T& xyz) const
{
const CoordT ijk = Round<CoordT>(xyz);
if (ijk != mPos) {
@@ -147,7 +147,7 @@
}
template<typename TreeOrAccT>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const CoordT& ijk) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const CoordT& ijk) const
{
if (ijk != mPos) {
mPos = ijk;
@@ -158,7 +158,7 @@
template<typename TreeOrAccT>
template<typename Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, false>::operator()(const Vec3T& xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, false>::operator()(const Vec3T& xyz) const
{
return mAcc.getValue(Round<CoordT>(xyz));
}
@@ -195,7 +195,7 @@
}; // TrilinearSamplerBase
template<typename TreeOrAccT>
-void TrilinearSampler<TreeOrAccT>::stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const
+__hostdev__ void TrilinearSampler<TreeOrAccT>::stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const
{
v[0][0][0] = mAcc.getValue(ijk); // i, j, k
@@ -224,7 +224,7 @@
template<typename TreeOrAccT>
template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType TrilinearSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
+__hostdev__ typename TreeOrAccT::ValueType TrilinearSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
{
#if 0
auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a
@@ -239,7 +239,7 @@
template<typename TreeOrAccT>
template<typename RealT, template<typename...> class Vec3T>
-Vec3T<typename TreeOrAccT::ValueType> TrilinearSampler<TreeOrAccT>::gradient(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
+__hostdev__ Vec3T<typename TreeOrAccT::ValueType> TrilinearSampler<TreeOrAccT>::gradient(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
{
static_assert(std::is_floating_point<ValueT>::value, "TrilinearSampler::gradient requires a floating-point type");
#if 0
@@ -270,7 +270,7 @@
}
template<typename TreeOrAccT>
-bool TrilinearSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[2][2][2])
+__hostdev__ bool TrilinearSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[2][2][2])
{
static_assert(std::is_floating_point<ValueT>::value, "TrilinearSampler::zeroCrossing requires a floating-point type");
const bool less = v[0][0][0] < ValueT(0);
@@ -363,7 +363,7 @@
template<typename TreeOrAccT>
template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(Vec3T<RealT> xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(Vec3T<RealT> xyz) const
{
this->cache(xyz);
return BaseT::sample(xyz, mVal);
@@ -370,7 +370,7 @@
}
template<typename TreeOrAccT>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(const CoordT &ijk) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(const CoordT &ijk) const
{
return ijk == mPos ? mVal[0][0][0] : BaseT::mAcc.getValue(ijk);
}
@@ -377,7 +377,7 @@
template<typename TreeOrAccT>
template<typename RealT, template<typename...> class Vec3T>
-Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, true>::gradient(Vec3T<RealT> xyz) const
+__hostdev__ Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, true>::gradient(Vec3T<RealT> xyz) const
{
this->cache(xyz);
return BaseT::gradient(xyz, mVal);
@@ -393,7 +393,7 @@
template<typename TreeOrAccT>
template<typename RealT, template<typename...> class Vec3T>
-void SampleFromVoxels<TreeOrAccT, 1, true>::cache(Vec3T<RealT>& xyz) const
+__hostdev__ void SampleFromVoxels<TreeOrAccT, 1, true>::cache(Vec3T<RealT>& xyz) const
{
CoordT ijk = Floor<CoordT>(xyz);
if (ijk != mPos) {
@@ -406,7 +406,7 @@
template<typename TreeOrAccT>
template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
{
ValueT val[2][2][2];
CoordT ijk = Floor<CoordT>(xyz);
@@ -418,7 +418,7 @@
template<typename TreeOrAccT>
template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
{
auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); };
@@ -463,7 +463,7 @@
template<typename TreeOrAccT>
template<typename RealT, template<typename...> class Vec3T>
-inline Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, false>::gradient(Vec3T<RealT> xyz) const
+inline __hostdev__ Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, false>::gradient(Vec3T<RealT> xyz) const
{
ValueT val[2][2][2];
CoordT ijk = Floor<CoordT>(xyz);
@@ -473,7 +473,7 @@
template<typename TreeOrAccT>
template<typename RealT, template<typename...> class Vec3T>
-bool SampleFromVoxels<TreeOrAccT, 1, false>::zeroCrossing(Vec3T<RealT> xyz) const
+__hostdev__ bool SampleFromVoxels<TreeOrAccT, 1, false>::zeroCrossing(Vec3T<RealT> xyz) const
{
ValueT val[2][2][2];
CoordT ijk = Floor<CoordT>(xyz);
@@ -510,7 +510,7 @@
}; // TriquadraticSamplerBase
template<typename TreeOrAccT>
-void TriquadraticSampler<TreeOrAccT>::stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const
+__hostdev__ void TriquadraticSampler<TreeOrAccT>::stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const
{
CoordT p(ijk[0] - 1, 0, 0);
for (int dx = 0; dx < 3; ++dx, ++p[0]) {
@@ -526,7 +526,7 @@
template<typename TreeOrAccT>
template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType TriquadraticSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[3][3][3])
+__hostdev__ typename TreeOrAccT::ValueType TriquadraticSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[3][3][3])
{
auto kernel = [](const ValueT* value, double weight)->ValueT {
return weight * (weight * (0.5f * (value[0] + value[2]) - value[1]) +
@@ -545,7 +545,7 @@
}
template<typename TreeOrAccT>
-bool TriquadraticSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[3][3][3])
+__hostdev__ bool TriquadraticSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[3][3][3])
{
static_assert(std::is_floating_point<ValueT>::value, "TrilinearSampler::zeroCrossing requires a floating-point type");
const bool less = v[0][0][0] < ValueT(0);
@@ -624,7 +624,7 @@
template<typename TreeOrAccT>
template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(Vec3T<RealT> xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(Vec3T<RealT> xyz) const
{
this->cache(xyz);
return BaseT::sample(xyz, mVal);
@@ -631,7 +631,7 @@
}
template<typename TreeOrAccT>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(const CoordT &ijk) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(const CoordT &ijk) const
{
return ijk == mPos ? mVal[1][1][1] : BaseT::mAcc.getValue(ijk);
}
@@ -646,7 +646,7 @@
template<typename TreeOrAccT>
template<typename RealT, template<typename...> class Vec3T>
-void SampleFromVoxels<TreeOrAccT, 2, true>::cache(Vec3T<RealT>& xyz) const
+__hostdev__ void SampleFromVoxels<TreeOrAccT, 2, true>::cache(Vec3T<RealT>& xyz) const
{
CoordT ijk = Floor<CoordT>(xyz);
if (ijk != mPos) {
@@ -657,7 +657,7 @@
template<typename TreeOrAccT>
template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, false>::operator()(Vec3T<RealT> xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, false>::operator()(Vec3T<RealT> xyz) const
{
ValueT val[3][3][3];
CoordT ijk = Floor<CoordT>(xyz);
@@ -667,7 +667,7 @@
template<typename TreeOrAccT>
template<typename RealT, template<typename...> class Vec3T>
-bool SampleFromVoxels<TreeOrAccT, 2, false>::zeroCrossing(Vec3T<RealT> xyz) const
+__hostdev__ bool SampleFromVoxels<TreeOrAccT, 2, false>::zeroCrossing(Vec3T<RealT> xyz) const
{
ValueT val[3][3][3];
CoordT ijk = Floor<CoordT>(xyz);
@@ -710,7 +710,7 @@
}; // TricubicSampler
template<typename TreeOrAccT>
-void TricubicSampler<TreeOrAccT>::stencil(const CoordT& ijk, ValueT (&C)[64]) const
+__hostdev__ void TricubicSampler<TreeOrAccT>::stencil(const CoordT& ijk, ValueT (&C)[64]) const
{
auto fetch = [&](int i, int j, int k) -> ValueT& { return C[((i + 1) << 4) + ((j + 1) << 2) + k + 1]; };
@@ -929,7 +929,7 @@
template<typename TreeOrAccT>
template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 3, true>::operator()(Vec3T<RealT> xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 3, true>::operator()(Vec3T<RealT> xyz) const
{
this->cache(xyz);
return BaseT::sample(xyz, mC);
@@ -937,7 +937,7 @@
template<typename TreeOrAccT>
template<typename RealT, template<typename...> class Vec3T>
-void SampleFromVoxels<TreeOrAccT, 3, true>::cache(Vec3T<RealT>& xyz) const
+__hostdev__ void SampleFromVoxels<TreeOrAccT, 3, true>::cache(Vec3T<RealT>& xyz) const
{
CoordT ijk = Floor<CoordT>(xyz);
if (ijk != mPos) {

View File

@@ -168,7 +168,7 @@ def function_parm_wash_tokens(parm):
# if tokens[-1].kind == To
# remove trailing char
if tokens[-1].kind == TokenKind.PUNCTUATION:
if tokens[-1].spelling in {",", ")", ";"}:
if tokens[-1].spelling in (",", ")", ";"):
tokens.pop()
# else:
# print(tokens[-1].spelling)
@@ -179,7 +179,7 @@ def function_parm_wash_tokens(parm):
t_spelling = t.spelling
ok = True
if t_kind == TokenKind.KEYWORD:
if t_spelling in {"const", "restrict", "volatile"}:
if t_spelling in ("const", "restrict", "volatile"):
ok = False
elif t_spelling.startswith("__"):
ok = False # __restrict

View File

@@ -180,7 +180,7 @@ def create_nb_project_main():
f.write(' </logicalFolder>\n')
f.write(' </logicalFolder>\n')
# default, but this dir is in fact not in blender dir so we can ignore it
# default, but this dir is infact not in blender dir so we can ignore it
# f.write(' <sourceFolderFilter>^(nbproject)$</sourceFolderFilter>\n')
f.write(r' <sourceFolderFilter>^(nbproject|__pycache__|.*\.py|.*\.html|.*\.blend)$</sourceFolderFilter>\n')

View File

@@ -81,5 +81,4 @@ if(NOT APPLE)
set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
endif()

View File

@@ -529,7 +529,7 @@ function(SETUP_LIBDIRS)
# NOTE: For all new libraries, use absolute library paths.
# This should eventually be phased out.
# APPLE platform uses full paths for linking libraries, and avoids link_directories.
# APPLE plaform uses full paths for linking libraries, and avoids link_directories.
if(NOT MSVC AND NOT APPLE)
link_directories(${JPEG_LIBPATH} ${PNG_LIBPATH} ${ZLIB_LIBPATH} ${FREETYPE_LIBPATH})

View File

@@ -27,7 +27,7 @@ if(NOT MSVC)
endif()
if(CMAKE_C_COMPILER_ID MATCHES "Clang")
set(MSVC_CLANG ON)
set(MSVC_CLANG On)
set(VC_TOOLS_DIR $ENV{VCToolsRedistDir} CACHE STRING "Location of the msvc redistributables")
set(MSVC_REDIST_DIR ${VC_TOOLS_DIR})
if(DEFINED MSVC_REDIST_DIR)
@@ -53,7 +53,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "Clang")
endif()
if(WITH_WINDOWS_STRIPPED_PDB)
message(WARNING "stripped pdb not supported with clang, disabling..")
set(WITH_WINDOWS_STRIPPED_PDB OFF)
set(WITH_WINDOWS_STRIPPED_PDB Off)
endif()
endif()
@@ -159,7 +159,7 @@ endif()
if(WITH_COMPILER_ASAN AND MSVC AND NOT MSVC_CLANG)
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 19.28.29828)
#set a flag so we don't have to do this comparison all the time
SET(MSVC_ASAN ON)
SET(MSVC_ASAN On)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fsanitize=address")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /fsanitize=address")
string(APPEND CMAKE_EXE_LINKER_FLAGS_DEBUG " /INCREMENTAL:NO")
@@ -179,22 +179,22 @@ endif()
if(WITH_WINDOWS_SCCACHE AND CMAKE_VS_MSBUILD_COMMAND)
message(WARNING "Disabling sccache, sccache is not supported with msbuild")
set(WITH_WINDOWS_SCCACHE OFF)
set(WITH_WINDOWS_SCCACHE Off)
endif()
# Debug Symbol format
# sccache # MSVC_ASAN # format # why
# ON # ON # Z7 # sccache will only play nice with Z7
# ON # OFF # Z7 # sccache will only play nice with Z7
# OFF # ON # Zi # Asan will not play nice with Edit and Continue
# OFF # OFF # ZI # Neither asan nor sscache is enabled Edit and Continue is available
# On # On # Z7 # sccache will only play nice with Z7
# On # Off # Z7 # sccache will only play nice with Z7
# Off # On # Zi # Asan will not play nice with Edit and Continue
# Off # Off # ZI # Neither asan nor sscache is enabled Edit and Continue is available
# Release Symbol format
# sccache # MSVC_ASAN # format # why
# ON # ON # Z7 # sccache will only play nice with Z7
# ON # OFF # Z7 # sccache will only play nice with Z7
# OFF # ON # Zi # Asan will not play nice with Edit and Continue
# OFF # OFF # Zi # Edit and Continue disables some optimizations
# On # On # Z7 # sccache will only play nice with Z7
# On # Off # Z7 # sccache will only play nice with Z7
# Off # On # Zi # Asan will not play nice with Edit and Continue
# Off # Off # Zi # Edit and Continue disables some optimizations
if(WITH_WINDOWS_SCCACHE)
@@ -288,7 +288,7 @@ if(CMAKE_GENERATOR MATCHES "^Visual Studio.+" AND # Only supported in the VS IDE
"EnableMicrosoftCodeAnalysis=false"
"EnableClangTidyCodeAnalysis=true"
)
set(VS_CLANG_TIDY ON)
set(VS_CLANG_TIDY On)
endif()
# Mark libdir as system headers with a lower warn level, to resolve some warnings
@@ -469,7 +469,7 @@ if(WITH_PYTHON)
set(PYTHON_INCLUDE_DIR ${LIBDIR}/python/${_PYTHON_VERSION_NO_DOTS}/include)
set(PYTHON_NUMPY_INCLUDE_DIRS ${LIBDIR}/python/${_PYTHON_VERSION_NO_DOTS}/lib/site-packages/numpy/core/include)
set(NUMPY_FOUND ON)
set(NUMPY_FOUND On)
unset(_PYTHON_VERSION_NO_DOTS)
# uncached vars
set(PYTHON_INCLUDE_DIRS "${PYTHON_INCLUDE_DIR}")
@@ -853,18 +853,18 @@ if(WITH_GMP)
set(GMP_INCLUDE_DIRS ${LIBDIR}/gmp/include)
set(GMP_LIBRARIES ${LIBDIR}/gmp/lib/libgmp-10.lib optimized ${LIBDIR}/gmp/lib/libgmpxx.lib debug ${LIBDIR}/gmp/lib/libgmpxx_d.lib)
set(GMP_ROOT_DIR ${LIBDIR}/gmp)
set(GMP_FOUND ON)
set(GMP_FOUND On)
endif()
if(WITH_POTRACE)
set(POTRACE_INCLUDE_DIRS ${LIBDIR}/potrace/include)
set(POTRACE_LIBRARIES ${LIBDIR}/potrace/lib/potrace.lib)
set(POTRACE_FOUND ON)
set(POTRACE_FOUND On)
endif()
if(WITH_HARU)
if(EXISTS ${LIBDIR}/haru)
set(HARU_FOUND ON)
set(HARU_FOUND On)
set(HARU_ROOT_DIR ${LIBDIR}/haru)
set(HARU_INCLUDE_DIRS ${HARU_ROOT_DIR}/include)
set(HARU_LIBRARIES ${HARU_ROOT_DIR}/lib/libhpdfs.lib)

View File

@@ -27,7 +27,7 @@ if(WITH_WINDOWS_BUNDLE_CRT)
# Install the CRT to the blender.crt Sub folder.
install(FILES ${CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS} DESTINATION ./blender.crt COMPONENT Libraries)
# Generating the manifest is a relatively expensive operation since
# Generating the manifest is a relativly expensive operation since
# it is collecting an sha1 hash for every file required. so only do
# this work when the libs have either changed or the manifest does
# not exist yet.

View File

@@ -11,7 +11,7 @@ import queue
execution_queue = queue.Queue()
# This function can safely be called in another thread.
# This function can savely be called in another thread.
# The function will be executed when the timer runs the next time.
def run_in_main_thread(function):
execution_queue.put(function)

View File

@@ -42,13 +42,8 @@ class SimpleMouseOperator(bpy.types.Operator):
self.y = event.mouse_y
return self.execute(context)
# Only needed if you want to add into a dynamic menu
def menu_func(self, context):
self.layout.operator(SimpleMouseOperator.bl_idname, text="Simple Mouse Operator")
# Register and add to the view menu (required to also use F3 search "Simple Mouse Operator" for quick access)
bpy.utils.register_class(SimpleMouseOperator)
bpy.types.VIEW3D_MT_view.append(menu_func)
# Test call to the newly defined operator.
# Here we call the operator and invoke it, meaning that the settings are taken

View File

@@ -43,7 +43,7 @@ def menu_func(self, context):
self.layout.operator(ExportSomeData.bl_idname, text="Text Export Operator")
# Register and add to the file selector (required to also use F3 search "Text Export Operator" for quick access)
# Register and add to the file selector
bpy.utils.register_class(ExportSomeData)
bpy.types.TOPBAR_MT_file_export.append(menu_func)

View File

@@ -27,14 +27,8 @@ class DialogOperator(bpy.types.Operator):
wm = context.window_manager
return wm.invoke_props_dialog(self)
# Only needed if you want to add into a dynamic menu
def menu_func(self, context):
self.layout.operator(DialogOperator.bl_idname, text="Dialog Operator")
# Register and add to the object menu (required to also use F3 search "Dialog Operator" for quick access)
bpy.utils.register_class(DialogOperator)
bpy.types.VIEW3D_MT_object.append(menu_func)
# Test call.
bpy.ops.object.dialog_operator('INVOKE_DEFAULT')

View File

@@ -41,13 +41,8 @@ class CustomDrawOperator(bpy.types.Operator):
col.prop(self, "my_string")
# Only needed if you want to add into a dynamic menu
def menu_func(self, context):
self.layout.operator(CustomDrawOperator.bl_idname, text="Custom Draw Operator")
# Register and add to the object menu (required to also use F3 search "Custom Draw Operator" for quick access)
bpy.utils.register_class(CustomDrawOperator)
bpy.types.VIEW3D_MT_object.append(menu_func)
# test call
bpy.ops.object.custom_draw('INVOKE_DEFAULT')

View File

@@ -55,13 +55,8 @@ class ModalOperator(bpy.types.Operator):
context.window_manager.modal_handler_add(self)
return {'RUNNING_MODAL'}
# Only needed if you want to add into a dynamic menu
def menu_func(self, context):
self.layout.operator(ModalOperator.bl_idname, text="Modal Operator")
# Register and add to the object menu (required to also use F3 search "Modal Operator" for quick access)
bpy.utils.register_class(ModalOperator)
bpy.types.VIEW3D_MT_object.append(menu_func)
# test call
bpy.ops.object.modal_operator('INVOKE_DEFAULT')

View File

@@ -31,13 +31,8 @@ class SearchEnumOperator(bpy.types.Operator):
context.window_manager.invoke_search_popup(self)
return {'RUNNING_MODAL'}
# Only needed if you want to add into a dynamic menu
def menu_func(self, context):
self.layout.operator(SearchEnumOperator.bl_idname, text="Search Enum Operator")
# Register and add to the object menu (required to also use F3 search "Search Enum Operator" for quick access)
bpy.utils.register_class(SearchEnumOperator)
bpy.types.VIEW3D_MT_object.append(menu_func)
# test call
bpy.ops.object.search_enum_operator('INVOKE_DEFAULT')

View File

@@ -22,13 +22,8 @@ class HelloWorldOperator(bpy.types.Operator):
print("Hello World")
return {'FINISHED'}
# Only needed if you want to add into a dynamic menu
def menu_func(self, context):
self.layout.operator(HelloWorldOperator.bl_idname, text="Hello World Operator")
# Register and add to the view menu (required to also use F3 search "Hello World Operator" for quick access)
bpy.utils.register_class(HelloWorldOperator)
bpy.types.VIEW3D_MT_view.append(menu_func)
# test call to the newly defined operator
bpy.ops.wm.hello_world()

View File

@@ -106,6 +106,24 @@ including advanced features.
floating-point values. These values are interpreted as a plane equation.
.. function:: glColor (red, green, blue, alpha):
B{glColor3b, glColor3d, glColor3f, glColor3i, glColor3s, glColor3ub, glColor3ui, glColor3us,
glColor4b, glColor4d, glColor4f, glColor4i, glColor4s, glColor4ub, glColor4ui, glColor4us,
glColor3bv, glColor3dv, glColor3fv, glColor3iv, glColor3sv, glColor3ubv, glColor3uiv,
glColor3usv, glColor4bv, glColor4dv, glColor4fv, glColor4iv, glColor4sv, glColor4ubv,
glColor4uiv, glColor4usv}
Set a new color.
.. seealso:: `OpenGL Docs <https://khronos.org/registry/OpenGL-Refpages/gl4/html/glColor.xhtml>`__
:type red, green, blue, alpha: Depends on function prototype.
:arg red, green, blue: Specify new red, green, and blue values for the current color.
:arg alpha: Specifies a new alpha value for the current color. Included only in the
four-argument glColor4 commands. (With '4' colors only)
.. function:: glColorMask(red, green, blue, alpha):
Enable and disable writing of frame buffer color components

View File

@@ -728,7 +728,7 @@ Abusing RNA property callbacks
------------------------------
Python-defined RNA properties can have custom callbacks. Trying to perform complex operations
from there, like calling an operator, may work, but is not officially recommended nor supported.
from there, like calling an operator, may work, but is not officialy recommended nor supported.
Main reason is that those callback should be very fast, but additionally, it may for example
create issues with undo/redo system (most operators store an history step, and editing an RNA

View File

@@ -1103,7 +1103,6 @@ context_type_map = {
"selectable_objects": ("Object", True),
"selected_asset_files": ("FileSelectEntry", True),
"selected_bones": ("EditBone", True),
"selected_editable_actions": ("Action", True),
"selected_editable_bones": ("EditBone", True),
"selected_editable_fcurves": ("FCurve", True),
"selected_editable_keyframes": ("Keyframe", True),
@@ -1119,7 +1118,6 @@ context_type_map = {
"selected_pose_bones": ("PoseBone", True),
"selected_pose_bones_from_active_object": ("PoseBone", True),
"selected_sequences": ("Sequence", True),
"selected_visible_actions": ("Action", True),
"selected_visible_fcurves": ("FCurve", True),
"sequences": ("Sequence", True),
"soft_body": ("SoftBodyModifier", False),
@@ -2256,7 +2254,7 @@ def main():
# First monkey patch to load in fake members.
setup_monkey_patch()
# Perform changes to Blender itself.
# Perform changes to Blender it's self.
setup_data = setup_blender()
# eventually, create the dirs

View File

@@ -804,29 +804,31 @@ typedef enum hipDeviceP2PAttr {
} hipDeviceP2PAttr;
typedef struct HIP_MEMCPY3D {
unsigned int srcXInBytes;
unsigned int srcY;
unsigned int srcZ;
unsigned int srcLOD;
size_t srcXInBytes;
size_t srcY;
size_t srcZ;
size_t srcLOD;
hipMemoryType srcMemoryType;
const void* srcHost;
hipDeviceptr_t srcDevice;
hArray srcArray;
unsigned int srcPitch;
unsigned int srcHeight;
unsigned int dstXInBytes;
unsigned int dstY;
unsigned int dstZ;
unsigned int dstLOD;
hArray * srcArray;
void* reserved0;
size_t srcPitch;
size_t srcHeight;
size_t dstXInBytes;
size_t dstY;
size_t dstZ;
size_t dstLOD;
hipMemoryType dstMemoryType;
void* dstHost;
hipDeviceptr_t dstDevice;
hArray dstArray;
unsigned int dstPitch;
unsigned int dstHeight;
unsigned int WidthInBytes;
unsigned int Height;
unsigned int Depth;
hArray * dstArray;
void* reserved1;
size_t dstPitch;
size_t dstHeight;
size_t WidthInBytes;
size_t Height;
size_t Depth;
} HIP_MEMCPY3D;
typedef struct HIP_MEMCPY3D_PEER_st {
@@ -877,7 +879,7 @@ typedef struct HIP_RESOURCE_DESC_st {
hipResourceType resType;
union {
struct {
hArray h_Array;
hArray * h_Array;
} array;
struct {
hipMipmappedArray_t hMipmappedArray;
@@ -1072,10 +1074,9 @@ typedef enum hiprtcResult {
typedef hipError_t HIPAPI thipGetErrorName(hipError_t error, const char** pStr);
typedef hipError_t HIPAPI thipInit(unsigned int Flags);
typedef hipError_t HIPAPI thipDriverGetVersion(int* driverVersion);
typedef hipError_t HIPAPI thipGetDevice(int* device);
typedef hipError_t HIPAPI thipGetDevice(hipDevice_t* device, int ordinal);
typedef hipError_t HIPAPI thipGetDeviceCount(int* count);
typedef hipError_t HIPAPI thipGetDeviceProperties(hipDeviceProp_t* props, int deviceId);
typedef hipError_t HIPAPI thipDeviceGet(hipDevice_t* device, int ordinal);
typedef hipError_t HIPAPI thipDeviceGetName(char* name, int len, hipDevice_t dev);
typedef hipError_t HIPAPI thipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
typedef hipError_t HIPAPI thipDeviceComputeCapability(int* major, int* minor, hipDevice_t dev);
@@ -1208,7 +1209,6 @@ extern thipDriverGetVersion *hipDriverGetVersion;
extern thipGetDevice *hipGetDevice;
extern thipGetDeviceCount *hipGetDeviceCount;
extern thipGetDeviceProperties *hipGetDeviceProperties;
extern thipDeviceGet* hipDeviceGet;
extern thipDeviceGetName *hipDeviceGetName;
extern thipDeviceGetAttribute *hipDeviceGetAttribute;
extern thipDeviceComputeCapability *hipDeviceComputeCapability;
@@ -1333,7 +1333,6 @@ enum {
HIPEW_SUCCESS = 0,
HIPEW_ERROR_OPEN_FAILED = -1,
HIPEW_ERROR_ATEXIT_FAILED = -2,
HIPEW_ERROR_OLD_DRIVER = -3,
};
enum {

View File

@@ -71,7 +71,6 @@ thipDriverGetVersion *hipDriverGetVersion;
thipGetDevice *hipGetDevice;
thipGetDeviceCount *hipGetDeviceCount;
thipGetDeviceProperties *hipGetDeviceProperties;
thipDeviceGet* hipDeviceGet;
thipDeviceGetName *hipDeviceGetName;
thipDeviceGetAttribute *hipDeviceGetAttribute;
thipDeviceComputeCapability *hipDeviceComputeCapability;
@@ -214,36 +213,6 @@ static void hipewHipExit(void) {
}
}
#ifdef _WIN32
static int hipewHasOldDriver(const char *hip_path) {
DWORD verHandle = 0;
DWORD verSize = GetFileVersionInfoSize(hip_path, &verHandle);
int old_driver = 0;
if (verSize != 0) {
LPSTR verData = (LPSTR)malloc(verSize);
if (GetFileVersionInfo(hip_path, verHandle, verSize, verData)) {
LPBYTE lpBuffer = NULL;
UINT size = 0;
if (VerQueryValue(verData, "\\", (VOID FAR * FAR *)&lpBuffer, &size)) {
if (size) {
VS_FIXEDFILEINFO *verInfo = (VS_FIXEDFILEINFO *)lpBuffer;
/* Magic value from
* https://docs.microsoft.com/en-us/windows/win32/api/verrsrc/ns-verrsrc-vs_fixedfileinfo */
if (verInfo->dwSignature == 0xfeef04bd) {
unsigned int fileVersionLS0 = (verInfo->dwFileVersionLS >> 16) & 0xffff;
unsigned int fileversionLS1 = (verInfo->dwFileVersionLS >> 0) & 0xffff;
/* Corresponds to versions older than AMD Radeon Pro 21.Q4. */
old_driver = ((fileVersionLS0 < 3354) || (fileVersionLS0 == 3354 && fileversionLS1 < 13));
}
}
}
}
free(verData);
}
return old_driver;
}
#endif
static int hipewHipInit(void) {
/* Library paths. */
#ifdef _WIN32
@@ -271,14 +240,6 @@ static int hipewHipInit(void) {
return result;
}
#ifdef _WIN32
/* Test for driver version. */
if(hipewHasOldDriver(hip_paths[0])) {
result = HIPEW_ERROR_OLD_DRIVER;
return result;
}
#endif
/* Load library. */
hip_lib = dynamic_library_open_find(hip_paths);
@@ -294,7 +255,6 @@ static int hipewHipInit(void) {
HIP_LIBRARY_FIND_CHECKED(hipGetDevice);
HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount);
HIP_LIBRARY_FIND_CHECKED(hipGetDeviceProperties);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGet);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute);
HIP_LIBRARY_FIND_CHECKED(hipDeviceComputeCapability);

View File

@@ -226,9 +226,6 @@ add_definitions(
-DCCL_NAMESPACE_END=}
)
if(WITH_CYCLES_DEBUG)
add_definitions(-DWITH_CYCLES_DEBUG)
endif()
if(WITH_CYCLES_STANDALONE_GUI)
add_definitions(-DWITH_CYCLES_STANDALONE_GUI)
endif()
@@ -337,7 +334,7 @@ else()
endif()
# Warnings
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_C_COMPILER_ID MATCHES "Clang")
if(CMAKE_COMPILER_IS_GNUCXX)
ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_cxxflag_float_conversion "-Werror=float-conversion")
ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_cxxflag_double_promotion "-Werror=double-promotion")
ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_no_error_unused_macros "-Wno-error=unused-macros")

View File

@@ -138,6 +138,11 @@ endif()
blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
# avoid link failure with clang 3.4 debug
if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND NOT ${CMAKE_C_COMPILER_VERSION} VERSION_LESS '3.4')
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -gline-tables-only")
endif()
add_dependencies(bf_intern_cycles bf_rna)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${ADDON_FILES}" ${CYCLES_INSTALL_PATH})

View File

@@ -233,7 +233,6 @@ def list_render_passes(scene, srl):
if crl.denoising_store_passes:
yield ("Denoising Normal", "XYZ", 'VECTOR')
yield ("Denoising Albedo", "RGB", 'COLOR')
yield ("Denoising Depth", "Z", 'VALUE')
# Custom AOV passes.
for aov in srl.aovs:

View File

@@ -40,10 +40,10 @@ class AddPresetIntegrator(AddPresetBase, Operator):
"cycles.transparent_max_bounces",
"cycles.caustics_reflective",
"cycles.caustics_refractive",
"cycles.blur_glossy",
"cycles.use_fast_gi",
"cycles.ao_bounces",
"cycles.ao_bounces_render",
"cycles.blur_glossy"
"cycles.use_fast_gi"
"cycles.ao_bounces"
"cycles.ao_bounces_render"
]
preset_subdir = "cycles/integrator"

View File

@@ -87,7 +87,7 @@ enum_use_layer_samples = (
enum_sampling_pattern = (
('SOBOL', "Sobol", "Use Sobol random sampling pattern", 0),
('PROGRESSIVE_MULTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern", 1),
('PROGRESSIVE_MUTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern", 1),
)
enum_volume_sampling = (
@@ -218,12 +218,6 @@ enum_denoising_prefilter = (
('ACCURATE', "Accurate", "Prefilter noisy guiding passes before denoising color. Improves quality when guiding passes are noisy using extra processing time", 3),
)
enum_direct_light_sampling_type = (
('MULTIPLE_IMPORTANCE_SAMPLING', "Multiple Importance Sampling", "Multiple importance sampling is used to combine direct light contributions from next-event estimation and forward path tracing", 0),
('FORWARD_PATH_TRACING', "Forward Path Tracing", "Direct light contributions are only sampled using forward path tracing", 1),
('NEXT_EVENT_ESTIMATION', "Next-Event Estimation", "Direct light contributions are only sampled using next-event estimation", 2),
)
def update_render_passes(self, context):
scene = context.scene
view_layer = context.view_layer
@@ -331,13 +325,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default=1024,
)
sample_offset: IntProperty(
name="Sample Offset",
description="Number of samples to skip when starting render",
min=0, max=(1 << 24),
default=0,
)
time_limit: FloatProperty(
name="Time Limit",
description="Limit the render time (excluding synchronization time)."
@@ -352,14 +339,14 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
name="Sampling Pattern",
description="Random sampling pattern used by the integrator. When adaptive sampling is enabled, Progressive Multi-Jitter is always used instead of Sobol",
items=enum_sampling_pattern,
default='PROGRESSIVE_MULTI_JITTER',
default='PROGRESSIVE_MUTI_JITTER',
)
scrambling_distance: FloatProperty(
name="Scrambling Distance",
default=1.0,
min=0.0, max=1.0,
description="Reduce randomization between pixels to improve GPU rendering performance, at the cost of possible rendering artifacts if set too low. Only works when not using adaptive sampling",
description="Lower values give faster rendering with GPU rendering and less noise with all devices at the cost of possible artifacts if set too low. Only works when not using adaptive sampling",
)
preview_scrambling_distance: BoolProperty(
name="Scrambling Distance viewport",
@@ -367,10 +354,10 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
description="Uses the Scrambling Distance value for the viewport. Faster but may flicker",
)
auto_scrambling_distance: BoolProperty(
name="Automatic Scrambling Distance",
adaptive_scrambling_distance: BoolProperty(
name="Adaptive Scrambling Distance",
default=False,
description="Automatically reduce the randomization between pixels to improve GPU rendering performance, at the cost of possible rendering artifacts. Only works when not using adaptive sampling",
description="Uses a formula to adapt the scrambling distance strength based on the sample count",
)
use_layer_samples: EnumProperty(
@@ -428,13 +415,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default=0,
)
direct_light_sampling_type: EnumProperty(
name="Direct Light Sampling",
description="The type of strategy used for sampling direct light contributions",
items=enum_direct_light_sampling_type,
default='MULTIPLE_IMPORTANCE_SAMPLING',
)
min_light_bounces: IntProperty(
name="Min Light Bounces",
description="Minimum number of light bounces. Setting this higher reduces noise in the first bounces, "
@@ -790,8 +770,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
)
use_auto_tile: BoolProperty(
name="Use Tiling",
description="Render high resolution images in tiles to reduce memory usage, using the specified tile size. Tiles are cached to disk while rendering to save memory",
name="Auto Tiles",
description="Automatically render high resolution images in tiles to reduce memory usage, using the specified tile size. Tiles are cached to disk while rendering to save memory",
default=True,
)
tile_size: IntProperty(
@@ -1380,7 +1360,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
elif entry.type == 'CPU':
cpu_devices.append(entry)
# Extend all GPU devices with CPU.
if len(devices) and compute_device_type != 'CPU':
if compute_device_type != 'CPU':
devices.extend(cpu_devices)
return devices
@@ -1398,18 +1378,12 @@ class CyclesPreferences(bpy.types.AddonPreferences):
self.refresh_devices()
return None
def get_compute_device_type(self):
if self.compute_device_type == '':
return 'NONE'
return self.compute_device_type
def get_num_gpu_devices(self):
import _cycles
compute_device_type = self.get_compute_device_type()
device_list = _cycles.available_devices(compute_device_type)
device_list = _cycles.available_devices(self.compute_device_type)
num = 0
for device in device_list:
if device[1] != compute_device_type:
if device[1] != self.compute_device_type:
continue
for dev in self.devices:
if dev.use and dev.id == device[2]:
@@ -1439,9 +1413,9 @@ class CyclesPreferences(bpy.types.AddonPreferences):
col.label(text="and NVIDIA driver version 470 or newer", icon='BLANK1')
elif device_type == 'HIP':
import sys
col.label(text="Requires discrete AMD GPU with RDNA architecture", icon='BLANK1')
col.label(text="Requires discrete AMD GPU with ??? architecture", icon='BLANK1')
if sys.platform[:3] == "win":
col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
col.label(text="and AMD driver version ??? or newer", icon='BLANK1')
return
for device in devices:
@@ -1451,16 +1425,15 @@ class CyclesPreferences(bpy.types.AddonPreferences):
row = layout.row()
row.prop(self, "compute_device_type", expand=True)
compute_device_type = self.get_compute_device_type()
if compute_device_type == 'NONE':
if self.compute_device_type == 'NONE':
return
row = layout.row()
devices = self.get_devices_for_type(compute_device_type)
self._draw_devices(row, compute_device_type, devices)
devices = self.get_devices_for_type(self.compute_device_type)
self._draw_devices(row, self.compute_device_type, devices)
import _cycles
has_peer_memory = 0
for device in _cycles.available_devices(compute_device_type):
for device in _cycles.available_devices(self.compute_device_type):
if device[3] and self.find_existing_device_entry(device).use:
has_peer_memory += 1
if has_peer_memory > 1:

View File

@@ -290,18 +290,15 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
col.active = not (cscene.use_adaptive_sampling and cscene.use_preview_adaptive_sampling)
col.prop(cscene, "sampling_pattern", text="Pattern")
col = layout.column(align=True)
col.prop(cscene, "sample_offset")
layout.separator()
heading = layout.column(align=True, heading="Scrambling Distance")
heading.active = not (cscene.use_adaptive_sampling and cscene.use_preview_adaptive_sampling)
heading.prop(cscene, "auto_scrambling_distance", text="Automatic")
sub = heading.row()
col = layout.column(align=True)
col.active = not (cscene.use_adaptive_sampling and cscene.use_preview_adaptive_sampling)
col.prop(cscene, "scrambling_distance", text="Scrambling Distance")
col.prop(cscene, "adaptive_scrambling_distance", text="Adaptive")
sub = col.row(align=True)
sub.active = not cscene.use_preview_adaptive_sampling
sub.prop(cscene, "preview_scrambling_distance", text="Viewport")
heading.prop(cscene, "scrambling_distance", text="Multiplier")
layout.separator()
@@ -1054,7 +1051,7 @@ class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel):
def has_geometry_visibility(ob):
return ob and ((ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LIGHT', 'VOLUME', 'POINTCLOUD', 'HAIR'}) or
return ob and ((ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LIGHT'}) or
(ob.instance_type == 'COLLECTION' and ob.instance_collection))
@@ -1819,38 +1816,37 @@ class CYCLES_RENDER_PT_debug(CyclesDebugButtonsPanel, Panel):
def draw(self, context):
layout = self.layout
layout.use_property_split = True
layout.use_property_decorate = False # No animation.
scene = context.scene
cscene = scene.cycles
col = layout.column(heading="CPU")
col = layout.column()
col.label(text="CPU Flags:")
row = col.row(align=True)
row.prop(cscene, "debug_use_cpu_sse2", toggle=True)
row.prop(cscene, "debug_use_cpu_sse3", toggle=True)
row.prop(cscene, "debug_use_cpu_sse41", toggle=True)
row.prop(cscene, "debug_use_cpu_avx", toggle=True)
row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
col.prop(cscene, "debug_bvh_layout", text="BVH")
col.prop(cscene, "debug_bvh_layout")
col.separator()
col = layout.column(heading="CUDA")
col = layout.column()
col.label(text="CUDA Flags:")
col.prop(cscene, "debug_use_cuda_adaptive_compile")
col = layout.column(heading="OptiX")
col.prop(cscene, "debug_use_optix_debug", text="Module Debug")
col.separator()
col.prop(cscene, "debug_bvh_type", text="Viewport BVH")
col = layout.column()
col.label(text="OptiX Flags:")
col.prop(cscene, "debug_use_optix_debug")
col.separator()
import _cycles
if _cycles.with_debug:
col.prop(cscene, "direct_light_sampling_type")
col = layout.column()
col.prop(cscene, "debug_bvh_type")
class CYCLES_RENDER_PT_simplify(CyclesButtonsPanel, Panel):

View File

@@ -86,7 +86,7 @@ def do_versions(self):
# Device might not currently be available so this can fail
try:
if system.legacy_compute_device_type == 1:
prop.compute_device_type = 'NONE' # Was OpenCL
prop.compute_device_type = 'OPENCL'
elif system.legacy_compute_device_type == 2:
prop.compute_device_type = 'CUDA'
else:
@@ -97,12 +97,6 @@ def do_versions(self):
# Init device list for UI
prop.get_devices(prop.compute_device_type)
if bpy.context.preferences.version <= (3, 0, 40):
# Disable OpenCL device
prop = bpy.context.preferences.addons[__package__].preferences
if prop.is_property_set("compute_device_type") and prop['compute_device_type'] == 4:
prop.compute_device_type = 'NONE'
# We don't modify startup file because it assumes to
# have all the default values only.
if not bpy.data.is_saved:
@@ -243,7 +237,7 @@ def do_versions(self):
cscene.use_preview_denoising = False
if not cscene.is_property_set("sampling_pattern") or \
cscene.get('sampling_pattern') >= 2:
cscene.sampling_pattern = 'PROGRESSIVE_MULTI_JITTER'
cscene.sampling_pattern = 'PROGRESSIVE_MUTI_JITTER'
# Removal of square samples.
cscene = scene.cycles

View File

@@ -639,7 +639,7 @@ void BlenderSync::sync_camera_motion(
/* TODO(sergey): De-duplicate calculation with camera sync. */
float fov = 2.0f * atanf((0.5f * sensor_size) / bcam.lens / aspectratio);
if (fov != cam->get_fov()) {
VLOG(3) << "Camera " << b_ob.name() << " FOV change detected.";
VLOG(1) << "Camera " << b_ob.name() << " FOV change detected.";
if (motion_time == 0.0f) {
cam->set_fov(fov);
}

View File

@@ -199,7 +199,7 @@ static bool ObtainCacheParticleUV(Hair *hair,
b_mesh->uv_layers.begin(l);
float2 uv = zero_float2();
if (!b_mesh->uv_layers.empty())
if (b_mesh->uv_layers.length())
b_psys.uv_on_emitter(psmd, *b_pa, pa_no, uv_num, &uv.x);
CData->curve_uv.push_back_slow(uv);
@@ -261,7 +261,7 @@ static bool ObtainCacheParticleVcol(Hair *hair,
b_mesh->vertex_colors.begin(l);
float4 vcol = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
if (!b_mesh->vertex_colors.empty())
if (b_mesh->vertex_colors.length())
b_psys.mcol_on_emitter(psmd, *b_pa, pa_no, vcol_num, &vcol.x);
CData->curve_vcol.push_back_slow(vcol);
@@ -304,6 +304,10 @@ static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CDa
}
}
if (num_curves > 0) {
VLOG(1) << "Exporting curve segments for mesh " << hair->name;
}
hair->reserve_curves(hair->num_curves() + num_curves, hair->get_curve_keys().size() + num_keys);
num_keys = 0;
@@ -352,7 +356,7 @@ static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CDa
/* check allocation */
if ((hair->get_curve_keys().size() != num_keys) || (hair->num_curves() != num_curves)) {
VLOG(1) << "Hair memory allocation failed, clearing data.";
VLOG(1) << "Allocation failed, clearing data";
hair->clear(true);
}
}
@@ -408,11 +412,16 @@ static void export_hair_motion_validate_attribute(Hair *hair,
if (num_motion_keys != num_keys || !have_motion) {
/* No motion or hair "topology" changed, remove attributes again. */
if (num_motion_keys != num_keys) {
VLOG(1) << "Hair topology changed, removing motion attribute.";
VLOG(1) << "Hair topology changed, removing attribute.";
}
else {
VLOG(1) << "No motion, removing attribute.";
}
hair->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
}
else if (motion_step > 0) {
VLOG(1) << "Filling in new motion vertex position for motion_step " << motion_step;
/* Motion, fill up previous steps that we might have skipped because
* they had no motion, but we need them anyway now. */
for (int step = 0; step < motion_step; step++) {
@@ -428,12 +437,16 @@ static void export_hair_motion_validate_attribute(Hair *hair,
static void ExportCurveSegmentsMotion(Hair *hair, ParticleCurveData *CData, int motion_step)
{
VLOG(1) << "Exporting curve motion segments for hair " << hair->name << ", motion step "
<< motion_step;
/* find attribute */
Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
bool new_attribute = false;
/* add new attribute if it doesn't exist already */
if (!attr_mP) {
VLOG(1) << "Creating new motion vertex position attribute";
attr_mP = hair->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
new_attribute = true;
}
@@ -669,6 +682,10 @@ static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair)
const int num_keys = b_hair.points.length();
const int num_curves = b_hair.curves.length();
if (num_curves > 0) {
VLOG(1) << "Exporting curve segments for hair " << hair->name;
}
hair->reserve_curves(num_curves, num_keys);
/* Export curves and points. */
@@ -726,11 +743,15 @@ static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair)
static void export_hair_curves_motion(Hair *hair, BL::Hair b_hair, int motion_step)
{
VLOG(1) << "Exporting curve motion segments for hair " << hair->name << ", motion step "
<< motion_step;
/* Find or add attribute. */
Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
bool new_attribute = false;
if (!attr_mP) {
VLOG(1) << "Creating new motion vertex position attribute";
attr_mP = hair->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
new_attribute = true;
}
@@ -819,14 +840,11 @@ void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, H
new_hair.set_used_shaders(used_shaders);
if (view_layer.use_hair) {
#ifdef WITH_HAIR_NODES
if (b_ob_info.object_data.is_a(&RNA_Hair)) {
/* Hair object. */
sync_hair(&new_hair, b_ob_info, false);
}
else
#endif
{
else {
/* Particle hair. */
bool need_undeformed = new_hair.need_attribute(scene, ATTR_STD_GENERATED);
BL::Mesh b_mesh = object_to_mesh(
@@ -873,15 +891,12 @@ void BlenderSync::sync_hair_motion(BL::Depsgraph b_depsgraph,
/* Export deformed coordinates. */
if (ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview)) {
#ifdef WITH_HAIR_NODES
if (b_ob_info.object_data.is_a(&RNA_Hair)) {
/* Hair object. */
sync_hair(hair, b_ob_info, true, motion_step);
return;
}
else
#endif
{
else {
/* Particle hair. */
BL::Mesh b_mesh = object_to_mesh(
b_data, b_ob_info, b_depsgraph, false, Mesh::SUBDIVISION_NONE);

View File

@@ -334,7 +334,7 @@ bool BlenderDisplayDriver::update_begin(const Params &params,
/* Update PBO dimensions if needed.
*
* NOTE: Allocate the PBO for the size which will fit the final render resolution (as in,
* NOTE: Allocate the PBO for the the size which will fit the final render resolution (as in,
* at a resolution divider 1. This was we don't need to recreate graphics interoperability
* objects which are costly and which are tied to the specific underlying buffer size.
* The downside of this approach is that when graphics interoperability is not used we are

View File

@@ -31,11 +31,7 @@ CCL_NAMESPACE_BEGIN
static Geometry::Type determine_geom_type(BObjectInfo &b_ob_info, bool use_particle_hair)
{
#ifdef WITH_HAIR_NODES
if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) {
#else
if (use_particle_hair) {
#endif
return Geometry::HAIR;
}
@@ -219,11 +215,7 @@ void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph,
if (progress.get_cancel())
return;
#ifdef WITH_HAIR_NODES
if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) {
#else
if (use_particle_hair) {
#endif
Hair *hair = static_cast<Hair *>(geom);
sync_hair_motion(b_depsgraph, b_ob_info, hair, motion_step);
}

View File

@@ -555,7 +555,7 @@ static void attr_create_vertex_color(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh,
/* Create uv map attributes. */
static void attr_create_uv_map(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh)
{
if (!b_mesh.uv_layers.empty()) {
if (b_mesh.uv_layers.length() != 0) {
for (BL::MeshUVLoopLayer &l : b_mesh.uv_layers) {
const bool active_render = l.active_render();
AttributeStandard uv_std = (active_render) ? ATTR_STD_UV : ATTR_STD_NONE;
@@ -619,7 +619,7 @@ static void attr_create_uv_map(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh)
static void attr_create_subd_uv_map(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, bool subdivide_uvs)
{
if (!b_mesh.uv_layers.empty()) {
if (b_mesh.uv_layers.length() != 0) {
BL::Mesh::uv_layers_iterator l;
int i = 0;
@@ -951,7 +951,7 @@ static void create_mesh(Scene *scene,
N = attr_N->data_float3();
/* create generated coordinates from undeformed coordinates */
const bool need_default_tangent = (subdivision == false) && (b_mesh.uv_layers.empty()) &&
const bool need_default_tangent = (subdivision == false) && (b_mesh.uv_layers.length() == 0) &&
(mesh->need_attribute(scene, ATTR_STD_UV_TANGENT));
if (mesh->need_attribute(scene, ATTR_STD_GENERATED) || need_default_tangent) {
Attribute *attr = attributes.add(ATTR_STD_GENERATED);

View File

@@ -62,46 +62,31 @@ bool BlenderSync::BKE_object_is_modified(BL::Object &b_ob)
return false;
}
bool BlenderSync::object_is_geometry(BObjectInfo &b_ob_info)
bool BlenderSync::object_is_geometry(BL::Object &b_ob)
{
BL::ID b_ob_data = b_ob_info.object_data;
BL::ID b_ob_data = b_ob.data();
if (!b_ob_data) {
return false;
}
BL::Object::type_enum type = b_ob_info.iter_object.type();
BL::Object::type_enum type = b_ob.type();
if (type == BL::Object::type_VOLUME || type == BL::Object::type_HAIR) {
/* Will be exported attached to mesh. */
return true;
}
else if (type == BL::Object::type_CURVE) {
/* Skip exporting curves without faces, overhead can be
* significant if there are many for path animation. */
BL::Curve b_curve(b_ob_data);
/* Other object types that are not meshes but evaluate to meshes are presented to render engines
* as separate instance objects. Metaballs and surface objects have not been affected by that
* change yet. */
if (type == BL::Object::type_SURFACE || type == BL::Object::type_META) {
return true;
return (b_curve.bevel_object() || b_curve.extrude() != 0.0f || b_curve.bevel_depth() != 0.0f ||
b_curve.dimensions() == BL::Curve::dimensions_2D || b_ob.modifiers.length());
}
return b_ob_data.is_a(&RNA_Mesh);
}
bool BlenderSync::object_can_have_geometry(BL::Object &b_ob)
{
BL::Object::type_enum type = b_ob.type();
switch (type) {
case BL::Object::type_MESH:
case BL::Object::type_CURVE:
case BL::Object::type_SURFACE:
case BL::Object::type_META:
case BL::Object::type_FONT:
case BL::Object::type_HAIR:
case BL::Object::type_POINTCLOUD:
case BL::Object::type_VOLUME:
return true;
default:
return false;
else {
return (b_ob_data.is_a(&RNA_Mesh) || b_ob_data.is_a(&RNA_Curve) ||
b_ob_data.is_a(&RNA_MetaBall));
}
}
@@ -176,11 +161,6 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
if (is_instance) {
persistent_id_array = b_instance.persistent_id();
persistent_id = persistent_id_array.data;
if (!b_ob_info.is_real_object_data()) {
/* Remember which object data the geometry is coming from, so that we can sync it when the
* object has changed. */
instance_geometries_by_object[b_ob_info.real_object.ptr.data].insert(b_ob_info.object_data);
}
}
/* light is handled separately */
@@ -207,7 +187,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
}
/* only interested in object that we can create meshes from */
if (!object_is_geometry(b_ob_info)) {
if (!object_is_geometry(b_ob)) {
return NULL;
}
@@ -294,7 +274,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
object->set_visibility(visibility);
object->set_is_shadow_catcher(b_ob.is_shadow_catcher() || b_parent.is_shadow_catcher());
object->set_is_shadow_catcher(b_ob.is_shadow_catcher());
float shadow_terminator_shading_offset = get_float(cobject, "shadow_terminator_offset");
object->set_shadow_terminator_shading_offset(shadow_terminator_shading_offset);
@@ -580,7 +560,6 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph,
else {
geometry_motion_synced.clear();
}
instance_geometries_by_object.clear();
/* initialize culling */
BlenderObjectCulling culling(scene, b_scene);

View File

@@ -157,6 +157,8 @@ static PyObject *init_func(PyObject * /*self*/, PyObject *args)
DebugFlags().running_inside_blender = true;
VLOG(2) << "Debug flags initialized to:\n" << DebugFlags();
Py_RETURN_NONE;
}
@@ -883,6 +885,8 @@ static PyObject *debug_flags_update_func(PyObject * /*self*/, PyObject *args)
debug_flags_sync_from_scene(b_scene);
VLOG(2) << "Debug flags set to:\n" << DebugFlags();
debug_flags_set = true;
Py_RETURN_NONE;
@@ -892,6 +896,7 @@ static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/
{
debug_flags_reset();
if (debug_flags_set) {
VLOG(2) << "Debug flags reset to:\n" << DebugFlags();
debug_flags_set = false;
}
Py_RETURN_NONE;
@@ -1054,13 +1059,5 @@ void *CCL_python_module_init()
Py_INCREF(Py_False);
}
#ifdef WITH_CYCLES_DEBUG
PyModule_AddObject(mod, "with_debug", Py_True);
Py_INCREF(Py_True);
#else /* WITH_CYCLES_DEBUG */
PyModule_AddObject(mod, "with_debug", Py_False);
Py_INCREF(Py_False);
#endif /* WITH_CYCLES_DEBUG */
return (void *)mod;
}

View File

@@ -129,7 +129,7 @@ void BlenderSession::create_session()
/* reset status/progress */
last_status = "";
last_error = "";
last_progress = -1.0;
last_progress = -1.0f;
start_resize_time = 0.0;
/* create session */
@@ -615,24 +615,6 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
sync->sync_camera(b_render, b_camera_override, width, height, "");
sync->sync_data(
b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state);
/* Filtering settings for combined pass. */
if (pass->get_type() == PASS_COMBINED) {
Integrator *integrator = scene->integrator;
integrator->set_use_direct_light((bake_filter & BL::BakeSettings::pass_filter_DIRECT) != 0);
integrator->set_use_indirect_light((bake_filter & BL::BakeSettings::pass_filter_INDIRECT) !=
0);
integrator->set_use_diffuse((bake_filter & BL::BakeSettings::pass_filter_DIFFUSE) != 0);
integrator->set_use_glossy((bake_filter & BL::BakeSettings::pass_filter_GLOSSY) != 0);
integrator->set_use_transmission(
(bake_filter & BL::BakeSettings::pass_filter_TRANSMISSION) != 0);
integrator->set_use_emission((bake_filter & BL::BakeSettings::pass_filter_EMIT) != 0);
}
/* Always use transpanent background for baking. */
scene->background->set_transparent(true);
/* Load built-in images from Blender. */
builtin_images_load();
}
@@ -859,7 +841,7 @@ void BlenderSession::get_status(string &status, string &substatus)
session->progress.get_status(status, substatus);
}
void BlenderSession::get_progress(double &progress, double &total_time, double &render_time)
void BlenderSession::get_progress(float &progress, double &total_time, double &render_time)
{
session->progress.get_time(total_time, render_time);
progress = session->progress.get_progress();
@@ -867,10 +849,10 @@ void BlenderSession::get_progress(double &progress, double &total_time, double &
void BlenderSession::update_bake_progress()
{
double progress = session->progress.get_progress();
float progress = session->progress.get_progress();
if (progress != last_progress) {
b_engine.update_progress((float)progress);
b_engine.update_progress(progress);
last_progress = progress;
}
}
@@ -879,7 +861,7 @@ void BlenderSession::update_status_progress()
{
string timestatus, status, substatus;
string scene_status = "";
double progress;
float progress;
double total_time, remaining_time = 0, render_time;
float mem_used = (float)session->stats.mem_used / 1024.0f / 1024.0f;
float mem_peak = (float)session->stats.mem_peak / 1024.0f / 1024.0f;
@@ -923,7 +905,7 @@ void BlenderSession::update_status_progress()
last_status_time = current_time;
}
if (progress != last_progress) {
b_engine.update_progress((float)progress);
b_engine.update_progress(progress);
last_progress = progress;
}

View File

@@ -82,7 +82,7 @@ class BlenderSession {
void tag_redraw();
void tag_update();
void get_status(string &status, string &substatus);
void get_progress(double &progress, double &total_time, double &render_time);
void get_progress(float &progress, double &total_time, double &render_time);
void test_cancel();
void update_status_progress();
void update_bake_progress();
@@ -108,7 +108,7 @@ class BlenderSession {
string last_status;
string last_error;
double last_progress;
float last_progress;
double last_status_time;
int width, height;

View File

@@ -162,19 +162,19 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d
/* Object */
else if (b_id.is_a(&RNA_Object)) {
BL::Object b_ob(b_id);
const bool can_have_geometry = object_can_have_geometry(b_ob);
const bool is_light = !can_have_geometry && object_is_light(b_ob);
const bool is_geometry = object_is_geometry(b_ob);
const bool is_light = !is_geometry && object_is_light(b_ob);
if (b_ob.is_instancer() && b_update.is_updated_shading()) {
/* Needed for e.g. object color updates on instancer. */
object_map.set_recalc(b_ob);
}
if (can_have_geometry || is_light) {
if (is_geometry || is_light) {
const bool updated_geometry = b_update.is_updated_geometry();
/* Geometry (mesh, hair, volume). */
if (can_have_geometry) {
if (is_geometry) {
if (b_update.is_updated_transform() || b_update.is_updated_shading()) {
object_map.set_recalc(b_ob);
}
@@ -183,15 +183,6 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d
(object_subdivision_type(b_ob, preview, experimental) != Mesh::SUBDIVISION_NONE)) {
BL::ID key = BKE_object_is_modified(b_ob) ? b_ob : b_ob.data();
geometry_map.set_recalc(key);
/* Sync all contained geometry instances as well when the object changed.. */
map<void *, set<BL::ID>>::const_iterator instance_geometries =
instance_geometries_by_object.find(b_ob.ptr.data);
if (instance_geometries != instance_geometries_by_object.end()) {
for (BL::ID geometry : instance_geometries->second) {
geometry_map.set_recalc(geometry);
}
}
}
if (updated_geometry) {
@@ -365,8 +356,8 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
int samples = get_int(cscene, "samples");
float scrambling_distance = get_float(cscene, "scrambling_distance");
bool auto_scrambling_distance = get_boolean(cscene, "auto_scrambling_distance");
if (auto_scrambling_distance) {
bool adaptive_scrambling_distance = get_boolean(cscene, "adaptive_scrambling_distance");
if (adaptive_scrambling_distance) {
scrambling_distance *= 4.0f / sqrtf(samples);
}
@@ -375,9 +366,7 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
if ((preview && !preview_scrambling_distance) || use_adaptive_sampling)
scrambling_distance = 1.0f;
if (scrambling_distance != 1.0f) {
VLOG(3) << "Using scrambling distance: " << scrambling_distance;
}
VLOG(1) << "Used Scrambling Distance: " << scrambling_distance;
integrator->set_scrambling_distance(scrambling_distance);
if (get_boolean(cscene, "use_fast_gi")) {
@@ -392,12 +381,6 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
integrator->set_ao_bounces(0);
}
#ifdef WITH_CYCLES_DEBUG
DirectLightSamplingType direct_light_sampling_type = (DirectLightSamplingType)get_enum(
cscene, "direct_light_sampling_type", DIRECT_LIGHT_SAMPLING_NUM, DIRECT_LIGHT_SAMPLING_MIS);
integrator->set_direct_light_sampling_type(direct_light_sampling_type);
#endif
const DenoiseParams denoise_params = get_denoise_params(b_scene, b_view_layer, background);
integrator->set_use_denoise(denoise_params.use);
@@ -841,25 +824,18 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
/* samples */
int samples = get_int(cscene, "samples");
int preview_samples = get_int(cscene, "preview_samples");
int sample_offset = get_int(cscene, "sample_offset");
if (background) {
params.samples = samples;
params.sample_offset = sample_offset;
}
else {
params.samples = preview_samples;
if (params.samples == 0) {
if (params.samples == 0)
params.samples = INT_MAX;
}
params.sample_offset = 0;
}
/* Clamp sample offset. */
params.sample_offset = clamp(params.sample_offset, 0, Integrator::MAX_SAMPLES);
/* Clamp samples. */
params.samples = clamp(params.samples, 0, Integrator::MAX_SAMPLES - params.sample_offset);
params.samples = min(params.samples, Integrator::MAX_SAMPLES);
/* Viewport Performance */
params.pixel_size = b_engine.get_preview_pixel_size(b_scene);
@@ -878,7 +854,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
/* Time limit. */
if (background) {
params.time_limit = (double)get_float(cscene, "time_limit");
params.time_limit = get_float(cscene, "time_limit");
}
else {
/* For the viewport it kind of makes more sense to think in terms of the noise floor, which is

View File

@@ -208,8 +208,7 @@ class BlenderSync {
/* util */
void find_shader(BL::ID &id, array<Node *> &used_shaders, Shader *default_shader);
bool BKE_object_is_modified(BL::Object &b_ob);
bool object_is_geometry(BObjectInfo &b_ob_info);
bool object_can_have_geometry(BL::Object &b_ob);
bool object_is_geometry(BL::Object &b_ob);
bool object_is_light(BL::Object &b_ob);
/* variables */
@@ -226,8 +225,6 @@ class BlenderSync {
set<Geometry *> geometry_synced;
set<Geometry *> geometry_motion_synced;
set<Geometry *> geometry_motion_attribute_synced;
/** Remember which geometries come from which objects to be able to sync them after changes. */
map<void *, set<BL::ID>> instance_geometries_by_object;
set<float> motion_times;
void *world_map;
bool world_recalc;

View File

@@ -303,7 +303,7 @@ static inline string image_user_file_path(BL::ImageUser &iuser,
string filepath_str = string(filepath);
if (load_tiled && ima.source() == BL::Image::source_TILED) {
string udim;
if (!ima.tiles.empty()) {
if (ima.tiles.length() > 0) {
udim = to_string(ima.tiles[0].number());
}
string_replace(filepath_str, udim, "<UDIM>");
@@ -647,7 +647,7 @@ static inline Mesh::SubdivisionType object_subdivision_type(BL::Object &b_ob,
{
PointerRNA cobj = RNA_pointer_get(&b_ob.ptr, "cycles");
if (cobj.data && !b_ob.modifiers.empty() && experimental) {
if (cobj.data && b_ob.modifiers.length() > 0 && experimental) {
BL::Modifier mod = b_ob.modifiers[b_ob.modifiers.length() - 1];
bool enabled = preview ? mod.show_viewport() : mod.show_render();

View File

@@ -303,7 +303,7 @@ static void rtc_error_func(void *, enum RTCError, const char *str)
VLOG(1) << str;
}
static double progress_start_time = 0.0;
static double progress_start_time = 0.0f;
static bool rtc_progress_func(void *user_ptr, const double n)
{

View File

@@ -153,7 +153,7 @@ void BVHNode::update_time()
namespace {
struct DumpTraversalContext {
/* Descriptor of while where writing is happening. */
/* Descriptor of wile where writing is happening. */
FILE *stream;
/* Unique identifier of the node current. */
int id;

View File

@@ -178,7 +178,7 @@ class InnerNode : public BVHNode {
reset_unused_children();
}
/* NOTE: This function is only used during binary BVH builder, and it's
/* NOTE: This function is only used during binary BVH builder, and it
* supposed to be configured to have 2 children which will be filled-in in a
* bit. But this is important to have children reset to NULL. */
explicit InnerNode(const BoundBox &bounds) : BVHNode(bounds), num_children_(0)

View File

@@ -30,17 +30,15 @@ BVHOptiX::BVHOptiX(const BVHParams &params_,
: BVH(params_, geometry_, objects_),
device(device),
traversable_handle(0),
as_data(make_unique<device_only_memory<char>>(
device, params.top_level ? "optix tlas" : "optix blas", false)),
motion_transform_data(
make_unique<device_only_memory<char>>(device, "optix motion transform", false))
as_data(device, params_.top_level ? "optix tlas" : "optix blas", false),
motion_transform_data(device, "optix motion transform", false)
{
}
BVHOptiX::~BVHOptiX()
{
/* Acceleration structure memory is delayed freed on device, since deleting the
* BVH may happen while still being used for rendering. */
// Acceleration structure memory is delayed freed on device, since deleting the
// BVH may happen while still being used for rendering.
device->release_optix_bvh(this);
}

View File

@@ -25,16 +25,14 @@
# include "device/memory.h"
# include "util/unique_ptr.h"
CCL_NAMESPACE_BEGIN
class BVHOptiX : public BVH {
public:
Device *device;
uint64_t traversable_handle;
unique_ptr<device_only_memory<char>> as_data;
unique_ptr<device_only_memory<char>> motion_transform_data;
device_only_memory<char> as_data;
device_only_memory<char> motion_transform_data;
protected:
friend class BVH;

View File

@@ -88,7 +88,7 @@ endmacro()
function(cycles_link_directories)
if(APPLE)
# APPLE platform uses full paths for linking libraries, and avoids link_directories.
# APPLE plaform uses full paths for linking libraries, and avoids link_directories.
return()
endif()

View File

@@ -38,6 +38,7 @@ void device_cpu_info(vector<DeviceInfo> &devices)
info.id = "CPU";
info.num = 0;
info.has_osl = true;
info.has_half_images = true;
info.has_nanovdb = true;
info.has_profiling = true;
if (openimagedenoise_supported()) {

View File

@@ -68,8 +68,8 @@ CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_
{
/* Pick any kernel, all of them are supposed to have same level of microarchitecture
* optimization. */
VLOG(1) << "Using " << get_cpu_kernels().integrator_init_from_camera.get_uarch_name()
<< " CPU kernels.";
VLOG(1) << "Will be using " << kernels.integrator_init_from_camera.get_uarch_name()
<< " kernels.";
if (info.cpu_threads == 0) {
info.cpu_threads = TaskScheduler::num_threads();
@@ -93,6 +93,11 @@ CPUDevice::~CPUDevice()
texture_info.free();
}
bool CPUDevice::show_samples() const
{
return (info.cpu_threads == 1);
}
BVHLayoutMask CPUDevice::get_bvh_layout_mask() const
{
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
@@ -292,6 +297,11 @@ void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
Device::build_bvh(bvh, progress, refit);
}
const CPUKernels *CPUDevice::get_cpu_kernels() const
{
return &kernels;
}
void CPUDevice::get_cpu_kernel_thread_globals(
vector<CPUKernelThreadGlobals> &kernel_thread_globals)
{

View File

@@ -57,9 +57,13 @@ class CPUDevice : public Device {
RTCDevice embree_device;
#endif
CPUKernels kernels;
CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_);
~CPUDevice();
virtual bool show_samples() const override;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
/* Returns true if the texture info was copied to the device (meaning, some more
@@ -86,6 +90,7 @@ class CPUDevice : public Device {
void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
virtual const CPUKernels *get_cpu_kernels() const override;
virtual void get_cpu_kernel_thread_globals(
vector<CPUKernelThreadGlobals> &kernel_thread_globals) override;
virtual void *get_cpu_osl_memory() override;

View File

@@ -26,9 +26,6 @@ CCL_NAMESPACE_BEGIN
KERNEL_NAME_EVAL(cpu_avx, name), KERNEL_NAME_EVAL(cpu_avx2, name)
#define REGISTER_KERNEL(name) name(KERNEL_FUNCTIONS(name))
#define REGISTER_KERNEL_FILM_CONVERT(name) \
film_convert_##name(KERNEL_FUNCTIONS(film_convert_##name)), \
film_convert_half_rgba_##name(KERNEL_FUNCTIONS(film_convert_half_rgba_##name))
CPUKernels::CPUKernels()
: /* Integrator. */
@@ -53,25 +50,11 @@ CPUKernels::CPUKernels()
REGISTER_KERNEL(adaptive_sampling_filter_x),
REGISTER_KERNEL(adaptive_sampling_filter_y),
/* Cryptomatte. */
REGISTER_KERNEL(cryptomatte_postprocess),
/* Film Convert. */
REGISTER_KERNEL_FILM_CONVERT(depth),
REGISTER_KERNEL_FILM_CONVERT(mist),
REGISTER_KERNEL_FILM_CONVERT(sample_count),
REGISTER_KERNEL_FILM_CONVERT(float),
REGISTER_KERNEL_FILM_CONVERT(light_path),
REGISTER_KERNEL_FILM_CONVERT(float3),
REGISTER_KERNEL_FILM_CONVERT(motion),
REGISTER_KERNEL_FILM_CONVERT(cryptomatte),
REGISTER_KERNEL_FILM_CONVERT(shadow_catcher),
REGISTER_KERNEL_FILM_CONVERT(shadow_catcher_matte_with_shadow),
REGISTER_KERNEL_FILM_CONVERT(combined),
REGISTER_KERNEL_FILM_CONVERT(float4)
REGISTER_KERNEL(cryptomatte_postprocess)
{
}
#undef REGISTER_KERNEL
#undef REGISTER_KERNEL_FILM_CONVERT
#undef KERNEL_FUNCTIONS
CCL_NAMESPACE_END

View File

@@ -17,13 +17,11 @@
#pragma once
#include "device/cpu/kernel_function.h"
#include "util/half.h"
#include "util/types.h"
CCL_NAMESPACE_BEGIN
struct KernelGlobalsCPU;
struct KernelFilmConvert;
struct IntegratorStateCPU;
struct TileInfo;
@@ -42,7 +40,7 @@ class CPUKernels {
IntegratorInitFunction integrator_init_from_camera;
IntegratorInitFunction integrator_init_from_bake;
IntegratorShadeFunction integrator_intersect_closest;
IntegratorFunction integrator_intersect_closest;
IntegratorFunction integrator_intersect_shadow;
IntegratorFunction integrator_intersect_subsurface;
IntegratorFunction integrator_intersect_volume_stack;
@@ -104,41 +102,6 @@ class CPUKernels {
CryptomattePostprocessFunction cryptomatte_postprocess;
/* Film Convert. */
using FilmConvertFunction = CPUKernelFunction<void (*)(const KernelFilmConvert *kfilm_convert,
const float *buffer,
float *pixel,
const int width,
const int buffer_stride,
const int pixel_stride)>;
using FilmConvertHalfRGBAFunction =
CPUKernelFunction<void (*)(const KernelFilmConvert *kfilm_convert,
const float *buffer,
half4 *pixel,
const int width,
const int buffer_stride)>;
#define KERNEL_FILM_CONVERT_FUNCTION(name) \
FilmConvertFunction film_convert_##name; \
FilmConvertHalfRGBAFunction film_convert_half_rgba_##name;
KERNEL_FILM_CONVERT_FUNCTION(depth)
KERNEL_FILM_CONVERT_FUNCTION(mist)
KERNEL_FILM_CONVERT_FUNCTION(sample_count)
KERNEL_FILM_CONVERT_FUNCTION(float)
KERNEL_FILM_CONVERT_FUNCTION(light_path)
KERNEL_FILM_CONVERT_FUNCTION(float3)
KERNEL_FILM_CONVERT_FUNCTION(motion)
KERNEL_FILM_CONVERT_FUNCTION(cryptomatte)
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher)
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow)
KERNEL_FILM_CONVERT_FUNCTION(combined)
KERNEL_FILM_CONVERT_FUNCTION(float4)
#undef KERNEL_FILM_CONVERT_FUNCTION
CPUKernels();
};

View File

@@ -144,6 +144,7 @@ void device_cuda_info(vector<DeviceInfo> &devices)
info.description = string(name);
info.num = num;
info.has_half_images = (major >= 3);
info.has_nanovdb = true;
info.denoisers = 0;

View File

@@ -46,6 +46,12 @@ bool CUDADevice::have_precompiled_kernels()
return path_exists(cubins_path);
}
bool CUDADevice::show_samples() const
{
/* The CUDADevice only processes one tile at a time, so showing samples is fine. */
return true;
}
BVHLayoutMask CUDADevice::get_bvh_layout_mask() const
{
return BVH_LAYOUT_BVH2;
@@ -236,10 +242,6 @@ string CUDADevice::compile_kernel_get_common_cflags(const uint kernel_features)
cflags += " -DWITH_NANOVDB";
# endif
# ifdef WITH_CYCLES_DEBUG
cflags += " -DWITH_CYCLES_DEBUG";
# endif
return cflags;
}
@@ -376,9 +378,7 @@ string CUDADevice::compile_kernel(const uint kernel_features,
cubin.c_str(),
common_cflags.c_str());
printf("Compiling %sCUDA kernel ...\n%s\n",
(use_adaptive_compilation()) ? "adaptive " : "",
command.c_str());
printf("Compiling CUDA kernel ...\n%s\n", command.c_str());
# ifdef _WIN32
command = "call " + command;
@@ -405,15 +405,13 @@ string CUDADevice::compile_kernel(const uint kernel_features,
bool CUDADevice::load_kernels(const uint kernel_features)
{
/* TODO(sergey): Support kernels re-load for CUDA devices adaptive compile.
/* TODO(sergey): Support kernels re-load for CUDA devices.
*
* Currently re-loading kernel will invalidate memory pointers,
* causing problems in cuCtxSynchronize.
*/
if (cuModule) {
if (use_adaptive_compilation()) {
VLOG(1) << "Skipping CUDA kernel reload for adaptive compilation, not currently supported.";
}
VLOG(1) << "Skipping kernel reload, not currently supported.";
return true;
}
@@ -775,7 +773,6 @@ void CUDADevice::generic_free(device_memory &mem)
if (mem.device_pointer) {
CUDAContextScope scope(this);
thread_scoped_lock lock(cuda_mem_map_mutex);
DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
const CUDAMem &cmem = cuda_mem_map[&mem];
/* If cmem.use_mapped_host is true, reference counting is used
@@ -930,6 +927,7 @@ void CUDADevice::tex_alloc(device_texture &mem)
{
CUDAContextScope scope(this);
/* General variables for both architectures */
string bind_name = mem.name;
size_t dsize = datatype_size(mem.data_type);
size_t size = mem.memory_size();
@@ -1092,6 +1090,7 @@ void CUDADevice::tex_alloc(device_texture &mem)
if (mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
/* Kepler+, bindless textures. */
CUDA_RESOURCE_DESC resDesc;
memset(&resDesc, 0, sizeof(resDesc));
@@ -1142,7 +1141,6 @@ void CUDADevice::tex_free(device_texture &mem)
if (mem.device_pointer) {
CUDAContextScope scope(this);
thread_scoped_lock lock(cuda_mem_map_mutex);
DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
const CUDAMem &cmem = cuda_mem_map[&mem];
if (cmem.texobject) {

View File

@@ -76,6 +76,8 @@ class CUDADevice : public Device {
static bool have_precompiled_kernels();
virtual bool show_samples() const override;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
void set_error(const string &error) override;

View File

@@ -23,7 +23,6 @@
#include "device/queue.h"
#include "device/cpu/device.h"
#include "device/cpu/kernel.h"
#include "device/cuda/device.h"
#include "device/dummy/device.h"
#include "device/hip/device.h"
@@ -286,6 +285,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.description = "Multi Device";
info.num = 0;
info.has_half_images = true;
info.has_nanovdb = true;
info.has_osl = true;
info.has_profiling = true;
@@ -332,6 +332,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
}
/* Accumulate device info. */
info.has_half_images &= device.has_half_images;
info.has_nanovdb &= device.has_nanovdb;
info.has_osl &= device.has_osl;
info.has_profiling &= device.has_profiling;
@@ -362,11 +363,10 @@ unique_ptr<DeviceQueue> Device::gpu_queue_create()
return nullptr;
}
const CPUKernels &Device::get_cpu_kernels()
const CPUKernels *Device::get_cpu_kernels() const
{
/* Initialize CPU kernels once and reuse. */
static CPUKernels kernels;
return kernels;
LOG(FATAL) << "Device does not support CPU kernels.";
return nullptr;
}
void Device::get_cpu_kernel_thread_globals(

View File

@@ -73,6 +73,7 @@ class DeviceInfo {
int num;
bool display_device; /* GPU is used as a display device. */
bool has_nanovdb; /* Support NanoVDB volumes. */
bool has_half_images; /* Support half-float textures. */
bool has_osl; /* Support Open Shading Language. */
bool has_profiling; /* Supports runtime collection of profiling info. */
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
@@ -89,6 +90,7 @@ class DeviceInfo {
num = 0;
cpu_threads = 0;
display_device = false;
has_half_images = false;
has_nanovdb = false;
has_osl = false;
has_profiling = false;
@@ -149,6 +151,10 @@ class Device {
fprintf(stderr, "%s\n", error.c_str());
fflush(stderr);
}
virtual bool show_samples() const
{
return false;
}
virtual BVHLayoutMask get_bvh_layout_mask() const = 0;
/* statistics */
@@ -174,7 +180,7 @@ class Device {
* These may not be used on GPU or multi-devices. */
/* Get CPU kernel functions for native instruction set. */
static const CPUKernels &get_cpu_kernels();
virtual const CPUKernels *get_cpu_kernels() const;
/* Get kernel globals to pass to kernels. */
virtual void get_cpu_kernel_thread_globals(
vector<CPUKernelThreadGlobals> & /*kernel_thread_globals*/);

View File

@@ -57,16 +57,9 @@ bool device_hip_init()
}
}
else {
if (hipew_result == HIPEW_ERROR_ATEXIT_FAILED) {
VLOG(1) << "HIPEW initialization failed: Error setting up atexit() handler";
}
else if (hipew_result == HIPEW_ERROR_OLD_DRIVER) {
VLOG(1) << "HIPEW initialization failed: Driver version too old, requires AMD Radeon Pro "
"21.Q4 driver or newer";
}
else {
VLOG(1) << "HIPEW initialization failed: Error opening HIP dynamic library";
}
VLOG(1) << "HIPEW initialization failed: "
<< ((hipew_result == HIPEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" :
"Error opening the library");
}
return result;
@@ -138,9 +131,9 @@ void device_hip_info(vector<DeviceInfo> &devices)
continue;
}
if (!hipSupportsDevice(num)) {
continue;
}
int major;
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, num);
// TODO : (Arya) What is the last major version we are supporting?
DeviceInfo info;
@@ -148,6 +141,7 @@ void device_hip_info(vector<DeviceInfo> &devices)
info.description = string(name);
info.num = num;
info.has_half_images = (major >= 3);
info.has_nanovdb = true;
info.denoisers = 0;

View File

@@ -47,6 +47,12 @@ bool HIPDevice::have_precompiled_kernels()
return path_exists(fatbins_path);
}
bool HIPDevice::show_samples() const
{
/* The HIPDevice only processes one tile at a time, so showing samples is fine. */
return true;
}
BVHLayoutMask HIPDevice::get_bvh_layout_mask() const
{
return BVH_LAYOUT_BVH2;
@@ -93,7 +99,7 @@ HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
}
/* Setup device and context. */
result = hipDeviceGet(&hipDevice, hipDevId);
result = hipGetDevice(&hipDevice, hipDevId);
if (result != hipSuccess) {
set_error(string_printf("Failed to get HIP device handle from ordinal (%s)",
hipewErrorString(result)));
@@ -140,18 +146,12 @@ HIPDevice::~HIPDevice()
bool HIPDevice::support_device(const uint /*kernel_features*/)
{
if (hipSupportsDevice(hipDevId)) {
return true;
}
else {
/* We only support Navi and above. */
hipDeviceProp_t props;
hipGetDeviceProperties(&props, hipDevId);
int major, minor;
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
set_error(string_printf("HIP backend requires AMD RDNA graphics card or up, but found %s.",
props.name));
return false;
}
// TODO : (Arya) What versions do we plan to support?
return true;
}
bool HIPDevice::check_peer_access(Device *peer_device)
@@ -216,6 +216,7 @@ string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
const string include_path = source_path;
string cflags = string_printf(
"-m%d "
"--ptxas-options=\"-v\" "
"--use_fast_math "
"-DHIPCC "
"-I\"%s\"",
@@ -227,7 +228,10 @@ string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
return cflags;
}
string HIPDevice::compile_kernel(const uint kernel_features, const char *name, const char *base)
string HIPDevice::compile_kernel(const uint kernel_features,
const char *name,
const char *base,
bool force_ptx)
{
/* Compute kernel name. */
int major, minor;
@@ -236,20 +240,35 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c
hipDeviceProp_t props;
hipGetDeviceProperties(&props, hipDevId);
/* gcnArchName can contain tokens after the arch name with features, ie.
* `gfx1010:sramecc-:xnack-` so we tokenize it to get the first part. */
char *arch = strtok(props.gcnArchName, ":");
if (arch == NULL) {
arch = props.gcnArchName;
}
/* Attempt to use kernel provided with Blender. */
if (!use_adaptive_compilation()) {
const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, arch));
VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
if (path_exists(fatbin)) {
VLOG(1) << "Using precompiled kernel.";
return fatbin;
if (!force_ptx) {
const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, props.gcnArchName));
VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
if (path_exists(fatbin)) {
VLOG(1) << "Using precompiled kernel.";
return fatbin;
}
}
/* The driver can JIT-compile PTX generated for older generations, so find the closest one. */
int ptx_major = major, ptx_minor = minor;
while (ptx_major >= 3) {
const string ptx = path_get(
string_printf("lib/%s_compute_%d%d.ptx", name, ptx_major, ptx_minor));
VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
if (path_exists(ptx)) {
VLOG(1) << "Using precompiled kernel.";
return ptx;
}
if (ptx_minor > 0) {
ptx_minor--;
}
else {
ptx_major--;
ptx_minor = 9;
}
}
}
@@ -273,10 +292,12 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c
# ifdef _DEBUG
options.append(" -save-temps");
# endif
options.append(" --amdgpu-target=").append(arch);
options.append(" --amdgpu-target=").append(props.gcnArchName);
const string include_path = source_path;
const string fatbin_file = string_printf("cycles_%s_%s_%s", name, arch, kernel_md5.c_str());
const char *const kernel_arch = props.gcnArchName;
const string fatbin_file = string_printf(
"cycles_%s_%s_%s", name, kernel_arch, kernel_md5.c_str());
const string fatbin = path_cache_get(path_join("kernels", fatbin_file));
VLOG(1) << "Testing for locally compiled kernel " << fatbin << ".";
if (path_exists(fatbin)) {
@@ -286,9 +307,9 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c
# ifdef _WIN32
if (!use_adaptive_compilation() && have_precompiled_kernels()) {
if (!hipSupportsDevice(hipDevId)) {
if (major < 3) {
set_error(
string_printf("HIP backend requires compute capability 10.1 or up, but found %d.%d. "
string_printf("HIP backend requires compute capability 3.0 or up, but found %d.%d. "
"Your GPU is not supported.",
major,
minor));
@@ -339,9 +360,7 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c
source_path.c_str(),
fatbin.c_str());
printf("Compiling %sHIP kernel ...\n%s\n",
(use_adaptive_compilation()) ? "adaptive " : "",
command.c_str());
printf("Compiling HIP kernel ...\n%s\n", command.c_str());
# ifdef _WIN32
command = "call " + command;
@@ -368,14 +387,13 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c
bool HIPDevice::load_kernels(const uint kernel_features)
{
/* TODO(sergey): Support kernels re-load for HIP devices adaptive compile.
/* TODO(sergey): Support kernels re-load for HIP devices.
*
* Currently re-loading kernels will invalidate memory pointers.
* Currently re-loading kernel will invalidate memory pointers,
* causing problems in hipCtxSynchronize.
*/
if (hipModule) {
if (use_adaptive_compilation()) {
VLOG(1) << "Skipping HIP kernel reload for adaptive compilation, not currently supported.";
}
VLOG(1) << "Skipping kernel reload, not currently supported.";
return true;
}
@@ -384,9 +402,8 @@ bool HIPDevice::load_kernels(const uint kernel_features)
return false;
/* check if GPU is supported */
if (!support_device(kernel_features)) {
if (!support_device(kernel_features))
return false;
}
/* get kernel */
const char *kernel_name = "kernel";
@@ -738,7 +755,6 @@ void HIPDevice::generic_free(device_memory &mem)
if (mem.device_pointer) {
HIPContextScope scope(this);
thread_scoped_lock lock(hip_mem_map_mutex);
DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
const HIPMem &cmem = hip_mem_map[&mem];
/* If cmem.use_mapped_host is true, reference counting is used
@@ -892,6 +908,7 @@ void HIPDevice::tex_alloc(device_texture &mem)
{
HIPContextScope scope(this);
/* General variables for both architectures */
string bind_name = mem.name;
size_t dsize = datatype_size(mem.data_type);
size_t size = mem.memory_size();
@@ -981,16 +998,16 @@ void HIPDevice::tex_alloc(device_texture &mem)
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
hip_assert(hipArray3DCreate((hArray *)&array_3d, &desc));
hip_assert(hipArray3DCreate(&array_3d, &desc));
if (!array_3d) {
return;
}
HIP_MEMCPY3D param;
memset(&param, 0, sizeof(HIP_MEMCPY3D));
memset(&param, 0, sizeof(param));
param.dstMemoryType = hipMemoryTypeArray;
param.dstArray = array_3d;
param.dstArray = &array_3d;
param.srcMemoryType = hipMemoryTypeHost;
param.srcHost = mem.host_pointer;
param.srcPitch = src_pitch;
@@ -1056,13 +1073,13 @@ void HIPDevice::tex_alloc(device_texture &mem)
if (mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
/* Bindless textures. */
/* Kepler+, bindless textures. */
hipResourceDesc resDesc;
memset(&resDesc, 0, sizeof(resDesc));
if (array_3d) {
resDesc.resType = hipResourceTypeArray;
resDesc.res.array.h_Array = array_3d;
resDesc.res.array.h_Array = &array_3d;
resDesc.flags = 0;
}
else if (mem.data_height > 0) {
@@ -1107,7 +1124,6 @@ void HIPDevice::tex_free(device_texture &mem)
if (mem.device_pointer) {
HIPContextScope scope(this);
thread_scoped_lock lock(hip_mem_map_mutex);
DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
const HIPMem &cmem = hip_mem_map[&mem];
if (cmem.texobject) {
@@ -1148,8 +1164,6 @@ bool HIPDevice::should_use_graphics_interop()
* possible, but from the empiric measurements it can be considerably slower than using naive
* pixels copy. */
/* Disable graphics interop for now, because of driver bug in 21.40. See T92972 */
# if 0
HIPContextScope scope(this);
int num_all_devices = 0;
@@ -1168,7 +1182,6 @@ bool HIPDevice::should_use_graphics_interop()
return true;
}
}
# endif
return false;
}

View File

@@ -75,6 +75,8 @@ class HIPDevice : public Device {
static bool have_precompiled_kernels();
virtual bool show_samples() const override;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
void set_error(const string &error) override;
@@ -91,7 +93,10 @@ class HIPDevice : public Device {
virtual string compile_kernel_get_common_cflags(const uint kernel_features);
string compile_kernel(const uint kernel_features, const char *name, const char *base = "hip");
string compile_kernel(const uint kernel_features,
const char *name,
const char *base = "hip",
bool force_ptx = false);
virtual bool load_kernels(const uint kernel_features) override;
void reserve_local_memory(const uint kernel_features);

View File

@@ -48,7 +48,7 @@ class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop {
HIPDeviceQueue *queue_ = nullptr;
HIPDevice *device_ = nullptr;
/* OpenGL PBO which is currently registered as the destination for the HIP buffer. */
/* OpenGL PBO which is currently registered as the destination for the CUDA buffer. */
uint opengl_pbo_id_ = 0;
/* Buffer area in pixels of the corresponding PBO. */
int64_t buffer_area_ = 0;

View File

@@ -58,15 +58,6 @@ const char *hipewCompilerPath();
int hipewCompilerVersion();
# endif /* WITH_HIP_DYNLOAD */
static inline bool hipSupportsDevice(const int hipDevId)
{
int major, minor;
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
return (major > 10) || (major == 10 && minor >= 1);
}
CCL_NAMESPACE_END
#endif /* WITH_HIP */

View File

@@ -23,7 +23,7 @@ CCL_NAMESPACE_BEGIN
device_memory::device_memory(Device *device, const char *name, MemoryType type)
: data_type(device_type_traits<uchar>::data_type),
data_elements(device_type_traits<uchar>::num_elements),
data_elements(device_type_traits<uchar>::num_elements_cpu),
data_size(0),
device_size(0),
data_width(0),
@@ -44,6 +44,45 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type)
{
}
device_memory::device_memory(device_memory &&other) noexcept
: data_type(other.data_type),
data_elements(other.data_elements),
data_size(other.data_size),
device_size(other.device_size),
data_width(other.data_width),
data_height(other.data_height),
data_depth(other.data_depth),
type(other.type),
name(other.name),
device(other.device),
device_pointer(other.device_pointer),
host_pointer(other.host_pointer),
shared_pointer(other.shared_pointer),
shared_counter(other.shared_counter),
original_device_ptr(other.original_device_ptr),
original_device_size(other.original_device_size),
original_device(other.original_device),
need_realloc_(other.need_realloc_),
modified(other.modified)
{
other.data_elements = 0;
other.data_size = 0;
other.device_size = 0;
other.data_width = 0;
other.data_height = 0;
other.data_depth = 0;
other.device = 0;
other.device_pointer = 0;
other.host_pointer = 0;
other.shared_pointer = 0;
other.shared_counter = 0;
other.original_device_ptr = 0;
other.original_device_size = 0;
other.original_device = 0;
other.need_realloc_ = false;
other.modified = false;
}
device_memory::~device_memory()
{
assert(shared_pointer == 0);

View File

@@ -81,140 +81,155 @@ static constexpr size_t datatype_size(DataType datatype)
template<typename T> struct device_type_traits {
static const DataType data_type = TYPE_UNKNOWN;
static const size_t num_elements = sizeof(T);
static const size_t num_elements_cpu = sizeof(T);
static const size_t num_elements_gpu = sizeof(T);
};
template<> struct device_type_traits<uchar> {
static const DataType data_type = TYPE_UCHAR;
static const size_t num_elements = 1;
static_assert(sizeof(uchar) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(uchar) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uchar2> {
static const DataType data_type = TYPE_UCHAR;
static const size_t num_elements = 2;
static_assert(sizeof(uchar2) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 2;
static const size_t num_elements_gpu = 2;
static_assert(sizeof(uchar2) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uchar3> {
static const DataType data_type = TYPE_UCHAR;
static const size_t num_elements = 3;
static_assert(sizeof(uchar3) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 3;
static const size_t num_elements_gpu = 3;
static_assert(sizeof(uchar3) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uchar4> {
static const DataType data_type = TYPE_UCHAR;
static const size_t num_elements = 4;
static_assert(sizeof(uchar4) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static_assert(sizeof(uchar4) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uint> {
static const DataType data_type = TYPE_UINT;
static const size_t num_elements = 1;
static_assert(sizeof(uint) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(uint) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uint2> {
static const DataType data_type = TYPE_UINT;
static const size_t num_elements = 2;
static_assert(sizeof(uint2) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 2;
static const size_t num_elements_gpu = 2;
static_assert(sizeof(uint2) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uint3> {
static const DataType data_type = TYPE_UINT;
static const size_t num_elements = 3;
static_assert(sizeof(uint3) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 3;
static const size_t num_elements_gpu = 3;
static_assert(sizeof(uint3) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uint4> {
static const DataType data_type = TYPE_UINT;
static const size_t num_elements = 4;
static_assert(sizeof(uint4) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static_assert(sizeof(uint4) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<int> {
static const DataType data_type = TYPE_INT;
static const size_t num_elements = 1;
static_assert(sizeof(int) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(int) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<int2> {
static const DataType data_type = TYPE_INT;
static const size_t num_elements = 2;
static_assert(sizeof(int2) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 2;
static const size_t num_elements_gpu = 2;
static_assert(sizeof(int2) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<int3> {
static const DataType data_type = TYPE_INT;
static const size_t num_elements = 4;
static_assert(sizeof(int3) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 3;
static_assert(sizeof(int3) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<int4> {
static const DataType data_type = TYPE_INT;
static const size_t num_elements = 4;
static_assert(sizeof(int4) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static_assert(sizeof(int4) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<float> {
static const DataType data_type = TYPE_FLOAT;
static const size_t num_elements = 1;
static_assert(sizeof(float) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(float) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<float2> {
static const DataType data_type = TYPE_FLOAT;
static const size_t num_elements = 2;
static_assert(sizeof(float2) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 2;
static const size_t num_elements_gpu = 2;
static_assert(sizeof(float2) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<float3> {
/* float3 has different size depending on the device, can't use it for interchanging
* memory between CPU and GPU.
*
* Leave body empty to trigger a compile error if used. */
};
template<> struct device_type_traits<packed_float3> {
static const DataType data_type = TYPE_FLOAT;
static const size_t num_elements = 3;
static_assert(sizeof(packed_float3) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 3;
static_assert(sizeof(float3) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<float4> {
static const DataType data_type = TYPE_FLOAT;
static const size_t num_elements = 4;
static_assert(sizeof(float4) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static_assert(sizeof(float4) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<half> {
static const DataType data_type = TYPE_HALF;
static const size_t num_elements = 1;
static_assert(sizeof(half) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(half) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<ushort4> {
static const DataType data_type = TYPE_UINT16;
static const size_t num_elements = 4;
static_assert(sizeof(ushort4) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static_assert(sizeof(ushort4) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uint16_t> {
static const DataType data_type = TYPE_UINT16;
static const size_t num_elements = 1;
static_assert(sizeof(uint16_t) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(uint16_t) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<half4> {
static const DataType data_type = TYPE_HALF;
static const size_t num_elements = 4;
static_assert(sizeof(half4) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static_assert(sizeof(half4) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uint64_t> {
static const DataType data_type = TYPE_UINT64;
static const size_t num_elements = 1;
static_assert(sizeof(uint64_t) == num_elements * datatype_size(data_type));
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(uint64_t) == num_elements_cpu * datatype_size(data_type));
};
/* Device Memory
@@ -266,16 +281,11 @@ class device_memory {
/* Only create through subclasses. */
device_memory(Device *device, const char *name, MemoryType type);
device_memory(device_memory &&other) noexcept;
/* No copying and allowed.
*
* This is because device implementation might need to register device memory in an allocation
* map of some sort and use pointer as a key to identify blocks. Moving data from one place to
* another bypassing device allocation routines will make those maps hard to maintain. */
/* No copying allowed. */
device_memory(const device_memory &) = delete;
device_memory(device_memory &&other) noexcept = delete;
device_memory &operator=(const device_memory &) = delete;
device_memory &operator=(device_memory &&) = delete;
/* Host allocation on the device. All host_pointer memory should be
* allocated with these functions, for devices that support using
@@ -310,7 +320,9 @@ template<typename T> class device_only_memory : public device_memory {
: device_memory(device, name, allow_host_memory_fallback ? MEM_READ_WRITE : MEM_DEVICE_ONLY)
{
data_type = device_type_traits<T>::data_type;
data_elements = max(device_type_traits<T>::num_elements, 1);
data_elements = max(device_is_cpu() ? device_type_traits<T>::num_elements_cpu :
device_type_traits<T>::num_elements_gpu,
1);
}
device_only_memory(device_only_memory &&other) noexcept : device_memory(std::move(other))
@@ -366,11 +378,15 @@ template<typename T> class device_only_memory : public device_memory {
template<typename T> class device_vector : public device_memory {
public:
/* Can only use this for types that have the same size on CPU and GPU. */
static_assert(device_type_traits<T>::num_elements_cpu ==
device_type_traits<T>::num_elements_gpu);
device_vector(Device *device, const char *name, MemoryType type)
: device_memory(device, name, type)
{
data_type = device_type_traits<T>::data_type;
data_elements = device_type_traits<T>::num_elements;
data_elements = device_type_traits<T>::num_elements_cpu;
modified = true;
need_realloc_ = true;

View File

@@ -109,6 +109,14 @@ class MultiDevice : public Device {
return error_msg;
}
virtual bool show_samples() const override
{
if (devices.size() > 1) {
return false;
}
return devices.front().device->show_samples();
}
virtual BVHLayoutMask get_bvh_layout_mask() const override
{
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;

View File

@@ -48,6 +48,14 @@ OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
{
}
OptiXDevice::Denoiser::~Denoiser()
{
const CUDAContextScope scope(device);
if (optix_denoiser != nullptr) {
optixDenoiserDestroy(optix_denoiser);
}
}
OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: CUDADevice(info, stats, profiler),
sbt_data(this, "__sbt", MEM_READ_ONLY),
@@ -83,7 +91,6 @@ OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
};
# endif
if (DebugFlags().optix.use_debug) {
VLOG(1) << "Using OptiX debug mode.";
options.validationMode = OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL;
}
optix_assert(optixDeviceContextCreate(cuContext, &options, &context));
@@ -125,11 +132,6 @@ OptiXDevice::~OptiXDevice()
}
}
/* Make sure denoiser is destroyed before device context! */
if (denoiser_.optix_denoiser != nullptr) {
optixDenoiserDestroy(denoiser_.optix_denoiser);
}
optixDeviceContextDestroy(context);
}
@@ -208,15 +210,11 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
}
else {
module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
}
module_options.boundValues = nullptr;
module_options.numBoundValues = 0;
# if OPTIX_ABI_VERSION >= 55
module_options.payloadTypes = nullptr;
module_options.numPayloadTypes = 0;
# endif
OptixPipelineCompileOptions pipeline_options = {};
/* Default to no motion blur and two-level graph, since it is the fastest option. */
@@ -231,11 +229,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
if (kernel_features & KERNEL_FEATURE_HAIR) {
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
# if OPTIX_ABI_VERSION >= 55
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM;
# else
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE;
# endif
}
else
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
@@ -332,13 +326,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
/* Built-in thick curve intersection. */
OptixBuiltinISOptions builtin_options = {};
# if OPTIX_ABI_VERSION >= 55
builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE;
builtin_options.curveEndcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT; /* Disable endcaps. */
# else
builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
# endif
builtin_options.usesMotionBlur = false;
optix_assert(optixBuiltinISModuleGet(
@@ -425,7 +413,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
}
else {
link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
}
if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
@@ -895,31 +883,27 @@ bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
optix_assert(optixDenoiserComputeMemoryResources(
denoiser_.optix_denoiser, buffer_params.width, buffer_params.height, &sizes));
/* Denoiser is invoked on whole images only, so no overlap needed (would be used for tiling). */
denoiser_.scratch_size = sizes.withoutOverlapScratchSizeInBytes;
denoiser_.scratch_size = sizes.withOverlapScratchSizeInBytes;
denoiser_.scratch_offset = sizes.stateSizeInBytes;
/* Allocate denoiser state if tile size has changed since last setup. */
denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size);
/* Initialize denoiser state for the current tile size. */
const OptixResult result = optixDenoiserSetup(
denoiser_.optix_denoiser,
0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
on a stream that is not the default stream */
buffer_params.width,
buffer_params.height,
denoiser_.state.device_pointer,
denoiser_.scratch_offset,
denoiser_.state.device_pointer + denoiser_.scratch_offset,
denoiser_.scratch_size);
const OptixResult result = optixDenoiserSetup(denoiser_.optix_denoiser,
denoiser_.queue.stream(),
buffer_params.width,
buffer_params.height,
denoiser_.state.device_pointer,
denoiser_.scratch_offset,
denoiser_.state.device_pointer +
denoiser_.scratch_offset,
denoiser_.scratch_size);
if (result != OPTIX_SUCCESS) {
set_error("Failed to set up OptiX denoiser");
return false;
}
cuda_assert(cuCtxSynchronize());
denoiser_.is_configured = true;
denoiser_.configured_size.x = buffer_params.width;
denoiser_.configured_size.y = buffer_params.height;
@@ -954,6 +938,8 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}
device_vector<float> fake_albedo(this, "fake_albedo", MEM_READ_WRITE);
/* Optional albedo and color passes. */
if (context.num_input_passes > 1) {
const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
@@ -984,7 +970,6 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
/* Finally run denoising. */
OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
OptixDenoiserLayer image_layers = {};
image_layers.input = color_layer;
image_layers.output = output_layer;
@@ -1046,7 +1031,7 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
return false;
}
device_only_memory<char> &out_data = *bvh->as_data;
device_only_memory<char> &out_data = bvh->as_data;
if (operation == OPTIX_BUILD_OPERATION_BUILD) {
assert(out_data.device == this);
out_data.alloc_to_device(sizes.outputSizeInBytes);
@@ -1137,7 +1122,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
operation = OPTIX_BUILD_OPERATION_UPDATE;
}
else {
bvh_optix->as_data->free();
bvh_optix->as_data.free();
bvh_optix->traversable_handle = 0;
}
@@ -1192,15 +1177,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
int ka = max(k0 - 1, curve.first_key);
int kb = min(k1 + 1, curve.first_key + curve.num_keys - 1);
index_data[i] = i * 4;
float4 *const v = vertex_data.data() + step * num_vertices + index_data[i];
# if OPTIX_ABI_VERSION >= 55
v[0] = make_float4(keys[ka].x, keys[ka].y, keys[ka].z, curve_radius[ka]);
v[1] = make_float4(keys[k0].x, keys[k0].y, keys[k0].z, curve_radius[k0]);
v[2] = make_float4(keys[k1].x, keys[k1].y, keys[k1].z, curve_radius[k1]);
v[3] = make_float4(keys[kb].x, keys[kb].y, keys[kb].z, curve_radius[kb]);
# else
const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x);
const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y);
const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z);
@@ -1213,6 +1189,8 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f;
static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f;
index_data[i] = i * 4;
float4 *const v = vertex_data.data() + step * num_vertices + index_data[i];
v[0] = make_float4(
dot(cr2bsp0, px), dot(cr2bsp0, py), dot(cr2bsp0, pz), dot(cr2bsp0, pw));
v[1] = make_float4(
@@ -1221,7 +1199,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
dot(cr2bsp2, px), dot(cr2bsp2, py), dot(cr2bsp2, pz), dot(cr2bsp2, pw));
v[3] = make_float4(
dot(cr2bsp3, px), dot(cr2bsp3, py), dot(cr2bsp3, pz), dot(cr2bsp3, pw));
# endif
}
else {
BoundBox bounds = BoundBox::empty;
@@ -1263,11 +1240,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
OptixBuildInput build_input = {};
if (hair->curve_shape == CURVE_THICK) {
build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES;
# if OPTIX_ABI_VERSION >= 55
build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
# else
build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
# endif
build_input.curveArray.numPrimitives = num_segments;
build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
build_input.curveArray.numVertices = num_vertices;
@@ -1370,9 +1343,9 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
unsigned int num_instances = 0;
unsigned int max_num_instances = 0xFFFFFFFF;
bvh_optix->as_data->free();
bvh_optix->as_data.free();
bvh_optix->traversable_handle = 0;
bvh_optix->motion_transform_data->free();
bvh_optix->motion_transform_data.free();
optixDeviceContextGetProperty(context,
OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID,
@@ -1405,8 +1378,8 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
}
}
assert(bvh_optix->motion_transform_data->device == this);
bvh_optix->motion_transform_data->alloc_to_device(total_motion_transform_size);
assert(bvh_optix->motion_transform_data.device == this);
bvh_optix->motion_transform_data.alloc_to_device(total_motion_transform_size);
}
for (Object *ob : bvh->objects) {
@@ -1448,12 +1421,9 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
}
}
# if OPTIX_ABI_VERSION < 55
/* Cannot disable any-hit program for thick curves, since it needs to filter out endcaps. */
else
# endif
{
/* Can disable __anyhit__kernel_optix_visibility_test by default.
else {
/* Can disable __anyhit__kernel_optix_visibility_test by default (except for thick curves,
* since it needs to filter out end-caps there).
* It is enabled where necessary (visibility mask exceeds 8 bits or the other any-hit
* programs like __anyhit__kernel_optix_shadow_all_hit) via OPTIX_RAY_FLAG_ENFORCE_ANYHIT.
*/
@@ -1470,7 +1440,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
motion_transform_offset = align_up(motion_transform_offset,
OPTIX_TRANSFORM_BYTE_ALIGNMENT);
CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data->device_pointer +
CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data.device_pointer +
motion_transform_offset;
motion_transform_offset += motion_transform_size;
@@ -1523,6 +1493,9 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
cuMemcpyHtoD(motion_transform_gpu, &motion_transform, motion_transform_size);
delete[] reinterpret_cast<uint8_t *>(&motion_transform);
/* Disable instance transform if object uses motion transform already. */
instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
/* Get traversable handle to motion transform. */
optixConvertPointerToTraversableHandle(context,
motion_transform_gpu,
@@ -1536,6 +1509,10 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
/* Set transform matrix. */
memcpy(instance.transform, &ob->get_tfm(), sizeof(instance.transform));
}
else {
/* Disable instance transform if geometry already has it applied to vertex data. */
instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
}
}
}

View File

@@ -23,7 +23,6 @@
# include "device/optix/queue.h"
# include "device/optix/util.h"
# include "kernel/types.h"
# include "util/unique_ptr.h"
CCL_NAMESPACE_BEGIN
@@ -77,12 +76,13 @@ class OptiXDevice : public CUDADevice {
device_only_memory<KernelParamsOptiX> launch_params;
OptixTraversableHandle tlas_handle = 0;
vector<unique_ptr<device_only_memory<char>>> delayed_free_bvh_memory;
vector<device_only_memory<char>> delayed_free_bvh_memory;
thread_mutex delayed_free_bvh_mutex;
class Denoiser {
public:
explicit Denoiser(OptiXDevice *device);
~Denoiser();
OptiXDevice *device;
OptiXDeviceQueue queue;

View File

@@ -73,8 +73,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *a
sizeof(device_ptr),
cuda_stream_));
if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
if (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
cuda_device_assert(
cuda_device_,
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),

View File

@@ -3,7 +3,7 @@ This program uses code from various sources, the default license is Apache 2.0
for all code, with the following exceptions.
Modified BSD License
* Code adapted from Open Shading Language
* Code adapated from Open Shading Language
* Sobol direction vectors
* Matrix inversion code from OpenEXR
* MD5 Hash code

View File

@@ -29,14 +29,23 @@ unique_ptr<Denoiser> Denoiser::create(Device *path_trace_device, const DenoisePa
{
DCHECK(params.use);
if (params.type == DENOISER_OPTIX && Device::available_devices(DEVICE_MASK_OPTIX).size()) {
return make_unique<OptiXDenoiser>(path_trace_device, params);
switch (params.type) {
case DENOISER_OPTIX:
return make_unique<OptiXDenoiser>(path_trace_device, params);
case DENOISER_OPENIMAGEDENOISE:
return make_unique<OIDNDenoiser>(path_trace_device, params);
case DENOISER_NUM:
case DENOISER_NONE:
case DENOISER_ALL:
/* pass */
break;
}
/* Always fallback to OIDN. */
DenoiseParams oidn_params = params;
oidn_params.type = DENOISER_OPENIMAGEDENOISE;
return make_unique<OIDNDenoiser>(path_trace_device, oidn_params);
LOG(FATAL) << "Unhandled denoiser type " << params.type << ", should never happen.";
return nullptr;
}
Denoiser::Denoiser(Device *path_trace_device, const DenoiseParams &params)

View File

@@ -47,6 +47,9 @@ static bool oidn_progress_monitor_function(void *user_ptr, double /*n*/)
OIDNDenoiser *oidn_denoiser = reinterpret_cast<OIDNDenoiser *>(user_ptr);
return !oidn_denoiser->is_cancelled();
}
#endif
#ifdef WITH_OPENIMAGEDENOISE
class OIDNPass {
public:
@@ -544,6 +547,7 @@ class OIDNDenoiseContext {
* the fake values and denoising of passes which do need albedo can no longer happen. */
bool albedo_replaced_with_fake_ = false;
};
#endif
static unique_ptr<DeviceQueue> create_device_queue(const RenderBuffers *render_buffers)
{
@@ -578,20 +582,18 @@ static void copy_render_buffers_to_device(unique_ptr<DeviceQueue> &queue,
}
}
#endif
bool OIDNDenoiser::denoise_buffer(const BufferParams &buffer_params,
RenderBuffers *render_buffers,
const int num_samples,
bool allow_inplace_modification)
{
#ifdef WITH_OPENIMAGEDENOISE
thread_scoped_lock lock(mutex_);
/* Make sure the host-side data is available for denoising. */
unique_ptr<DeviceQueue> queue = create_device_queue(render_buffers);
copy_render_buffers_from_device(queue, render_buffers);
#ifdef WITH_OPENIMAGEDENOISE
OIDNDenoiseContext context(
this, params_, buffer_params, render_buffers, num_samples, allow_inplace_modification);
@@ -618,11 +620,6 @@ bool OIDNDenoiser::denoise_buffer(const BufferParams &buffer_params,
* copies data from the device it doesn't overwrite the denoiser buffers. */
copy_render_buffers_to_device(queue, render_buffers);
}
#else
(void)buffer_params;
(void)render_buffers;
(void)num_samples;
(void)allow_inplace_modification;
#endif
/* This code is not supposed to run when compiled without OIDN support, so can assume if we made

View File

@@ -138,6 +138,10 @@ bool PassAccessor::get_render_tile_pixels(const RenderBuffers *render_buffers,
return false;
}
if (pass_access_info_.offset == PASS_UNUSED) {
return false;
}
const PassType type = pass_access_info_.type;
const PassMode mode = pass_access_info_.mode;
const PassInfo pass_info = Pass::get_info(type, pass_access_info_.include_albedo);

View File

@@ -14,12 +14,9 @@
* limitations under the License.
*/
#include "device/device.h"
#include "integrator/pass_accessor_cpu.h"
#include "session/buffers.h"
#include "util/log.h"
#include "util/tbb.h"
@@ -36,16 +33,70 @@ CCL_NAMESPACE_BEGIN
* Kernel processing.
*/
template<typename Processor>
inline void PassAccessorCPU::run_get_pass_kernel_processor(const RenderBuffers *render_buffers,
const BufferParams &buffer_params,
const Destination &destination,
const Processor &processor) const
{
KernelFilmConvert kfilm_convert;
init_kernel_film_convert(&kfilm_convert, buffer_params, destination);
if (destination.pixels) {
/* NOTE: No overlays are applied since they are not used for final renders.
* Can be supported via some sort of specialization to avoid code duplication. */
run_get_pass_kernel_processor_float(
&kfilm_convert, render_buffers, buffer_params, destination, processor);
}
if (destination.pixels_half_rgba) {
/* TODO(sergey): Consider adding specialization to avoid per-pixel overlay check. */
if (destination.num_components == 1) {
run_get_pass_kernel_processor_half_rgba(&kfilm_convert,
render_buffers,
buffer_params,
destination,
[&processor](const KernelFilmConvert *kfilm_convert,
ccl_global const float *buffer,
float *pixel_rgba) {
float pixel;
processor(kfilm_convert, buffer, &pixel);
pixel_rgba[0] = pixel;
pixel_rgba[1] = pixel;
pixel_rgba[2] = pixel;
pixel_rgba[3] = 1.0f;
});
}
else if (destination.num_components == 3) {
run_get_pass_kernel_processor_half_rgba(&kfilm_convert,
render_buffers,
buffer_params,
destination,
[&processor](const KernelFilmConvert *kfilm_convert,
ccl_global const float *buffer,
float *pixel_rgba) {
processor(kfilm_convert, buffer, pixel_rgba);
pixel_rgba[3] = 1.0f;
});
}
else if (destination.num_components == 4) {
run_get_pass_kernel_processor_half_rgba(
&kfilm_convert, render_buffers, buffer_params, destination, processor);
}
}
}
template<typename Processor>
inline void PassAccessorCPU::run_get_pass_kernel_processor_float(
const KernelFilmConvert *kfilm_convert,
const RenderBuffers *render_buffers,
const BufferParams &buffer_params,
const Destination &destination,
const CPUKernels::FilmConvertFunction func) const
const Processor &processor) const
{
/* NOTE: No overlays are applied since they are not used for final renders.
* Can be supported via some sort of specialization to avoid code duplication. */
DCHECK_EQ(destination.stride, 0) << "Custom stride for float destination is not implemented.";
const int64_t pass_stride = buffer_params.pass_stride;
@@ -61,16 +112,21 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_float(
const float *buffer = window_data + y * buffer_row_stride;
float *pixel = destination.pixels +
(y * buffer_params.width + destination.offset) * pixel_stride;
func(kfilm_convert, buffer, pixel, buffer_params.window_width, pass_stride, pixel_stride);
for (int64_t x = 0; x < buffer_params.window_width;
++x, buffer += pass_stride, pixel += pixel_stride) {
processor(kfilm_convert, buffer, pixel);
}
});
}
template<typename Processor>
inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
const KernelFilmConvert *kfilm_convert,
const RenderBuffers *render_buffers,
const BufferParams &buffer_params,
const Destination &destination,
const CPUKernels::FilmConvertHalfRGBAFunction func) const
const Processor &processor) const
{
const int64_t pass_stride = buffer_params.pass_stride;
const int64_t buffer_row_stride = buffer_params.stride * buffer_params.pass_stride;
@@ -85,7 +141,16 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
tbb::parallel_for(0, buffer_params.window_height, [&](int64_t y) {
const float *buffer = window_data + y * buffer_row_stride;
half4 *pixel = dst_start + y * destination_stride;
func(kfilm_convert, buffer, pixel, buffer_params.window_width, pass_stride);
for (int64_t x = 0; x < buffer_params.window_width; ++x, buffer += pass_stride, ++pixel) {
float pixel_rgba[4];
processor(kfilm_convert, buffer, pixel_rgba);
film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel_rgba);
*pixel = float4_to_half4_display(
make_float4(pixel_rgba[0], pixel_rgba[1], pixel_rgba[2], pixel_rgba[3]));
}
});
}
@@ -98,25 +163,8 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
const BufferParams &buffer_params, \
const Destination &destination) const \
{ \
const CPUKernels &kernels = Device::get_cpu_kernels(); \
KernelFilmConvert kfilm_convert; \
init_kernel_film_convert(&kfilm_convert, buffer_params, destination); \
\
if (destination.pixels) { \
run_get_pass_kernel_processor_float(&kfilm_convert, \
render_buffers, \
buffer_params, \
destination, \
kernels.film_convert_##pass); \
} \
\
if (destination.pixels_half_rgba) { \
run_get_pass_kernel_processor_half_rgba(&kfilm_convert, \
render_buffers, \
buffer_params, \
destination, \
kernels.film_convert_half_rgba_##pass); \
} \
run_get_pass_kernel_processor( \
render_buffers, buffer_params, destination, film_get_pass_pixel_##pass); \
}
/* Float (scalar) passes. */

View File

@@ -16,8 +16,6 @@
#pragma once
#include "device/cpu/kernel.h"
#include "integrator/pass_accessor.h"
CCL_NAMESPACE_BEGIN
@@ -30,19 +28,25 @@ class PassAccessorCPU : public PassAccessor {
using PassAccessor::PassAccessor;
protected:
inline void run_get_pass_kernel_processor_float(
const KernelFilmConvert *kfilm_convert,
const RenderBuffers *render_buffers,
const BufferParams &buffer_params,
const Destination &destination,
const CPUKernels::FilmConvertFunction func) const;
template<typename Processor>
inline void run_get_pass_kernel_processor(const RenderBuffers *render_buffers,
const BufferParams &buffer_params,
const Destination &destination,
const Processor &processor) const;
inline void run_get_pass_kernel_processor_half_rgba(
const KernelFilmConvert *kfilm_convert,
const RenderBuffers *render_buffers,
const BufferParams &buffer_params,
const Destination &destination,
const CPUKernels::FilmConvertHalfRGBAFunction func) const;
template<typename Processor>
inline void run_get_pass_kernel_processor_float(const KernelFilmConvert *kfilm_convert,
const RenderBuffers *render_buffers,
const BufferParams &buffer_params,
const Destination &destination,
const Processor &processor) const;
template<typename Processor>
inline void run_get_pass_kernel_processor_half_rgba(const KernelFilmConvert *kfilm_convert,
const RenderBuffers *render_buffers,
const BufferParams &buffer_params,
const Destination &destination,
const Processor &processor) const;
#define DECLARE_PASS_ACCESSOR(pass) \
virtual void get_pass_##pass(const RenderBuffers *render_buffers, \

View File

@@ -296,13 +296,13 @@ static BufferParams scale_buffer_params(const BufferParams &params, int resoluti
scaled_params.window_x = params.window_x / resolution_divider;
scaled_params.window_y = params.window_y / resolution_divider;
scaled_params.window_width = max(1, params.window_width / resolution_divider);
scaled_params.window_height = max(1, params.window_height / resolution_divider);
scaled_params.window_width = params.window_width / resolution_divider;
scaled_params.window_height = params.window_height / resolution_divider;
scaled_params.full_x = params.full_x / resolution_divider;
scaled_params.full_y = params.full_y / resolution_divider;
scaled_params.full_width = max(1, params.full_width / resolution_divider);
scaled_params.full_height = max(1, params.full_height / resolution_divider);
scaled_params.full_width = params.full_width / resolution_divider;
scaled_params.full_height = params.full_height / resolution_divider;
scaled_params.update_offset_stride();
@@ -380,10 +380,7 @@ void PathTrace::path_trace(RenderWork &render_work)
PathTraceWork *path_trace_work = path_trace_works_[i].get();
PathTraceWork::RenderStatistics statistics;
path_trace_work->render_samples(statistics,
render_work.path_trace.start_sample,
num_samples,
render_work.path_trace.sample_offset);
path_trace_work->render_samples(statistics, render_work.path_trace.start_sample, num_samples);
const double work_time = time_dt() - work_start_time;
work_balance_infos_[i].time_spent += work_time;
@@ -482,11 +479,7 @@ void PathTrace::set_denoiser_params(const DenoiseParams &params)
}
denoiser_ = Denoiser::create(device_, params);
/* Only take into account the "immediate" cancel to have interactive rendering responding to
* navigation as quickly as possible, but allow to run denoiser after user hit Esc button while
* doing offline rendering. */
denoiser_->is_cancelled_cb = [this]() { return render_cancel_.is_requested; };
denoiser_->is_cancelled_cb = [this]() { return is_cancel_requested(); };
}
void PathTrace::set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling)
@@ -854,11 +847,9 @@ void PathTrace::progress_update_if_needed(const RenderWork &render_work)
{
if (progress_ != nullptr) {
const int2 tile_size = get_render_tile_size();
const uint64_t num_samples_added = uint64_t(tile_size.x) * tile_size.y *
render_work.path_trace.num_samples;
const int num_samples_added = tile_size.x * tile_size.y * render_work.path_trace.num_samples;
const int current_sample = render_work.path_trace.start_sample +
render_work.path_trace.num_samples -
render_work.path_trace.sample_offset;
render_work.path_trace.num_samples;
progress_->add_samples(num_samples_added, current_sample);
}

View File

@@ -76,7 +76,7 @@ class PathTraceDisplay {
/* Copy buffer of rendered pixels of a given size into a given position of the texture.
*
* This function does not acquire a lock. The reason for this is to allow use of this function
* This function does not acquire a lock. The reason for this is is to allow use of this function
* for partial updates from different devices. In this case the caller will acquire the lock
* once, update all the slices and release
* the lock once. This will ensure that draw() will never use partially updated texture. */

View File

@@ -75,10 +75,7 @@ class PathTraceWork {
/* Render given number of samples as a synchronous blocking call.
* The samples are added to the render buffer associated with this work. */
virtual void render_samples(RenderStatistics &statistics,
int start_sample,
int samples_num,
int sample_offset) = 0;
virtual void render_samples(RenderStatistics &statistics, int start_sample, int samples_num) = 0;
/* Copy render result from this work to the corresponding place of the GPU display.
*

View File

@@ -58,7 +58,7 @@ PathTraceWorkCPU::PathTraceWorkCPU(Device *device,
DeviceScene *device_scene,
bool *cancel_requested_flag)
: PathTraceWork(device, film, device_scene, cancel_requested_flag),
kernels_(Device::get_cpu_kernels())
kernels_(*(device->get_cpu_kernels()))
{
DCHECK_EQ(device->info.type, DEVICE_CPU);
}
@@ -71,17 +71,14 @@ void PathTraceWorkCPU::init_execution()
void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
int start_sample,
int samples_num,
int sample_offset)
int samples_num)
{
const int64_t image_width = effective_buffer_params_.width;
const int64_t image_height = effective_buffer_params_.height;
const int64_t total_pixels_num = image_width * image_height;
if (device_->profiler.active()) {
for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
kernel_globals.start_profiling();
}
for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
kernel_globals.start_profiling();
}
tbb::task_arena local_arena = local_tbb_arena_create(device_);
@@ -100,7 +97,6 @@ void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
work_tile.w = 1;
work_tile.h = 1;
work_tile.start_sample = start_sample;
work_tile.sample_offset = sample_offset;
work_tile.num_samples = 1;
work_tile.offset = effective_buffer_params_.offset;
work_tile.stride = effective_buffer_params_.stride;
@@ -110,10 +106,9 @@ void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
render_samples_full_pipeline(kernel_globals, work_tile, samples_num);
});
});
if (device_->profiler.active()) {
for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
kernel_globals.stop_profiling();
}
for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
kernel_globals.stop_profiling();
}
statistics.occupancy = 1.0f;

View File

@@ -48,8 +48,7 @@ class PathTraceWorkCPU : public PathTraceWork {
virtual void render_samples(RenderStatistics &statistics,
int start_sample,
int samples_num,
int sample_offset) override;
int samples_num) override;
virtual void copy_to_display(PathTraceDisplay *display,
PassMode pass_mode,

View File

@@ -250,20 +250,17 @@ void PathTraceWorkGPU::init_execution()
void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
int start_sample,
int samples_num,
int sample_offset)
int samples_num)
{
/* Limit number of states for the tile and rely on a greedy scheduling of tiles. This allows to
* add more work (because tiles are smaller, so there is higher chance that more paths will
* become busy after adding new tiles). This is especially important for the shadow catcher which
* schedules work in halves of available number of paths. */
work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8);
work_tile_scheduler_.set_accelerated_rt((device_->get_bvh_layout_mask() & BVH_LAYOUT_OPTIX) !=
0);
work_tile_scheduler_.reset(effective_buffer_params_,
start_sample,
samples_num,
sample_offset,
device_scene_->data.integrator.scrambling_distance);
enqueue_reset();
@@ -440,15 +437,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
DCHECK_LE(work_size, max_num_paths_);
switch (kernel) {
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: {
/* Closest ray intersection kernels with integrator state and render buffer. */
void *d_render_buffer = (void *)buffers_->buffer.device_pointer;
void *args[] = {&d_path_index, &d_render_buffer, const_cast<int *>(&work_size)};
queue_->enqueue(kernel, work_size, args);
break;
}
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: {
@@ -818,10 +807,10 @@ bool PathTraceWorkGPU::should_use_graphics_interop()
interop_use_ = device->should_use_graphics_interop();
if (interop_use_) {
VLOG(2) << "Using graphics interop GPU display update.";
VLOG(2) << "Will be using graphics interop GPU display update.";
}
else {
VLOG(2) << "Using naive GPU display update.";
VLOG(2) << "Will be using naive GPU display update.";
}
interop_use_checked_ = true;

View File

@@ -46,8 +46,7 @@ class PathTraceWorkGPU : public PathTraceWork {
virtual void render_samples(RenderStatistics &statistics,
int start_sample,
int samples_num,
int sample_offset) override;
int samples_num) override;
virtual void copy_to_display(PathTraceDisplay *display,
PassMode pass_mode,

View File

@@ -88,16 +88,6 @@ int RenderScheduler::get_num_samples() const
return num_samples_;
}
void RenderScheduler::set_sample_offset(int sample_offset)
{
sample_offset_ = sample_offset;
}
int RenderScheduler::get_sample_offset() const
{
return sample_offset_;
}
void RenderScheduler::set_time_limit(double time_limit)
{
time_limit_ = time_limit;
@@ -120,15 +110,13 @@ int RenderScheduler::get_num_rendered_samples() const
return state_.num_rendered_samples;
}
void RenderScheduler::reset(const BufferParams &buffer_params, int num_samples, int sample_offset)
void RenderScheduler::reset(const BufferParams &buffer_params, int num_samples)
{
buffer_params_ = buffer_params;
update_start_resolution_divider();
set_num_samples(num_samples);
set_start_sample(sample_offset);
set_sample_offset(sample_offset);
/* In background mode never do lower resolution render preview, as it is not really supported
* by the software. */
@@ -183,7 +171,7 @@ void RenderScheduler::reset(const BufferParams &buffer_params, int num_samples,
void RenderScheduler::reset_for_next_tile()
{
reset(buffer_params_, num_samples_, sample_offset_);
reset(buffer_params_, num_samples_);
}
bool RenderScheduler::render_work_reschedule_on_converge(RenderWork &render_work)
@@ -329,7 +317,6 @@ RenderWork RenderScheduler::get_render_work()
render_work.path_trace.start_sample = get_start_sample_to_path_trace();
render_work.path_trace.num_samples = get_num_samples_to_path_trace();
render_work.path_trace.sample_offset = get_sample_offset();
render_work.init_render_buffers = (render_work.path_trace.start_sample == get_start_sample());
@@ -406,9 +393,6 @@ bool RenderScheduler::set_postprocess_render_work(RenderWork *render_work)
any_scheduled = true;
}
/* Force update. */
any_scheduled = true;
if (any_scheduled) {
render_work->display.update = true;
}
@@ -843,26 +827,6 @@ int RenderScheduler::get_num_samples_to_path_trace() const
num_samples_to_occupy = lround(state_.occupancy_num_samples * 0.7f / state_.occupancy);
}
/* When time limit is used clamp the calculated number of samples to keep occupancy.
* This is because time limit causes the last render iteration to happen with less number of
* samples, which conflicts with the occupancy (lower number of samples causes lower
* occupancy, also the calculation is based on number of previously rendered samples).
*
* When time limit is not used the number of samples per render iteration is either increasing
* or stays the same, so there is no need to clamp number of samples calculated for occupancy.
*/
if (time_limit_ != 0.0 && state_.start_render_time != 0.0) {
const double remaining_render_time = max(
0.0, time_limit_ - (time_dt() - state_.start_render_time));
const double time_per_sample_average = path_trace_time_.get_average();
const double predicted_render_time = num_samples_to_occupy * time_per_sample_average;
if (predicted_render_time > remaining_render_time) {
num_samples_to_occupy = lround(num_samples_to_occupy *
(remaining_render_time / predicted_render_time));
}
}
num_samples_to_render = max(num_samples_to_render,
min(num_samples_to_occupy, max_num_samples_to_render));
}

View File

@@ -39,7 +39,6 @@ class RenderWork {
struct {
int start_sample = 0;
int num_samples = 0;
int sample_offset = 0;
} path_trace;
struct {
@@ -126,9 +125,6 @@ class RenderScheduler {
void set_num_samples(int num_samples);
int get_num_samples() const;
void set_sample_offset(int sample_offset);
int get_sample_offset() const;
/* Time limit for the path tracing tasks, in minutes.
* Zero disables the limit. */
void set_time_limit(double time_limit);
@@ -154,7 +150,7 @@ class RenderScheduler {
/* Reset scheduler, indicating that rendering will happen from scratch.
* Resets current rendered state, as well as scheduling information. */
void reset(const BufferParams &buffer_params, int num_samples, int sample_offset);
void reset(const BufferParams &buffer_params, int num_samples);
/* Reset scheduler upon switching to a next tile.
* Will keep the same number of samples and full-frame render parameters, but will reset progress
@@ -423,8 +419,6 @@ class RenderScheduler {
int start_sample_ = 0;
int num_samples_ = 0;
int sample_offset_ = 0;
/* Limit in seconds for how long path tracing is allowed to happen.
* Zero means no limit is applied. */
double time_limit_ = 0.0;

View File

@@ -96,7 +96,7 @@ bool ShaderEval::eval_cpu(Device *device,
device->get_cpu_kernel_thread_globals(kernel_thread_globals);
/* Find required kernel function. */
const CPUKernels &kernels = Device::get_cpu_kernels();
const CPUKernels &kernels = *(device->get_cpu_kernels());
/* Simple parallel_for over all work items. */
KernelShaderEvalInput *input_data = input.data();

View File

@@ -46,8 +46,7 @@ ccl_device_inline uint round_up_to_power_of_two(uint x)
return next_power_of_two(x);
}
TileSize tile_calculate_best_size(const bool accel_rt,
const int2 &image_size,
TileSize tile_calculate_best_size(const int2 &image_size,
const int num_samples,
const int max_num_path_states,
const float scrambling_distance)
@@ -74,7 +73,7 @@ TileSize tile_calculate_best_size(const bool accel_rt,
TileSize tile_size;
const int num_path_states_per_sample = max_num_path_states / num_samples;
if (scrambling_distance < 0.9f && accel_rt) {
if (scrambling_distance < 0.9f) {
/* Prefer large tiles for scrambling distance, bounded by max num path states. */
tile_size.width = min(image_size.x, max_num_path_states);
tile_size.height = min(image_size.y, max(max_num_path_states / tile_size.width, 1));

View File

@@ -49,8 +49,7 @@ std::ostream &operator<<(std::ostream &os, const TileSize &tile_size);
* of active path states.
* Will attempt to provide best guess to keep path tracing threads of a device as localized as
* possible, and have as many threads active for every tile as possible. */
TileSize tile_calculate_best_size(const bool accel_rt,
const int2 &image_size,
TileSize tile_calculate_best_size(const int2 &image_size,
const int num_samples,
const int max_num_path_states,
const float scrambling_distance);

View File

@@ -28,11 +28,6 @@ WorkTileScheduler::WorkTileScheduler()
{
}
void WorkTileScheduler::set_accelerated_rt(bool accelerated_rt)
{
accelerated_rt_ = accelerated_rt;
}
void WorkTileScheduler::set_max_num_path_states(int max_num_path_states)
{
max_num_path_states_ = max_num_path_states;
@@ -41,7 +36,6 @@ void WorkTileScheduler::set_max_num_path_states(int max_num_path_states)
void WorkTileScheduler::reset(const BufferParams &buffer_params,
int sample_start,
int samples_num,
int sample_offset,
float scrambling_distance)
{
/* Image buffer parameters. */
@@ -57,7 +51,6 @@ void WorkTileScheduler::reset(const BufferParams &buffer_params,
/* Samples parameters. */
sample_start_ = sample_start;
samples_num_ = samples_num;
sample_offset_ = sample_offset;
/* Initialize new scheduling. */
reset_scheduler_state();
@@ -66,7 +59,7 @@ void WorkTileScheduler::reset(const BufferParams &buffer_params,
void WorkTileScheduler::reset_scheduler_state()
{
tile_size_ = tile_calculate_best_size(
accelerated_rt_, image_size_px_, samples_num_, max_num_path_states_, scrambling_distance_);
image_size_px_, samples_num_, max_num_path_states_, scrambling_distance_);
VLOG(3) << "Will schedule tiles of size " << tile_size_;
@@ -118,7 +111,6 @@ bool WorkTileScheduler::get_work(KernelWorkTile *work_tile_, const int max_work_
work_tile.h = tile_size_.height;
work_tile.start_sample = sample_start_ + start_sample;
work_tile.num_samples = min(tile_size_.num_samples, samples_num_ - start_sample);
work_tile.sample_offset = sample_offset_;
work_tile.offset = offset_;
work_tile.stride = stride_;

View File

@@ -31,9 +31,6 @@ class WorkTileScheduler {
public:
WorkTileScheduler();
/* To indicate if there is accelerated RT support. */
void set_accelerated_rt(bool state);
/* MAximum path states which are allowed to be used by a single scheduled work tile.
*
* Affects the scheduled work size: the work size will be as big as possible, but will not exceed
@@ -44,7 +41,6 @@ class WorkTileScheduler {
void reset(const BufferParams &buffer_params,
int sample_start,
int samples_num,
int sample_offset,
float scrambling_distance);
/* Get work for a device.
@@ -58,9 +54,6 @@ class WorkTileScheduler {
protected:
void reset_scheduler_state();
/* Used to indicate if there is accelerated ray tracing. */
bool accelerated_rt_ = false;
/* Maximum allowed path states to be used.
*
* TODO(sergey): Naming can be improved. The fact that this is a limiting factor based on the
@@ -86,7 +79,6 @@ class WorkTileScheduler {
* (splitting into a smaller work tiles). */
int sample_start_ = 0;
int samples_num_ = 0;
int sample_offset_ = 0;
/* Tile size which be scheduled for rendering. */
TileSize tile_size_;

View File

@@ -39,10 +39,6 @@ set(SRC_KERNEL_DEVICE_HIP
device/hip/kernel.cpp
)
set(SRC_KERNEL_DEVICE_METAL
device/metal/kernel.metal
)
set(SRC_KERNEL_DEVICE_OPTIX
device/optix/kernel.cu
device/optix/kernel_shader_raytrace.cu
@@ -83,13 +79,6 @@ set(SRC_KERNEL_DEVICE_OPTIX_HEADERS
device/optix/globals.h
)
set(SRC_KERNEL_DEVICE_METAL_HEADERS
device/metal/compat.h
device/metal/context_begin.h
device/metal/context_end.h
device/metal/globals.h
)
set(SRC_KERNEL_CLOSURE_HEADERS
closure/alloc.h
closure/bsdf.h
@@ -273,7 +262,6 @@ set(SRC_KERNEL_UTIL_HEADERS
)
set(SRC_KERNEL_TYPES_HEADERS
tables.h
textures.h
types.h
)
@@ -411,8 +399,12 @@ if(WITH_CYCLES_CUDA_BINARIES)
-I ${CMAKE_CURRENT_SOURCE_DIR}/..
-I ${CMAKE_CURRENT_SOURCE_DIR}/device/cuda
--use_fast_math
-o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_file}
-Wno-deprecated-gpu-targets)
-o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_file})
if(${experimental})
set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__)
set(name ${name}_experimental)
endif()
if(WITH_NANOVDB)
set(cuda_flags ${cuda_flags}
@@ -420,10 +412,6 @@ if(WITH_CYCLES_CUDA_BINARIES)
-I "${NANOVDB_INCLUDE_DIR}")
endif()
if(WITH_CYCLES_DEBUG)
set(cuda_flags ${cuda_flags} -D WITH_CYCLES_DEBUG)
endif()
if(WITH_CYCLES_CUBIN_COMPILER)
string(SUBSTRING ${arch} 3 -1 CUDA_ARCH)
@@ -572,14 +560,13 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
-ffast-math
-o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})
if(WITH_NANOVDB)
set(hip_flags ${hip_flags}
-D WITH_NANOVDB
-I "${NANOVDB_INCLUDE_DIR}")
if(${experimental})
set(hip_flags ${hip_flags} -D __KERNEL_EXPERIMENTAL__)
set(name ${name}_experimental)
endif()
if(WITH_CYCLES_DEBUG)
set(hip_flags ${hip_flags} -D WITH_CYCLES_DEBUG)
set(hip_flags ${hip_flags} -D __KERNEL_DEBUG__)
endif()
add_custom_command(
@@ -620,10 +607,6 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
-I "${NANOVDB_INCLUDE_DIR}")
endif()
if(WITH_CYCLES_DEBUG)
set(cuda_flags ${cuda_flags} -D WITH_CYCLES_DEBUG)
endif()
if(WITH_CYCLES_CUBIN_COMPILER)
# Needed to find libnvrtc-builtins.so. Can't do it from inside
# cycles_cubin_cc since the env variable is read before main()
@@ -712,7 +695,7 @@ if(WITH_COMPILER_ASAN)
string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -fno-sanitize=all")
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-sanitize=vptr")
elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
# With OSL, Cycles disables rtti in some modules, which then breaks at linking
# With OSL, Cycles disables rtti in some modules, wich then breaks at linking
# when trying to use vptr sanitizer (included into 'undefined' general option).
string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -fno-sanitize=vptr")
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-sanitize=vptr")
@@ -740,14 +723,12 @@ cycles_add_library(cycles_kernel "${LIB}"
${SRC_KERNEL_DEVICE_CUDA}
${SRC_KERNEL_DEVICE_HIP}
${SRC_KERNEL_DEVICE_OPTIX}
${SRC_KERNEL_DEVICE_METAL}
${SRC_KERNEL_HEADERS}
${SRC_KERNEL_DEVICE_CPU_HEADERS}
${SRC_KERNEL_DEVICE_GPU_HEADERS}
${SRC_KERNEL_DEVICE_CUDA_HEADERS}
${SRC_KERNEL_DEVICE_HIP_HEADERS}
${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
${SRC_KERNEL_DEVICE_METAL_HEADERS}
)
source_group("bake" FILES ${SRC_KERNEL_BAKE_HEADERS})
@@ -759,7 +740,6 @@ source_group("device\\cuda" FILES ${SRC_KERNEL_DEVICE_CUDA} ${SRC_KERNEL_DEVICE_
source_group("device\\gpu" FILES ${SRC_KERNEL_DEVICE_GPU_HEADERS})
source_group("device\\hip" FILES ${SRC_KERNEL_DEVICE_HIP} ${SRC_KERNEL_DEVICE_HIP_HEADERS})
source_group("device\\optix" FILES ${SRC_KERNEL_DEVICE_OPTIX} ${SRC_KERNEL_DEVICE_OPTIX_HEADERS})
source_group("device\\metal" FILES ${SRC_KERNEL_DEVICE_METAL} ${SRC_KERNEL_DEVICE_METAL_HEADERS})
source_group("film" FILES ${SRC_KERNEL_FILM_HEADERS})
source_group("geom" FILES ${SRC_KERNEL_GEOM_HEADERS})
source_group("integrator" FILES ${SRC_KERNEL_INTEGRATOR_HEADERS})
@@ -792,8 +772,6 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP}" ${CYCLES_
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_METAL}" ${CYCLES_INSTALL_PATH}/source/kernel/device/metal)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_METAL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/metal)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_FILM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/film)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/geom)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_INTEGRATOR_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/integrator)

View File

@@ -97,7 +97,7 @@ ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection *
swapped = false;
for (int j = 0; j < num_hits - 1; ++j) {
if (hits[j].t > hits[j + 1].t) {
Intersection tmp_hit = hits[j];
struct Intersection tmp_hit = hits[j];
float3 tmp_Ng = Ng[j];
hits[j] = hits[j + 1];
Ng[j] = Ng[j + 1];

View File

@@ -438,7 +438,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
if (label & LABEL_TRANSMIT) {
float threshold_squared = kernel_data.background.transparent_roughness_squared_threshold;
if (threshold_squared >= 0.0f && !(label & LABEL_DIFFUSE)) {
if (threshold_squared >= 0.0f) {
if (bsdf_get_specular_roughness_squared(sc) <= threshold_squared) {
label |= LABEL_TRANSMIT_TRANSPARENT;
}

View File

@@ -18,7 +18,6 @@
#pragma once
#include "kernel/tables.h"
#include "kernel/types.h"
#include "kernel/util/profiling.h"

View File

@@ -18,7 +18,6 @@
/* CPU Kernel Interface */
#include "util/half.h"
#include "util/types.h"
#include "kernel/types.h"

View File

@@ -37,7 +37,7 @@
KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_camera);
KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_bake);
KERNEL_INTEGRATOR_SHADE_FUNCTION(intersect_closest);
KERNEL_INTEGRATOR_FUNCTION(intersect_closest);
KERNEL_INTEGRATOR_FUNCTION(intersect_shadow);
KERNEL_INTEGRATOR_FUNCTION(intersect_subsurface);
KERNEL_INTEGRATOR_FUNCTION(intersect_volume_stack);
@@ -52,37 +52,6 @@ KERNEL_INTEGRATOR_SHADE_FUNCTION(megakernel);
#undef KERNEL_INTEGRATOR_INIT_FUNCTION
#undef KERNEL_INTEGRATOR_SHADE_FUNCTION
#define KERNEL_FILM_CONVERT_FUNCTION(name) \
void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \
const float *buffer, \
float *pixel, \
const int width, \
const int buffer_stride, \
const int pixel_stride); \
void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \
const KernelFilmConvert *kfilm_convert, \
const float *buffer, \
half4 *pixel, \
const int width, \
const int buffer_stride);
KERNEL_FILM_CONVERT_FUNCTION(depth)
KERNEL_FILM_CONVERT_FUNCTION(mist)
KERNEL_FILM_CONVERT_FUNCTION(sample_count)
KERNEL_FILM_CONVERT_FUNCTION(float)
KERNEL_FILM_CONVERT_FUNCTION(light_path)
KERNEL_FILM_CONVERT_FUNCTION(float3)
KERNEL_FILM_CONVERT_FUNCTION(motion)
KERNEL_FILM_CONVERT_FUNCTION(cryptomatte)
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher)
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow)
KERNEL_FILM_CONVERT_FUNCTION(combined)
KERNEL_FILM_CONVERT_FUNCTION(float4)
#undef KERNEL_FILM_CONVERT_FUNCTION
/* --------------------------------------------------------------------
* Shader evaluation.
*/

Some files were not shown because too many files have changed in this diff Show More