Fix #109024: Off-by-1 in rna_access for non-array props without raw access #115967

Open
Thomas Barlow wants to merge 4 commits from Mysteryem/blender:fix_109024_non_raw_non_array_off_by_one into main

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
570 changed files with 77185 additions and 43228 deletions
Showing only changes of commit 4e58ea9079 - Show all commits

View File

@ -760,8 +760,8 @@ endif()
# Unit testing
option(WITH_GTESTS "Enable GTest unit testing" OFF)
option(WITH_OPENGL_RENDER_TESTS "Enable OpenGL render related unit testing (Experimental)" OFF)
option(WITH_OPENGL_DRAW_TESTS "Enable OpenGL UI drawing related unit testing (Experimental)" OFF)
option(WITH_GPU_RENDER_TESTS "Enable GPU render related unit testing (EEVEE, Workbench and Grease Pencil)" OFF)
option(WITH_GPU_DRAW_TESTS "Enable GPU drawing related unit testing (GPU backends and draw manager)" OFF)
option(WITH_COMPOSITOR_REALTIME_TESTS "Enable regression testing for realtime compositor" OFF)
if(UNIX AND NOT (APPLE OR HAIKU))
option(WITH_UI_TESTS "\

View File

@ -17,6 +17,7 @@ ExternalProject_Add(external_ocloc
PREFIX ${BUILD_DIR}/ocloc
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/ocloc ${DEFAULT_CMAKE_FLAGS} ${OCLOC_EXTRA_ARGS}
INSTALL_DIR ${LIBDIR}/ocloc
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/ocloc/src/external_ocloc/ < ${PATCH_DIR}/ocloc.diff
)
add_dependencies(

View File

@ -47,12 +47,6 @@ set(OPENVDB_EXTRA_ARGS
)
set(OPENVDB_PATCH ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb.diff)
if(APPLE)
set(OPENVDB_PATCH
${OPENVDB_PATCH} &&
${PATCH_CMD} -p 1 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb_metal.diff
)
endif()
ExternalProject_Add(openvdb
URL file://${PACKAGE_DIR}/${OPENVDB_FILE}

View File

@ -0,0 +1,14 @@
diff --git a/shared/offline_compiler/source/ocloc_fatbinary.cpp b/shared/offline_compiler/source/ocloc_fatbinary.cpp
index 98a1c0e..4d9b5b0 100644
--- a/shared/offline_compiler/source/ocloc_fatbinary.cpp
+++ b/shared/offline_compiler/source/ocloc_fatbinary.cpp
@@ -286,7 +286,9 @@ int buildFatBinaryForTarget(int retVal, const std::vector<std::string> &argsCopy
productConfig = ProductConfigHelper::parseMajorMinorRevisionValue(argHelper->productConfigHelper->getProductConfigFromDeviceName(product));
}
- fatbinary.appendFileEntry(pointerSize + "." + productConfig, pCompiler->getPackedDeviceBinaryOutput());
+ // Storing binaries under the hardware prefix instead of the full architecture version number,
+ // as they would otherwise be ignored if they do not fully match that of the execution device.
+ fatbinary.appendFileEntry(pointerSize + "." + NEO::hardwarePrefix[argHelper->productConfigHelper->getProductFamilyFromDeviceName(productConfig)], pCompiler->getPackedDeviceBinaryOutput());
return retVal;
}

File diff suppressed because it is too large Load Diff

View File

@ -150,11 +150,12 @@ def build_info(
print("parsing make log ...")
for line in makelog:
args: Union[str, List[str]] = line.split()
if not any([(c in args) for c in compilers]):
args_orig: Union[str, List[str]] = line.split()
args = [fake_compiler if c in compilers else c for c in args_orig]
if args == args_orig:
# No compilers in the command, skip.
continue
del args_orig
# join args in case they are not.
args = ' '.join(args)
@ -162,8 +163,6 @@ def build_info(
args = args.replace(" -D ", " -D")
args = args.replace(" -I ", " -I")
for c in compilers:
args = args.replace(c, fake_compiler)
args = shlex.split(args)
# end

View File

@ -17,8 +17,8 @@ if NOT "%1" == "" (
shift /1
) else if "%1" == "with_tests" (
set TESTS_CMAKE_ARGS=%TESTS_CMAKE_ARGS% -DWITH_GTESTS=On
) else if "%1" == "with_opengl_tests" (
set TESTS_CMAKE_ARGS=%TESTS_CMAKE_ARGS% -DWITH_OPENGL_DRAW_TESTS=On -DWITH_OPENGL_RENDER_TESTS=On
) else if "%1" == "with_gpu_tests" (
set TESTS_CMAKE_ARGS=%TESTS_CMAKE_ARGS% -DWITH_GPU_DRAW_TESTS=On -DWITH_GPU_RENDER_TESTS=On
) else if "%1" == "full" (
set TARGET=Full
set BUILD_CMAKE_ARGS=%BUILD_CMAKE_ARGS% ^

View File

@ -37,7 +37,7 @@ echo - doc_py ^(Generate sphinx python api docs^)
echo.
echo Experimental options
echo - with_opengl_tests ^(enable both the render and draw opengl test suites^)
echo - with_gpu_tests ^(enable both the render and draw gpu test suites including EEVEE, Workbench, Grease Pencil, draw manager and GPU backends^)
echo - clang ^(enable building with clang^)
echo - asan ^(enable asan when building with clang^)
echo - ninja ^(enable building with ninja instead of msbuild^)

View File

@ -7,4 +7,5 @@ GPL-2.0-or-later GPL-license.txt https://spdx.org/licenses/GP
GPL-3.0-or-later GPL3-license.txt https://spdx.org/licenses/GPL-3.0-or-later.html
LGPL-2.1-or-later LGPL2.1-license.txt https://spdx.org/licenses/LGPL-2.1-or-later.html
MIT MIT-license.txt https://spdx.org/licenses/MIT.html
MPL-2.0 MPL-2.0.txt https://spdx.org/licenses/MPL-2.0.html
Zlib Zlib-license.txt https://spdx.org/licenses/Zlib.html

View File

@ -191,13 +191,6 @@ if(CXX_HAS_AVX2)
add_definitions(-DWITH_KERNEL_AVX2)
endif()
# LLVM and OSL need to build without RTTI
if(WIN32 AND MSVC)
set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang"))
set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
endif()
# Definitions and Includes
add_definitions(

View File

@ -300,15 +300,15 @@ def do_versions(self):
if version <= (2, 79, 2):
cmat = mat.cycles
if not cmat.is_property_set("displacement_method"):
cmat.displacement_method = 'BUMP'
if cmat.get("displacement_method", -1) == -1:
cmat['displacement_method'] = 0
# Change default to bump again.
if version <= (2, 79, 6) or \
(version >= (2, 80, 0) and version <= (2, 80, 41)):
cmat = mat.cycles
if not cmat.is_property_set("displacement_method"):
cmat.displacement_method = 'DISPLACEMENT'
if cmat.get("displacement_method", -1) == -1:
cmat['displacement_method'] = 1
if version <= (3, 5, 3):
cmat = mat.cycles

View File

@ -313,7 +313,7 @@ void CPUDevice::get_cpu_kernel_thread_globals(
kernel_thread_globals.clear();
void *osl_memory = get_cpu_osl_memory();
for (int i = 0; i < info.cpu_threads; i++) {
kernel_thread_globals.emplace_back(kernel_globals, osl_memory, profiler);
kernel_thread_globals.emplace_back(kernel_globals, osl_memory, profiler, i);
}
}

View File

@ -60,7 +60,7 @@ class CPUKernels {
int x,
int y,
float threshold,
bool reset,
int reset,
int offset,
int stride)>;

View File

@ -12,14 +12,16 @@ CCL_NAMESPACE_BEGIN
CPUKernelThreadGlobals::CPUKernelThreadGlobals(const KernelGlobalsCPU &kernel_globals,
void *osl_globals_memory,
Profiler &cpu_profiler)
Profiler &cpu_profiler,
const int thread_index)
: KernelGlobalsCPU(kernel_globals), cpu_profiler_(cpu_profiler)
{
clear_runtime_pointers();
#ifdef WITH_OSL
OSLGlobals::thread_init(this, static_cast<OSLGlobals *>(osl_globals_memory));
OSLGlobals::thread_init(this, static_cast<OSLGlobals *>(osl_globals_memory), thread_index);
#else
(void)thread_index;
(void)osl_globals_memory;
#endif

View File

@ -23,7 +23,8 @@ class CPUKernelThreadGlobals : public KernelGlobalsCPU {
* without OSL support. Will avoid the need for those unnamed pointers and casts. */
CPUKernelThreadGlobals(const KernelGlobalsCPU &kernel_globals,
void *osl_globals_memory,
Profiler &cpu_profiler);
Profiler &cpu_profiler,
const int thread_index);
~CPUKernelThreadGlobals();

View File

@ -73,7 +73,7 @@ class MetalDevice : public Device {
/* Bindless Textures */
bool is_texture(const TextureInfo &tex);
device_vector<TextureInfo> texture_info;
bool need_texture_info;
bool need_texture_info = false;
id<MTLArgumentEncoder> mtlTextureArgEncoder = nil;
id<MTLArgumentEncoder> mtlBufferArgEncoder = nil;
id<MTLBuffer> buffer_bindings_1d = nil;

View File

@ -40,7 +40,7 @@ class OneapiDeviceQueue : public DeviceQueue {
virtual void copy_to_device(device_memory &mem) override;
virtual void copy_from_device(device_memory &mem) override;
virtual bool supports_local_atomic_sort() const
virtual bool supports_local_atomic_sort() const override
{
return true;
}

View File

@ -27,7 +27,6 @@ struct DeviceKernelArguments {
POINTER,
INT32,
FLOAT32,
BOOLEAN,
KERNEL_FILM_CONVERT,
};
@ -66,10 +65,6 @@ struct DeviceKernelArguments {
{
add(FLOAT32, value, sizeof(float));
}
void add(const bool *value)
{
add(BOOLEAN, value, 4);
}
void add(const Type type, const void *value, size_t size)
{
assert(count < MAX_ARGS);

View File

@ -103,7 +103,7 @@ class DenoiserGPU : public Denoiser {
int denoised_offset;
int num_components;
bool use_compositing;
int use_compositing;
bool use_denoising_albedo;
};

View File

@ -1055,6 +1055,7 @@ int PathTraceWorkGPU::adaptive_sampling_convergence_check_count_active(float thr
queue_->zero_to_device(num_active_pixels);
const int work_size = effective_buffer_params_.width * effective_buffer_params_.height;
const int reset_int = reset; /* No bool kernel arguments. */
DeviceKernelArguments args(&buffers_->buffer.device_pointer,
&effective_buffer_params_.full_x,
@ -1062,7 +1063,7 @@ int PathTraceWorkGPU::adaptive_sampling_convergence_check_count_active(float thr
&effective_buffer_params_.width,
&effective_buffer_params_.height,
&threshold,
&reset,
&reset_int,
&effective_buffer_params_.offset,
&effective_buffer_params_.stride,
&num_active_pixels.device_pointer);

View File

@ -327,6 +327,7 @@ set(SRC_KERNEL_UTIL_HEADERS
util/color.h
util/differential.h
util/lookup_table.h
util/nanovdb.h
util/profiling.h
)
@ -483,8 +484,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
if(WITH_NANOVDB)
set(cuda_flags ${cuda_flags}
-D WITH_NANOVDB
-I "${NANOVDB_INCLUDE_DIR}")
-D WITH_NANOVDB)
endif()
if(WITH_CYCLES_DEBUG)
@ -634,8 +634,7 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
if(WITH_NANOVDB)
set(hip_flags ${hip_flags}
-D WITH_NANOVDB
-I "${NANOVDB_INCLUDE_DIR}")
-D WITH_NANOVDB)
endif()
if(WITH_CYCLES_DEBUG)
@ -755,8 +754,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
if(WITH_NANOVDB)
set(cuda_flags ${cuda_flags}
-D WITH_NANOVDB
-I "${NANOVDB_INCLUDE_DIR}")
-D WITH_NANOVDB)
endif()
if(WITH_CYCLES_OSL)
set(cuda_flags ${cuda_flags}
@ -936,8 +934,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
if(WITH_NANOVDB)
list(APPEND sycl_compiler_flags
-DWITH_NANOVDB
-I"${NANOVDB_INCLUDE_DIR}")
-DWITH_NANOVDB)
endif()
if(WITH_CYCLES_EMBREE AND EMBREE_SYCL_SUPPORT)
@ -1256,16 +1253,3 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_SVM_HEADERS}" ${CYCLES
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_TYPES_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/util)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/util)
if(WITH_NANOVDB)
set(SRC_NANOVDB_HEADERS
nanovdb/NanoVDB.h
nanovdb/CNanoVDB.h
)
set(SRC_NANOVDB_UTIL_HEADERS
nanovdb/util/CSampleFromVoxels.h
nanovdb/util/SampleFromVoxels.h
)
delayed_install(${NANOVDB_INCLUDE_DIR} "${SRC_NANOVDB_HEADERS}" ${CYCLES_INSTALL_PATH}/source/nanovdb)
delayed_install(${NANOVDB_INCLUDE_DIR} "${SRC_NANOVDB_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/nanovdb/util)
endif()

View File

@ -49,6 +49,7 @@ typedef struct KernelGlobalsCPU {
OSLGlobals *osl = nullptr;
OSLShadingSystem *osl_ss = nullptr;
OSLThreadData *osl_tdata = nullptr;
int osl_thread_index = 0;
#endif
#ifdef __PATH_GUIDING__

View File

@ -5,9 +5,7 @@
#pragma once
#ifdef WITH_NANOVDB
# define NANOVDB_USE_INTRINSICS
# include <nanovdb/NanoVDB.h>
# include <nanovdb/util/SampleFromVoxels.h>
# include "kernel/util/nanovdb.h"
#endif
CCL_NAMESPACE_BEGIN
@ -685,46 +683,59 @@ template<typename TexT, typename OutT = float4> struct TextureInterpolator {
};
#ifdef WITH_NANOVDB
template<typename TexT, typename OutT = float4> struct NanoVDBInterpolator {
typedef typename nanovdb::NanoGrid<TexT>::AccessorType AccessorType;
template<typename TexT, typename OutT> struct NanoVDBInterpolator {
static ccl_always_inline float read(float r)
{
return r;
}
static ccl_always_inline float4 read(nanovdb::Vec3f r)
static ccl_always_inline float4 read(const packed_float3 r)
{
return make_float4(r[0], r[1], r[2], 1.0f);
return make_float4(r.x, r.y, r.z, 1.0f);
}
static ccl_always_inline OutT interp_3d_closest(const AccessorType &acc,
float x,
float y,
float z)
template<typename Acc>
static ccl_always_inline OutT interp_3d_closest(const Acc &acc, float x, float y, float z)
{
const nanovdb::Vec3f xyz(x, y, z);
return read(nanovdb::SampleFromVoxels<AccessorType, 0, false>(acc)(xyz));
const nanovdb::Coord coord((int32_t)floorf(x), (int32_t)floorf(y), (int32_t)floorf(z));
return read(acc.getValue(coord));
}
static ccl_always_inline OutT interp_3d_linear(const AccessorType &acc,
float x,
float y,
float z)
template<typename Acc>
static ccl_always_inline OutT interp_3d_linear(const Acc &acc, float x, float y, float z)
{
const nanovdb::Vec3f xyz(x - 0.5f, y - 0.5f, z - 0.5f);
return read(nanovdb::SampleFromVoxels<AccessorType, 1, false>(acc)(xyz));
int ix, iy, iz;
const float tx = frac(x - 0.5f, &ix);
const float ty = frac(y - 0.5f, &iy);
const float tz = frac(z - 0.5f, &iz);
return mix(mix(mix(read(acc.getValue(nanovdb::Coord(ix, iy, iz))),
read(acc.getValue(nanovdb::Coord(ix, iy, iz + 1))),
tz),
mix(read(acc.getValue(nanovdb::Coord(ix, iy + 1, iz + 1))),
read(acc.getValue(nanovdb::Coord(ix, iy + 1, iz))),
1.0f - tz),
ty),
mix(mix(read(acc.getValue(nanovdb::Coord(ix + 1, iy + 1, iz))),
read(acc.getValue(nanovdb::Coord(ix + 1, iy + 1, iz + 1))),
tz),
mix(read(acc.getValue(nanovdb::Coord(ix + 1, iy, iz + 1))),
read(acc.getValue(nanovdb::Coord(ix + 1, iy, iz))),
1.0f - tz),
1.0f - ty),
tx);
}
/* Tricubic b-spline interpolation. */
template<typename Acc>
# if defined(__GNUC__) || defined(__clang__)
static ccl_always_inline
# else
static ccl_never_inline
# endif
OutT
interp_3d_cubic(const AccessorType &acc, float x, float y, float z)
interp_3d_cubic(const Acc &acc, float x, float y, float z)
{
int ix, iy, iz;
int nix, niy, niz;
@ -779,15 +790,20 @@ template<typename TexT, typename OutT = float4> struct NanoVDBInterpolator {
using namespace nanovdb;
NanoGrid<TexT> *const grid = (NanoGrid<TexT> *)info.data;
AccessorType acc = grid->getAccessor();
switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
case INTERPOLATION_CLOSEST:
case INTERPOLATION_CLOSEST: {
ReadAccessor<TexT> acc(grid->tree().root());
return interp_3d_closest(acc, x, y, z);
case INTERPOLATION_LINEAR:
}
case INTERPOLATION_LINEAR: {
CachedReadAccessor<TexT> acc(grid->tree().root());
return interp_3d_linear(acc, x, y, z);
default:
}
default: {
CachedReadAccessor<TexT> acc(grid->tree().root());
return interp_3d_cubic(acc, x, y, z);
}
}
}
};
@ -880,7 +896,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
return make_float4(f, f, f, 1.0f);
}
case IMAGE_DATA_TYPE_NANOVDB_FLOAT3:
return NanoVDBInterpolator<nanovdb::Vec3f>::interp_3d(info, P.x, P.y, P.z, interp);
return NanoVDBInterpolator<packed_float3, float4>::interp_3d(info, P.x, P.y, P.z, interp);
case IMAGE_DATA_TYPE_NANOVDB_FPN: {
const float f = NanoVDBInterpolator<nanovdb::FpN, float>::interp_3d(
info, P.x, P.y, P.z, interp);

View File

@ -101,7 +101,7 @@ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)(
int x,
int y,
float threshold,
bool reset,
int reset,
int offset,
int stride);

View File

@ -164,7 +164,7 @@ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)(
int x,
int y,
float threshold,
bool reset,
int reset,
int offset,
int stride)
{

View File

@ -45,6 +45,7 @@ typedef unsigned long long uint64_t;
#define ccl_global
#define ccl_inline_constant __constant__
#define ccl_device_constant __constant__ __device__
#define ccl_static_constexpr static constexpr
#define ccl_constant const
#define ccl_gpu_shared __shared__
#define ccl_private

View File

@ -8,12 +8,17 @@ CCL_NAMESPACE_BEGIN
#if !defined __KERNEL_METAL__
# ifdef WITH_NANOVDB
# define NDEBUG /* Disable "assert" in device code */
# define NANOVDB_USE_INTRINSICS
# include "nanovdb/NanoVDB.h"
# include "nanovdb/util/SampleFromVoxels.h"
# include "kernel/util/nanovdb.h"
# endif
#endif
ccl_device_inline float frac(float x, ccl_private int *ix)
{
int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0);
*ix = i;
return x - (float)i;
}
/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */
ccl_device float cubic_w0(float a)
{
@ -126,63 +131,121 @@ kernel_tex_image_interp_tricubic(ccl_global const TextureInfo &info, float x, fl
}
#ifdef WITH_NANOVDB
template<typename T, typename S>
ccl_device typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_tricubic_nanovdb(
ccl_private S &s, float x, float y, float z)
template<typename OutT, typename Acc>
ccl_device OutT
kernel_tex_image_interp_trilinear_nanovdb(ccl_private Acc &acc, float x, float y, float z)
{
float px = floorf(x);
float py = floorf(y);
float pz = floorf(z);
float fx = x - px;
float fy = y - py;
float fz = z - pz;
int ix, iy, iz;
const float tx = frac(x - 0.5f, &ix);
const float ty = frac(y - 0.5f, &iy);
const float tz = frac(z - 0.5f, &iz);
float g0x = cubic_g0(fx);
float g1x = cubic_g1(fx);
float g0y = cubic_g0(fy);
float g1y = cubic_g1(fy);
float g0z = cubic_g0(fz);
float g1z = cubic_g1(fz);
return mix(mix(mix(OutT(acc.getValue(nanovdb::Coord(ix, iy, iz))),
OutT(acc.getValue(nanovdb::Coord(ix, iy, iz + 1))),
tz),
mix(OutT(acc.getValue(nanovdb::Coord(ix, iy + 1, iz + 1))),
OutT(acc.getValue(nanovdb::Coord(ix, iy + 1, iz))),
1.0f - tz),
ty),
mix(mix(OutT(acc.getValue(nanovdb::Coord(ix + 1, iy + 1, iz))),
OutT(acc.getValue(nanovdb::Coord(ix + 1, iy + 1, iz + 1))),
tz),
mix(OutT(acc.getValue(nanovdb::Coord(ix + 1, iy, iz + 1))),
OutT(acc.getValue(nanovdb::Coord(ix + 1, iy, iz))),
1.0f - tz),
1.0f - ty),
tx);
}
float x0 = px + cubic_h0(fx);
float x1 = px + cubic_h1(fx);
float y0 = py + cubic_h0(fy);
float y1 = py + cubic_h1(fy);
float z0 = pz + cubic_h0(fz);
float z1 = pz + cubic_h1(fz);
template<typename OutT, typename Acc>
ccl_device OutT
kernel_tex_image_interp_tricubic_nanovdb(ccl_private Acc &acc, float x, float y, float z)
{
int ix, iy, iz;
int nix, niy, niz;
int pix, piy, piz;
int nnix, nniy, nniz;
using namespace nanovdb;
/* A -0.5 offset is used to center the cubic samples around the sample point. */
const float tx = frac(x - 0.5f, &ix);
const float ty = frac(y - 0.5f, &iy);
const float tz = frac(z - 0.5f, &iz);
return g0z * (g0y * (g0x * s(Vec3f(x0, y0, z0)) + g1x * s(Vec3f(x1, y0, z0))) +
g1y * (g0x * s(Vec3f(x0, y1, z0)) + g1x * s(Vec3f(x1, y1, z0)))) +
g1z * (g0y * (g0x * s(Vec3f(x0, y0, z1)) + g1x * s(Vec3f(x1, y0, z1))) +
g1y * (g0x * s(Vec3f(x0, y1, z1)) + g1x * s(Vec3f(x1, y1, z1))));
pix = ix - 1;
piy = iy - 1;
piz = iz - 1;
nix = ix + 1;
niy = iy + 1;
niz = iz + 1;
nnix = ix + 2;
nniy = iy + 2;
nniz = iz + 2;
const int xc[4] = {pix, ix, nix, nnix};
const int yc[4] = {piy, iy, niy, nniy};
const int zc[4] = {piz, iz, niz, nniz};
float u[4], v[4], w[4];
/* Some helper macros to keep code size reasonable.
* Lets the compiler inline all the matrix multiplications.
*/
# define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
{ \
u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \
u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \
u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \
u[3] = (1.0f / 6.0f) * t * t * t; \
} \
(void)0
# define DATA(x, y, z) (OutT(acc.getValue(nanovdb::Coord(xc[x], yc[y], zc[z]))))
# define COL_TERM(col, row) \
(v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \
u[3] * DATA(3, col, row)))
# define ROW_TERM(row) \
(w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row)))
SET_CUBIC_SPLINE_WEIGHTS(u, tx);
SET_CUBIC_SPLINE_WEIGHTS(v, ty);
SET_CUBIC_SPLINE_WEIGHTS(w, tz);
/* Actual interpolation. */
return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
# undef COL_TERM
# undef ROW_TERM
# undef DATA
# undef SET_CUBIC_SPLINE_WEIGHTS
}
# if defined(__KERNEL_METAL__)
template<typename T>
__attribute__((noinline)) typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_nanovdb(
template<typename OutT, typename T>
__attribute__((noinline)) OutT kernel_tex_image_interp_nanovdb(
ccl_global const TextureInfo &info, float x, float y, float z, uint interpolation)
# else
template<typename T>
ccl_device_noinline typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_nanovdb(
template<typename OutT, typename T>
ccl_device_noinline OutT kernel_tex_image_interp_nanovdb(
ccl_global const TextureInfo &info, float x, float y, float z, uint interpolation)
# endif
{
using namespace nanovdb;
ccl_global NanoGrid<T> *const grid = (ccl_global NanoGrid<T> *)info.data;
typedef typename nanovdb::NanoGrid<T>::AccessorType AccessorType;
AccessorType acc = grid->getAccessor();
switch (interpolation) {
case INTERPOLATION_CLOSEST:
return SampleFromVoxels<AccessorType, 0, false>(acc)(Vec3f(x, y, z));
case INTERPOLATION_LINEAR:
return SampleFromVoxels<AccessorType, 1, false>(acc)(Vec3f(x - 0.5f, y - 0.5f, z - 0.5f));
default:
SampleFromVoxels<AccessorType, 1, false> s(acc);
return kernel_tex_image_interp_tricubic_nanovdb<T>(s, x - 0.5f, y - 0.5f, z - 0.5f);
case INTERPOLATION_CLOSEST: {
ReadAccessor<T> acc(grid->tree().root());
const nanovdb::Coord coord((int32_t)floorf(x), (int32_t)floorf(y), (int32_t)floorf(z));
return OutT(acc.getValue(coord));
}
case INTERPOLATION_LINEAR: {
CachedReadAccessor<T> acc(grid->tree().root());
return kernel_tex_image_interp_trilinear_nanovdb<OutT>(acc, x, y, z);
}
default: {
CachedReadAccessor<T> acc(grid->tree().root());
return kernel_tex_image_interp_tricubic_nanovdb<OutT>(acc, x, y, z);
}
}
}
#endif
@ -240,20 +303,20 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
#ifdef WITH_NANOVDB
if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) {
float f = kernel_tex_image_interp_nanovdb<float>(info, x, y, z, interpolation);
float f = kernel_tex_image_interp_nanovdb<float, float>(info, x, y, z, interpolation);
return make_float4(f, f, f, 1.0f);
}
if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
nanovdb::Vec3f f = kernel_tex_image_interp_nanovdb<nanovdb::Vec3f>(
float3 f = kernel_tex_image_interp_nanovdb<float3, packed_float3>(
info, x, y, z, interpolation);
return make_float4(f[0], f[1], f[2], 1.0f);
return make_float4(f.x, f.y, f.z, 1.0f);
}
if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FPN) {
float f = kernel_tex_image_interp_nanovdb<nanovdb::FpN>(info, x, y, z, interpolation);
float f = kernel_tex_image_interp_nanovdb<float, nanovdb::FpN>(info, x, y, z, interpolation);
return make_float4(f, f, f, 1.0f);
}
if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FP16) {
float f = kernel_tex_image_interp_nanovdb<nanovdb::Fp16>(info, x, y, z, interpolation);
float f = kernel_tex_image_interp_nanovdb<float, nanovdb::Fp16>(info, x, y, z, interpolation);
return make_float4(f, f, f, 1.0f);
}
#endif

View File

@ -668,7 +668,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
int sw,
int sh,
float threshold,
bool reset,
int reset,
int offset,
int stride,
ccl_global uint *num_active_pixels)
@ -1104,7 +1104,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
int pass_denoised,
int pass_sample_count,
int num_components,
bool use_compositing)
int use_compositing)
{
const int work_index = ccl_gpu_global_id_x();
const int y = work_index / width;

View File

@ -38,6 +38,7 @@ typedef unsigned long long uint64_t;
#define ccl_global
#define ccl_inline_constant __constant__
#define ccl_device_constant __constant__ __device__
#define ccl_static_constexpr static constexpr
#define ccl_constant const
#define ccl_gpu_shared __shared__
#define ccl_private

View File

@ -47,6 +47,7 @@ using namespace metal::raytracing;
#define ccl_global device
#define ccl_inline_constant static constant constexpr
#define ccl_device_constant constant
#define ccl_static_constexpr static constant constexpr
#define ccl_constant constant
#define ccl_gpu_shared threadgroup
#define ccl_private thread

View File

@ -5,10 +5,7 @@
// clang-format off
#ifdef WITH_NANOVDB
# define NDEBUG /* Disable "assert" in device code */
# define NANOVDB_USE_INTRINSICS
# include "nanovdb/NanoVDB.h"
# include "nanovdb/util/SampleFromVoxels.h"
# include "kernel/util/nanovdb.h"
#endif
/* Open the Metal kernel context class

View File

@ -40,7 +40,8 @@
#define ccl_device_inline inline
#define ccl_noinline __attribute__((noinline))
#define ccl_inline_constant const constexpr
#define ccl_static_constant const
#define ccl_device_constant static constexpr
#define ccl_static_constexpr static constexpr
#define ccl_device_forceinline __attribute__((always_inline))
#define ccl_device_noinline ccl_device ccl_noinline
#define ccl_device_noinline_cpu ccl_device

View File

@ -34,8 +34,7 @@ typedef struct ccl_vdb_double_t {
} ccl_vdb_double_t;
# define double ccl_vdb_double_t
# include <nanovdb/NanoVDB.h>
# include <nanovdb/util/SampleFromVoxels.h>
# include "kernel/util/nanovdb.h"
# undef double
#endif

View File

@ -202,48 +202,64 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals, int id, float x, float
}
#ifdef WITH_NANOVDB
template<typename T> struct NanoVDBInterpolator {
template<typename TexT, typename OutT> struct NanoVDBInterpolator {
typedef typename nanovdb::NanoGrid<T>::AccessorType AccessorType;
static ccl_always_inline float4 read(float r)
static ccl_always_inline float read(float r)
{
return make_float4(r, r, r, 1.0f);
return r;
}
static ccl_always_inline float4 read(nanovdb::Vec3f r)
static ccl_always_inline float4 read(const packed_float3 r)
{
return make_float4(r[0], r[1], r[2], 1.0f);
return make_float4(r.x, r.y, r.z, 1.0f);
}
static ccl_always_inline float4 interp_3d_closest(const AccessorType &acc,
float x,
float y,
float z)
template<typename Acc>
static ccl_always_inline OutT interp_3d_closest(const Acc &acc, float x, float y, float z)
{
const nanovdb::Vec3f xyz(x, y, z);
return read(nanovdb::SampleFromVoxels<AccessorType, 0, false>(acc)(xyz));
const nanovdb::Coord coord(int32_t(rintf(x)), int32_t(rintf(y)), int32_t(rintf(z)));
return read(acc.getValue(coord));
}
static ccl_always_inline float4 interp_3d_linear(const AccessorType &acc,
float x,
float y,
float z)
template<typename Acc>
static ccl_always_inline OutT interp_3d_linear(const Acc &acc, float x, float y, float z)
{
const nanovdb::Vec3f xyz(x - 0.5f, y - 0.5f, z - 0.5f);
return read(nanovdb::SampleFromVoxels<AccessorType, 1, false>(acc)(xyz));
int ix, iy, iz;
const float tx = svm_image_texture_frac(x - 0.5f, &ix);
const float ty = svm_image_texture_frac(y - 0.5f, &iy);
const float tz = svm_image_texture_frac(z - 0.5f, &iz);
return mix(mix(mix(read(acc.getValue(nanovdb::Coord(ix, iy, iz))),
read(acc.getValue(nanovdb::Coord(ix, iy, iz + 1))),
tz),
mix(read(acc.getValue(nanovdb::Coord(ix, iy + 1, iz + 1))),
read(acc.getValue(nanovdb::Coord(ix, iy + 1, iz))),
1.0f - tz),
ty),
mix(mix(read(acc.getValue(nanovdb::Coord(ix + 1, iy + 1, iz))),
read(acc.getValue(nanovdb::Coord(ix + 1, iy + 1, iz + 1))),
tz),
mix(read(acc.getValue(nanovdb::Coord(ix + 1, iy, iz + 1))),
read(acc.getValue(nanovdb::Coord(ix + 1, iy, iz))),
1.0f - tz),
1.0f - ty),
tx);
}
static float4 interp_3d_cubic(const AccessorType &acc, float x, float y, float z)
/* Tricubic b-spline interpolation. */
template<typename Acc>
static ccl_always_inline OutT interp_3d_cubic(const Acc &acc, float x, float y, float z)
{
int ix, iy, iz;
int nix, niy, niz;
int pix, piy, piz;
int nnix, nniy, nniz;
/* Tri-cubic b-spline interpolation. */
/* A -0.5 offset is used to center the cubic samples around the sample point. */
const float tx = svm_image_texture_frac(x - 0.5f, &ix);
const float ty = svm_image_texture_frac(y - 0.5f, &iy);
const float tz = svm_image_texture_frac(z - 0.5f, &iz);
pix = ix - 1;
piy = iy - 1;
piz = iz - 1;
@ -259,8 +275,8 @@ template<typename T> struct NanoVDBInterpolator {
const int zc[4] = {piz, iz, niz, nniz};
float u[4], v[4], w[4];
/* Some helper macro to keep code reasonable size,
* let compiler to inline all the matrix multiplications.
/* Some helper macros to keep code size reasonable.
* Lets the compiler inline all the matrix multiplications.
*/
# define DATA(x, y, z) (read(acc.getValue(nanovdb::Coord(xc[x], yc[y], zc[z]))))
# define COL_TERM(col, row) \
@ -281,21 +297,26 @@ template<typename T> struct NanoVDBInterpolator {
# undef DATA
}
static ccl_always_inline float4
static ccl_always_inline OutT
interp_3d(const TextureInfo &info, float x, float y, float z, int interp)
{
using namespace nanovdb;
NanoGrid<T> *const grid = (NanoGrid<T> *)info.data;
AccessorType acc = grid->getAccessor();
NanoGrid<TexT> *const grid = (NanoGrid<TexT> *)info.data;
switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
case INTERPOLATION_CLOSEST:
switch (interp) {
case INTERPOLATION_CLOSEST: {
ReadAccessor<TexT> acc(grid->tree().root());
return interp_3d_closest(acc, x, y, z);
case INTERPOLATION_LINEAR:
}
case INTERPOLATION_LINEAR: {
CachedReadAccessor<TexT> acc(grid->tree().root());
return interp_3d_linear(acc, x, y, z);
default:
}
default: {
CachedReadAccessor<TexT> acc(grid->tree().root());
return interp_3d_cubic(acc, x, y, z);
}
}
}
};
@ -318,16 +339,21 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals, int id, float3 P, in
#ifdef WITH_NANOVDB
if (info.data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) {
return NanoVDBInterpolator<float>::interp_3d(info, x, y, z, interpolation);
const float f = NanoVDBInterpolator<float, float>::interp_3d(info, x, y, z, interpolation);
return make_float4(f, f, f, 1.0f);
}
else if (info.data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
return NanoVDBInterpolator<nanovdb::Vec3f>::interp_3d(info, x, y, z, interpolation);
return NanoVDBInterpolator<packed_float3, float4>::interp_3d(info, x, y, z, interpolation);
}
else if (info.data_type == IMAGE_DATA_TYPE_NANOVDB_FPN) {
return NanoVDBInterpolator<nanovdb::FpN>::interp_3d(info, x, y, z, interpolation);
const float f = NanoVDBInterpolator<nanovdb::FpN, float>::interp_3d(
info, x, y, z, interpolation);
return make_float4(f, f, f, 1.0f);
}
else if (info.data_type == IMAGE_DATA_TYPE_NANOVDB_FP16) {
return NanoVDBInterpolator<nanovdb::Fp16>::interp_3d(info, x, y, z, interpolation);
const float f = NanoVDBInterpolator<nanovdb::Fp16, float>::interp_3d(
info, x, y, z, interpolation);
return make_float4(f, f, f, 1.0f);
}
#else
if (info.data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT ||

View File

@ -45,6 +45,7 @@ typedef unsigned long long uint64_t;
#define ccl_global
#define ccl_inline_constant static __constant__
#define ccl_device_constant __constant__ __device__
#define ccl_static_constexpr static constexpr
#define ccl_constant const
#define ccl_gpu_shared __shared__
#define ccl_private

View File

@ -34,7 +34,7 @@ ccl_device bool film_adaptive_sampling_convergence_check(KernelGlobals kg,
int x,
int y,
float threshold,
bool reset,
int reset,
int offset,
int stride)
{

View File

@ -36,9 +36,6 @@ set(LIB
${LLVM_LIBRARY}
)
# OSL and LLVM are built without RTTI
string(APPEND CMAKE_CXX_FLAGS " ${RTTI_DISABLE_FLAGS}")
if(APPLE)
# Disable allocation warning on macOS prior to 10.14: the OSLRenderServices
# contains member which is 64 bytes aligned (cache inside of OIIO's

View File

@ -110,7 +110,17 @@ void osl_eval_nodes<SHADER_TYPE_SURFACE>(const KernelGlobalsCPU *kg,
if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) {
/* background */
if (kg->osl->background_state) {
#if OSL_LIBRARY_VERSION_CODE >= 11304
ss->execute(*octx,
*(kg->osl->background_state),
kg->osl_thread_index,