1
1
This repository has been archived on 2023-10-09. You can view files and clone it, but cannot push or open issues or pull requests.
Files
blender-archive/build_files/build_environment/patches/openvdb_metal.diff
Brecht Van Lommel 02c2970983 Cycles: add NanoVDB support for Metal on Apple Silicon
Contributed by Yulia Kuznetcova at Apple.

NanoVDB is patched to add the address spaces required by Metal. We hope that
in the future Metal will support the generic address space.

For AMD and Intel this is currently not available, since it causes a performance
regression even on scenes without volumes.

Pull Request #104837
2023-02-21 15:03:52 +01:00

8008 lines
375 KiB
Diff

Index: nanovdb/nanovdb/NanoVDB.h
===================================================================
--- nanovdb/nanovdb/NanoVDB.h (revision 63215)
+++ nanovdb/nanovdb/NanoVDB.h (working copy)
@@ -140,8 +140,28 @@
#define NANOVDB_ALIGN(n) alignas(n)
#endif // !defined(NANOVDB_ALIGN)
-#ifdef __CUDACC_RTC__
+#ifdef __KERNEL_METAL__
+using namespace metal;
+#define std metal
+#define double uint64_t
+#define __global__ device
+#define __local__ thread
+#define __constant__ constant
+#define sqrtf sqrt
+#define rintf rint
+#define fminf fmin
+#define fmaxf fmax
+#define floorf floor
+#define ceilf ceil
+#define fabs abs
+#define fmaf fma
+#define tanf tan
+
+#define NANOVDB_ASSERT(x)
+
+#elif defined(__CUDACC_RTC__)
+
typedef signed char int8_t;
typedef short int16_t;
typedef int int32_t;
@@ -157,6 +177,10 @@
#else // !__CUDACC_RTC__
+#define __constant__ const
+#define __global__
+#define __local__
+
#include <stdlib.h> // for abs in clang7
#include <stdint.h> // for types like int32_t etc
#include <stddef.h> // for size_t type
@@ -262,7 +286,7 @@
Index = 19,// index into an external array of values
End = 20 };
-#ifndef __CUDACC_RTC__
+#if !defined(__CUDACC_RTC__) && !defined(__KERNEL_METAL__)
/// @brief Retuns a c-string used to describe a GridType
inline const char* toStr(GridType gridType)
{
@@ -289,7 +313,7 @@
IndexGrid = 8,// grid whose values are offsets, e.g. into an external array
End = 9 };
-#ifndef __CUDACC_RTC__
+#if !defined(__CUDACC_RTC__) && !defined(__KERNEL_METAL__)
/// @brief Retuns a c-string used to describe a GridClass
inline const char* toStr(GridClass gridClass)
{
@@ -313,7 +337,7 @@
End = 1 << 6,
};
-#ifndef __CUDACC_RTC__
+#if !defined(__CUDACC_RTC__) && !defined(__KERNEL_METAL__)
/// @brief Retuns a c-string used to describe a GridFlags
inline const char* toStr(GridFlags gridFlags)
{
@@ -355,13 +379,13 @@
template<typename T1, typename T2>
struct is_same
{
- static constexpr bool value = false;
+ static __constant__ constexpr bool value = false;
};
template<typename T>
struct is_same<T, T>
{
- static constexpr bool value = true;
+ static __constant__ constexpr bool value = true;
};
// --------------------------> enable_if <------------------------------------
@@ -383,13 +407,13 @@
template<typename T>
struct is_const
{
- static constexpr bool value = false;
+ static __constant__ constexpr bool value = false;
};
template<typename T>
struct is_const<const T>
{
- static constexpr bool value = true;
+ static __constant__ constexpr bool value = true;
};
// --------------------------> remove_const <------------------------------------
@@ -412,7 +436,7 @@
template<typename T>
struct is_floating_point
{
- static const bool value = is_same<T, float>::value || is_same<T, double>::value;
+ static __constant__ const bool value = is_same<T, float>::value || is_same<T, double>::value;
};
// --------------------------> is_specialization <------------------------------------
@@ -425,12 +449,12 @@
template<typename AnyType, template<typename...> class TemplateType>
struct is_specialization
{
- static const bool value = false;
+ static __constant__ const bool value = false;
};
template<typename... Args, template<typename...> class TemplateType>
struct is_specialization<TemplateType<Args...>, TemplateType>
{
- static const bool value = true;
+ static __constant__ const bool value = true;
};
// --------------------------> Value Map <------------------------------------
@@ -495,19 +519,19 @@
// --------------------------> utility functions related to alignment <------------------------------------
/// @brief return true if the specified pointer is aligned
-__hostdev__ inline static bool isAligned(const void* p)
+__hostdev__ inline static bool isAligned(__global__ const void* p)
{
return uint64_t(p) % NANOVDB_DATA_ALIGNMENT == 0;
}
/// @brief return true if the specified pointer is aligned and not NULL
-__hostdev__ inline static bool isValid(const void* p)
+__hostdev__ inline static bool isValid(__global__ const void* p)
{
return p != nullptr && uint64_t(p) % NANOVDB_DATA_ALIGNMENT == 0;
}
/// @brief return the smallest number of bytes that when added to the specified pointer results in an aligned pointer
-__hostdev__ inline static uint64_t alignmentPadding(const void* p)
+__hostdev__ inline static uint64_t alignmentPadding(__global__ const void* p)
{
NANOVDB_ASSERT(p);
return (NANOVDB_DATA_ALIGNMENT - (uint64_t(p) % NANOVDB_DATA_ALIGNMENT)) % NANOVDB_DATA_ALIGNMENT;
@@ -515,43 +539,66 @@
/// @brief offset the specified pointer so it is aligned.
template <typename T>
-__hostdev__ inline static T* alignPtr(T* p)
+__hostdev__ inline static __global__ T* alignPtr(__global__ T* p)
{
NANOVDB_ASSERT(p);
- return reinterpret_cast<T*>( (uint8_t*)p + alignmentPadding(p) );
+ return reinterpret_cast<__global__ T*>( (__global__ uint8_t*)p + alignmentPadding(p) );
}
/// @brief offset the specified pointer so it is aligned.
template <typename T>
-__hostdev__ inline static const T* alignPtr(const T* p)
+__hostdev__ inline static __global__ const T* alignPtr(__global__ const T* p)
{
NANOVDB_ASSERT(p);
- return reinterpret_cast<const T*>( (const uint8_t*)p + alignmentPadding(p) );
+ return reinterpret_cast<__global__ const T*>( (__global__ const uint8_t*)p + alignmentPadding(p) );
}
// --------------------------> PtrDiff PtrAdd <------------------------------------
template <typename T1, typename T2>
-__hostdev__ inline static int64_t PtrDiff(const T1* p, const T2* q)
+__hostdev__ inline static int64_t PtrDiff(__global__ const T1* p, __global__ const T2* q)
{
NANOVDB_ASSERT(p && q);
- return reinterpret_cast<const char*>(p) - reinterpret_cast<const char*>(q);
+ return reinterpret_cast<__global__ const char*>(p) - reinterpret_cast<__global__ const char*>(q);
}
+#if defined(__KERNEL_METAL__)
+template <typename T1, typename T2>
+__hostdev__ inline static int64_t PtrDiff(__local__ const T1* p, __local__ const T2* q)
+{
+ NANOVDB_ASSERT(p && q);
+ return reinterpret_cast<__local__ const char*>(p) - reinterpret_cast<__local__ const char*>(q);
+}
+#endif
template <typename DstT, typename SrcT>
-__hostdev__ inline static DstT* PtrAdd(SrcT *p, int64_t offset)
+__hostdev__ inline static __global__ DstT* PtrAdd(__global__ SrcT *p, int64_t offset)
{
NANOVDB_ASSERT(p);
- return reinterpret_cast<DstT*>(reinterpret_cast<char*>(p) + offset);
+ return reinterpret_cast<__global__ DstT*>(reinterpret_cast<__global__ char*>(p) + offset);
}
+#if defined(__KERNEL_METAL__)
+template <typename DstT, typename SrcT>
+__hostdev__ inline static __local__ DstT* PtrAdd(__local__ SrcT *p, int64_t offset)
+{
+ NANOVDB_ASSERT(p);
+ return reinterpret_cast<__local__ DstT*>(reinterpret_cast<__local__ char*>(p) + offset);
+}
+#endif
template <typename DstT, typename SrcT>
-__hostdev__ inline static const DstT* PtrAdd(const SrcT *p, int64_t offset)
+__hostdev__ inline static __global__ const DstT* PtrAdd(__global__ const SrcT *p, int64_t offset)
{
NANOVDB_ASSERT(p);
- return reinterpret_cast<const DstT*>(reinterpret_cast<const char*>(p) + offset);
+ return reinterpret_cast<__global__ const DstT*>(reinterpret_cast<__global__ const char*>(p) + offset);
}
-
+#if defined(__KERNEL_METAL__)
+template <typename DstT, typename SrcT>
+__hostdev__ inline static __local__ const DstT* PtrAdd(__local__ const SrcT *p, int64_t offset)
+{
+ NANOVDB_ASSERT(p);
+ return reinterpret_cast<__local__ const DstT*>(reinterpret_cast<__local__ const char*>(p) + offset);
+}
+#endif
// --------------------------> Rgba8 <------------------------------------
/// @brief 8-bit red, green, blue, alpha packed into 32 bit unsigned int
@@ -562,13 +609,13 @@
uint32_t packed;// 32 bit packed representation
} mData;
public:
- static const int SIZE = 4;
+ static __constant__ const int SIZE = 4;
using ValueType = uint8_t;
- Rgba8(const Rgba8&) = default;
- Rgba8(Rgba8&&) = default;
- Rgba8& operator=(Rgba8&&) = default;
- Rgba8& operator=(const Rgba8&) = default;
+ Rgba8(__global__ const Rgba8&) = default;
+ Rgba8(__global__ Rgba8&&) = default;
+ __global__ Rgba8& operator=(__global__ Rgba8&&) __global__ = default;
+ __global__ Rgba8& operator=(__global__ const Rgba8&) __global__ = default;
__hostdev__ Rgba8() : mData{0,0,0,0} {static_assert(sizeof(uint32_t) == sizeof(Rgba8),"Unexpected sizeof");}
__hostdev__ Rgba8(uint8_t r, uint8_t g, uint8_t b, uint8_t a = 255u) : mData{r, g, b, a} {}
explicit __hostdev__ Rgba8(uint8_t v) : Rgba8(v,v,v,v) {}
@@ -579,8 +626,8 @@
(uint8_t(0.5f + a * 255.0f))}// round to nearest
{
}
- __hostdev__ bool operator<(const Rgba8& rhs) const { return mData.packed < rhs.mData.packed; }
- __hostdev__ bool operator==(const Rgba8& rhs) const { return mData.packed == rhs.mData.packed; }
+ __hostdev__ bool operator<(__global__ const Rgba8& rhs) const { return mData.packed < rhs.mData.packed; }
+ __hostdev__ bool operator==(__global__ const Rgba8& rhs) const { return mData.packed == rhs.mData.packed; }
__hostdev__ float lengthSqr() const
{
return 0.0000153787005f*(float(mData.c[0])*mData.c[0] +
@@ -588,18 +635,18 @@
float(mData.c[2])*mData.c[2]);//1/255^2
}
__hostdev__ float length() const { return sqrtf(this->lengthSqr() ); }
- __hostdev__ const uint8_t& operator[](int n) const { return mData.c[n]; }
- __hostdev__ uint8_t& operator[](int n) { return mData.c[n]; }
- __hostdev__ const uint32_t& packed() const { return mData.packed; }
- __hostdev__ uint32_t& packed() { return mData.packed; }
- __hostdev__ const uint8_t& r() const { return mData.c[0]; }
- __hostdev__ const uint8_t& g() const { return mData.c[1]; }
- __hostdev__ const uint8_t& b() const { return mData.c[2]; }
- __hostdev__ const uint8_t& a() const { return mData.c[3]; }
- __hostdev__ uint8_t& r() { return mData.c[0]; }
- __hostdev__ uint8_t& g() { return mData.c[1]; }
- __hostdev__ uint8_t& b() { return mData.c[2]; }
- __hostdev__ uint8_t& a() { return mData.c[3]; }
+ __hostdev__ __global__ const uint8_t& operator[](int n) const __global__ { return mData.c[n]; }
+ __hostdev__ __global__ uint8_t& operator[](int n) __global__ { return mData.c[n]; }
+ __hostdev__ __global__ const uint32_t& packed() const __global__ { return mData.packed; }
+ __hostdev__ __global__ uint32_t& packed() __global__ { return mData.packed; }
+ __hostdev__ __global__ const uint8_t& r() const __global__ { return mData.c[0]; }
+ __hostdev__ __global__ const uint8_t& g() const __global__ { return mData.c[1]; }
+ __hostdev__ __global__ const uint8_t& b() const __global__ { return mData.c[2]; }
+ __hostdev__ __global__ const uint8_t& a() const __global__ { return mData.c[3]; }
+ __hostdev__ __global__ uint8_t& r() __global__ { return mData.c[0]; }
+ __hostdev__ __global__ uint8_t& g() __global__ { return mData.c[1]; }
+ __hostdev__ __global__ uint8_t& b() __global__ { return mData.c[2]; }
+ __hostdev__ __global__ uint8_t& a() __global__ { return mData.c[3]; }
};// Rgba8
using PackedRGBA8 = Rgba8;// for backwards compatibility
@@ -660,17 +707,17 @@
NANOVDB_ASSERT(minor < (1u << 11));// max value of minor is 2047
NANOVDB_ASSERT(patch < (1u << 10));// max value of patch is 1023
}
- __hostdev__ bool operator==(const Version &rhs) const {return mData == rhs.mData;}
- __hostdev__ bool operator< (const Version &rhs) const {return mData < rhs.mData;}
- __hostdev__ bool operator<=(const Version &rhs) const {return mData <= rhs.mData;}
- __hostdev__ bool operator> (const Version &rhs) const {return mData > rhs.mData;}
- __hostdev__ bool operator>=(const Version &rhs) const {return mData >= rhs.mData;}
+ __hostdev__ bool operator==(__global__ const Version &rhs) const {return mData == rhs.mData;}
+ __hostdev__ bool operator< (__global__ const Version &rhs) const {return mData < rhs.mData;}
+ __hostdev__ bool operator<=(__global__ const Version &rhs) const {return mData <= rhs.mData;}
+ __hostdev__ bool operator> (__global__ const Version &rhs) const {return mData > rhs.mData;}
+ __hostdev__ bool operator>=(__global__ const Version &rhs) const {return mData >= rhs.mData;}
__hostdev__ uint32_t id() const { return mData; }
__hostdev__ uint32_t getMajor() const { return (mData >> 21) & ((1u << 11) - 1);}
__hostdev__ uint32_t getMinor() const { return (mData >> 10) & ((1u << 11) - 1);}
__hostdev__ uint32_t getPatch() const { return mData & ((1u << 10) - 1);}
-#ifndef __CUDACC_RTC__
+#if !defined(__CUDACC_RTC__) && !defined(__KERNEL_METAL__)
const char* c_str() const
{
char *buffer = (char*)malloc(4 + 1 + 4 + 1 + 4 + 1);// xxxx.xxxx.xxxx\0
@@ -749,7 +796,7 @@
//@}
template<typename Type>
-__hostdev__ inline bool isApproxZero(const Type& x)
+__hostdev__ inline bool isApproxZero(__global__ const Type& x)
{
return !(x > Tolerance<Type>::value()) && !(x < -Tolerance<Type>::value());
}
@@ -771,10 +818,12 @@
{
return fminf(a, b);
}
+#ifndef __KERNEL_METAL__
__hostdev__ inline double Min(double a, double b)
{
return fmin(a, b);
}
+#endif
template<typename Type>
__hostdev__ inline Type Max(Type a, Type b)
{
@@ -793,45 +842,55 @@
{
return fmaxf(a, b);
}
+#ifndef __KERNEL_METAL__
__hostdev__ inline double Max(double a, double b)
{
return fmax(a, b);
}
+#endif
__hostdev__ inline float Clamp(float x, float a, float b)
{
return Max(Min(x, b), a);
}
+#ifndef __KERNEL_METAL__
__hostdev__ inline double Clamp(double x, double a, double b)
{
return Max(Min(x, b), a);
}
+#endif
__hostdev__ inline float Fract(float x)
{
return x - floorf(x);
}
+#ifndef __KERNEL_METAL__
__hostdev__ inline double Fract(double x)
{
return x - floor(x);
}
+#endif
__hostdev__ inline int32_t Floor(float x)
{
return int32_t(floorf(x));
}
+#ifndef __KERNEL_METAL__
__hostdev__ inline int32_t Floor(double x)
{
return int32_t(floor(x));
}
+#endif
__hostdev__ inline int32_t Ceil(float x)
{
return int32_t(ceilf(x));
}
+#ifndef __KERNEL_METAL__
__hostdev__ inline int32_t Ceil(double x)
{
return int32_t(ceil(x));
}
+#endif
template<typename T>
__hostdev__ inline T Pow2(T x)
@@ -875,28 +934,54 @@
}
template<typename CoordT, typename RealT, template<typename> class Vec3T>
-__hostdev__ inline CoordT Round(const Vec3T<RealT>& xyz);
+__hostdev__ inline CoordT Round(__global__ const Vec3T<RealT>& xyz);
+#if defined(__KERNEL_METAL__)
+template<typename CoordT, typename RealT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Round(__local__ const Vec3T<RealT>& xyz);
+#endif
template<typename CoordT, template<typename> class Vec3T>
-__hostdev__ inline CoordT Round(const Vec3T<float>& xyz)
+__hostdev__ inline CoordT Round(__global__ const Vec3T<float>& xyz)
{
return CoordT(int32_t(rintf(xyz[0])), int32_t(rintf(xyz[1])), int32_t(rintf(xyz[2])));
//return CoordT(int32_t(roundf(xyz[0])), int32_t(roundf(xyz[1])), int32_t(roundf(xyz[2])) );
//return CoordT(int32_t(floorf(xyz[0] + 0.5f)), int32_t(floorf(xyz[1] + 0.5f)), int32_t(floorf(xyz[2] + 0.5f)));
}
+#if defined(__KERNEL_METAL__)
+template<typename CoordT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Round(__local__ const Vec3T<float>& xyz)
+{
+ return CoordT(int32_t(rintf(xyz[0])), int32_t(rintf(xyz[1])), int32_t(rintf(xyz[2])));
+ //return CoordT(int32_t(roundf(xyz[0])), int32_t(roundf(xyz[1])), int32_t(roundf(xyz[2])) );
+ //return CoordT(int32_t(floorf(xyz[0] + 0.5f)), int32_t(floorf(xyz[1] + 0.5f)), int32_t(floorf(xyz[2] + 0.5f)));
+}
+#endif
template<typename CoordT, template<typename> class Vec3T>
-__hostdev__ inline CoordT Round(const Vec3T<double>& xyz)
+__hostdev__ inline CoordT Round(__global__ const Vec3T<double>& xyz)
{
return CoordT(int32_t(floor(xyz[0] + 0.5)), int32_t(floor(xyz[1] + 0.5)), int32_t(floor(xyz[2] + 0.5)));
}
+#if defined(__KERNEL_METAL__)
+template<typename CoordT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Round(__local__ const Vec3T<double>& xyz)
+{
+ return CoordT(int32_t(floor(xyz[0] + 0.5)), int32_t(floor(xyz[1] + 0.5)), int32_t(floor(xyz[2] + 0.5)));
+}
+#endif
template<typename CoordT, typename RealT, template<typename> class Vec3T>
-__hostdev__ inline CoordT RoundDown(const Vec3T<RealT>& xyz)
+__hostdev__ inline CoordT RoundDown(__global__ const Vec3T<RealT>& xyz)
{
return CoordT(Floor(xyz[0]), Floor(xyz[1]), Floor(xyz[2]));
}
-
+#if defined(__KERNEL_METAL__)
+template<typename CoordT, typename RealT, template<typename> class Vec3T>
+__hostdev__ inline CoordT RoundDown(__local__ const Vec3T<RealT>& xyz)
+{
+ return CoordT(Floor(xyz[0]), Floor(xyz[1]), Floor(xyz[2]));
+}
+#endif
//@{
/// Return the square root of a floating-point value.
__hostdev__ inline float Sqrt(float x)
@@ -903,18 +988,24 @@
{
return sqrtf(x);
}
+#ifndef __KERNEL_METAL__
__hostdev__ inline double Sqrt(double x)
{
return sqrt(x);
}
+#endif
//@}
/// Return the sign of the given value as an integer (either -1, 0 or 1).
template <typename T>
-__hostdev__ inline T Sign(const T &x) { return ((T(0) < x)?T(1):T(0)) - ((x < T(0))?T(1):T(0)); }
+__hostdev__ inline T Sign(__global__ const T &x) { return ((T(0) < x)?T(1):T(0)) - ((x < T(0))?T(1):T(0)); }
+#if defined(__KERNEL_METAL__)
+template <typename T>
+__hostdev__ inline T Sign(__local__ const T &x) { return ((T(0) < x)?T(1):T(0)) - ((x < T(0))?T(1):T(0)); }
+#endif
template<typename Vec3T>
-__hostdev__ inline int MinIndex(const Vec3T& v)
+__hostdev__ inline int MinIndex(__global__ const Vec3T& v)
{
#if 0
static const int hashTable[8] = {2, 1, 9, 1, 2, 9, 0, 0}; //9 are dummy values
@@ -930,11 +1021,30 @@
#endif
}
+#if defined(__KERNEL_METAL__)
template<typename Vec3T>
-__hostdev__ inline int MaxIndex(const Vec3T& v)
+__hostdev__ inline int MinIndex(__local__ const Vec3T& v)
{
#if 0
static const int hashTable[8] = {2, 1, 9, 1, 2, 9, 0, 0}; //9 are dummy values
+ const int hashKey = ((v[0] < v[1]) << 2) + ((v[0] < v[2]) << 1) + (v[1] < v[2]); // ?*4+?*2+?*1
+ return hashTable[hashKey];
+#else
+ if (v[0] < v[1] && v[0] < v[2])
+ return 0;
+ if (v[1] < v[2])
+ return 1;
+ else
+ return 2;
+#endif
+}
+#endif
+
+template<typename Vec3T>
+__hostdev__ inline int MaxIndex(__global__ const Vec3T& v)
+{
+#if 0
+ static const int hashTable[8] = {2, 1, 9, 1, 2, 9, 0, 0}; //9 are dummy values
const int hashKey = ((v[0] > v[1]) << 2) + ((v[0] > v[2]) << 1) + (v[1] > v[2]); // ?*4+?*2+?*1
return hashTable[hashKey];
#else
@@ -947,6 +1057,25 @@
#endif
}
+#if defined(__KERNEL_METAL__)
+template<typename Vec3T>
+__hostdev__ inline int MaxIndex(__local__ const Vec3T& v)
+{
+#if 0
+ static const int hashTable[8] = {2, 1, 9, 1, 2, 9, 0, 0}; //9 are dummy values
+ const int hashKey = ((v[0] > v[1]) << 2) + ((v[0] > v[2]) << 1) + (v[1] > v[2]); // ?*4+?*2+?*1
+ return hashTable[hashKey];
+#else
+ if (v[0] > v[1] && v[0] > v[2])
+ return 0;
+ if (v[1] > v[2])
+ return 1;
+ else
+ return 2;
+#endif
+}
+#endif
+
/// @brief round up byteSize to the nearest wordSize, e.g. to align to machine word: AlignUp<sizeof(size_t)(n)
///
/// @details both wordSize and byteSize are in byte units
@@ -988,7 +1117,7 @@
{
}
- __hostdev__ Coord(ValueType *ptr)
+ __hostdev__ Coord(__global__ ValueType *ptr)
: mVec{ptr[0], ptr[1], ptr[2]}
{
}
@@ -997,9 +1126,9 @@
__hostdev__ int32_t y() const { return mVec[1]; }
__hostdev__ int32_t z() const { return mVec[2]; }
- __hostdev__ int32_t& x() { return mVec[0]; }
- __hostdev__ int32_t& y() { return mVec[1]; }
- __hostdev__ int32_t& z() { return mVec[2]; }
+ __hostdev__ __global__ int32_t& x() __global__ { return mVec[0]; }
+ __hostdev__ __global__ int32_t& y() __global__ { return mVec[1]; }
+ __hostdev__ __global__ int32_t& z() __global__ { return mVec[2]; }
__hostdev__ static Coord max() { return Coord(int32_t((1u << 31) - 1)); }
@@ -1009,15 +1138,21 @@
/// @brief Return a const reference to the given Coord component.
/// @warning The argument is assumed to be 0, 1, or 2.
- __hostdev__ const ValueType& operator[](IndexType i) const { return mVec[i]; }
+ __hostdev__ __global__ const ValueType& operator[](IndexType i) const __global__ { return mVec[i]; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __local__ const ValueType& operator[](IndexType i) const __local__ { return mVec[i]; }
+#endif
/// @brief Return a non-const reference to the given Coord component.
/// @warning The argument is assumed to be 0, 1, or 2.
- __hostdev__ ValueType& operator[](IndexType i) { return mVec[i]; }
+ __hostdev__ __global__ ValueType& operator[](IndexType i) __global__ { return mVec[i]; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __local__ ValueType& operator[](IndexType i) __local__ { return mVec[i]; }
+#endif
/// @brief Assignment operator that works with openvdb::Coord
template <typename CoordT>
- __hostdev__ Coord& operator=(const CoordT &other)
+ __hostdev__ __global__ Coord& operator=(__global__ const CoordT &other) __global__
{
static_assert(sizeof(Coord) == sizeof(CoordT), "Mis-matched sizeof");
mVec[0] = other[0];
@@ -1025,6 +1160,17 @@
mVec[2] = other[2];
return *this;
}
+#if defined(__KERNEL_METAL__)
+ template <typename CoordT>
+ __hostdev__ __local__ Coord& operator=(__local__ const CoordT &other) __local__
+ {
+ static_assert(sizeof(Coord) == sizeof(CoordT), "Mis-matched sizeof");
+ mVec[0] = other[0];
+ mVec[1] = other[1];
+ mVec[2] = other[2];
+ return *this;
+ }
+#endif
/// @brief Return a new instance with coordinates masked by the given unsigned integer.
__hostdev__ Coord operator&(IndexType n) const { return Coord(mVec[0] & n, mVec[1] & n, mVec[2] & n); }
@@ -1036,15 +1182,15 @@
__hostdev__ Coord operator>>(IndexType n) const { return Coord(mVec[0] >> n, mVec[1] >> n, mVec[2] >> n); }
/// @brief Return true if this Coord is lexicographically less than the given Coord.
- __hostdev__ bool operator<(const Coord& rhs) const
+ __hostdev__ bool operator<(__global__ const Coord& rhs) const
{
return mVec[0] < rhs[0] ? true : mVec[0] > rhs[0] ? false : mVec[1] < rhs[1] ? true : mVec[1] > rhs[1] ? false : mVec[2] < rhs[2] ? true : false;
}
// @brief Return true if the Coord components are identical.
- __hostdev__ bool operator==(const Coord& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; }
- __hostdev__ bool operator!=(const Coord& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; }
- __hostdev__ Coord& operator&=(int n)
+ __hostdev__ bool operator==(__global__ const Coord& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; }
+ __hostdev__ bool operator!=(__global__ const Coord& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; }
+ __hostdev__ __global__ Coord& operator&=(int n) __global__
{
mVec[0] &= n;
mVec[1] &= n;
@@ -1051,7 +1197,7 @@
mVec[2] &= n;
return *this;
}
- __hostdev__ Coord& operator<<=(uint32_t n)
+ __hostdev__ __global__ Coord& operator<<=(uint32_t n) __global__
{
mVec[0] <<= n;
mVec[1] <<= n;
@@ -1058,7 +1204,7 @@
mVec[2] <<= n;
return *this;
}
- __hostdev__ Coord& operator>>=(uint32_t n)
+ __hostdev__ __global__ Coord& operator>>=(uint32_t n) __global__
{
mVec[0] >>= n;
mVec[1] >>= n;
@@ -1065,7 +1211,7 @@
mVec[2] >>= n;
return *this;
}
- __hostdev__ Coord& operator+=(int n)
+ __hostdev__ __global__ Coord& operator+=(int n) __global__
{
mVec[0] += n;
mVec[1] += n;
@@ -1072,9 +1218,9 @@
mVec[2] += n;
return *this;
}
- __hostdev__ Coord operator+(const Coord& rhs) const { return Coord(mVec[0] + rhs[0], mVec[1] + rhs[1], mVec[2] + rhs[2]); }
- __hostdev__ Coord operator-(const Coord& rhs) const { return Coord(mVec[0] - rhs[0], mVec[1] - rhs[1], mVec[2] - rhs[2]); }
- __hostdev__ Coord& operator+=(const Coord& rhs)
+ __hostdev__ Coord operator+(__global__ const Coord& rhs) const { return Coord(mVec[0] + rhs[0], mVec[1] + rhs[1], mVec[2] + rhs[2]); }
+ __hostdev__ Coord operator-(__global__ const Coord& rhs) const { return Coord(mVec[0] - rhs[0], mVec[1] - rhs[1], mVec[2] - rhs[2]); }
+ __hostdev__ __global__ Coord& operator+=(__global__ const Coord& rhs) __global__
{
mVec[0] += rhs[0];
mVec[1] += rhs[1];
@@ -1081,7 +1227,7 @@
mVec[2] += rhs[2];
return *this;
}
- __hostdev__ Coord& operator-=(const Coord& rhs)
+ __hostdev__ __global__ Coord& operator-=(__global__ const Coord& rhs) __global__
{
mVec[0] -= rhs[0];
mVec[1] -= rhs[1];
@@ -1090,7 +1236,7 @@
}
/// @brief Perform a component-wise minimum with the other Coord.
- __hostdev__ Coord& minComponent(const Coord& other)
+ __hostdev__ __global__ Coord& minComponent(__global__ const Coord& other) __global__
{
if (other[0] < mVec[0])
mVec[0] = other[0];
@@ -1102,7 +1248,7 @@
}
/// @brief Perform a component-wise maximum with the other Coord.
- __hostdev__ Coord& maxComponent(const Coord& other)
+ __hostdev__ __global__ Coord& maxComponent(__global__ const Coord& other) __global__
{
if (other[0] > mVec[0])
mVec[0] = other[0];
@@ -1113,16 +1259,16 @@
return *this;
}
- __hostdev__ Coord offsetBy(ValueType dx, ValueType dy, ValueType dz) const
+ __hostdev__ Coord offsetBy(ValueType dx, ValueType dy, ValueType dz) const __global__
{
return Coord(mVec[0] + dx, mVec[1] + dy, mVec[2] + dz);
}
- __hostdev__ Coord offsetBy(ValueType n) const { return this->offsetBy(n, n, n); }
+ __hostdev__ Coord offsetBy(ValueType n) const __global__ { return this->offsetBy(n, n, n); }
/// Return true if any of the components of @a a are smaller than the
/// corresponding components of @a b.
- __hostdev__ static inline bool lessThan(const Coord& a, const Coord& b)
+ __hostdev__ static inline bool lessThan(__global__ const Coord& a, __global__ const Coord& b)
{
return (a[0] < b[0] || a[1] < b[1] || a[2] < b[2]);
}
@@ -1130,7 +1276,13 @@
/// @brief Return the largest integer coordinates that are not greater
/// than @a xyz (node centered conversion).
template<typename Vec3T>
- __hostdev__ static Coord Floor(const Vec3T& xyz) { return Coord(nanovdb::Floor(xyz[0]), nanovdb::Floor(xyz[1]), nanovdb::Floor(xyz[2])); }
+ __hostdev__ static Coord Floor(__global__ const Vec3T& xyz) { return Coord(nanovdb::Floor(xyz[0]), nanovdb::Floor(xyz[1]), nanovdb::Floor(xyz[2])); }
+#if defined __KERNEL_METAL__
+ /// @brief Return the largest integer coordinates that are not greater
+ /// than @a xyz (node centered conversion).
+ template<typename Vec3T>
+ __hostdev__ static Coord Floor(__local__ const Vec3T& xyz) { return Coord(nanovdb::Floor(xyz[0]), nanovdb::Floor(xyz[1]), nanovdb::Floor(xyz[2])); }
+#endif
/// @brief Return a hash key derived from the existing coordinates.
/// @details For details on this hash function please see the VDB paper.
@@ -1159,7 +1311,7 @@
T mVec[3];
public:
- static const int SIZE = 3;
+ static __constant__ const int SIZE = 3;
using ValueType = T;
Vec3() = default;
__hostdev__ explicit Vec3(T x)
@@ -1171,18 +1323,18 @@
{
}
template<typename T2>
- __hostdev__ explicit Vec3(const Vec3<T2>& v)
+ __hostdev__ explicit Vec3(__global__ const Vec3<T2>& v)
: mVec{T(v[0]), T(v[1]), T(v[2])}
{
}
- __hostdev__ explicit Vec3(const Coord& ijk)
+ __hostdev__ explicit Vec3(__global__ const Coord& ijk)
: mVec{T(ijk[0]), T(ijk[1]), T(ijk[2])}
{
}
- __hostdev__ bool operator==(const Vec3& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; }
- __hostdev__ bool operator!=(const Vec3& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; }
+ __hostdev__ bool operator==(__global__ const Vec3& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; }
+ __hostdev__ bool operator!=(__global__ const Vec3& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; }
template<typename Vec3T>
- __hostdev__ Vec3& operator=(const Vec3T& rhs)
+ __hostdev__ __global__ Vec3& operator=(__global__ const Vec3T& rhs)
{
mVec[0] = rhs[0];
mVec[1] = rhs[1];
@@ -1189,12 +1341,18 @@
mVec[2] = rhs[2];
return *this;
}
- __hostdev__ const T& operator[](int i) const { return mVec[i]; }
- __hostdev__ T& operator[](int i) { return mVec[i]; }
+ __hostdev__ __global__ const T& operator[](int i) const __global__ { return mVec[i]; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __local__ const T& operator[](int i) const __local__ { return mVec[i]; }
+#endif
+ __hostdev__ __global__ T& operator[](int i) __global__ { return mVec[i]; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __local__ T& operator[](int i) __local__ { return mVec[i]; }
+#endif
template<typename Vec3T>
- __hostdev__ T dot(const Vec3T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2]; }
+ __hostdev__ T dot(__global__ const Vec3T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2]; }
template<typename Vec3T>
- __hostdev__ Vec3 cross(const Vec3T& v) const
+ __hostdev__ Vec3 cross(__global__ const Vec3T& v) const
{
return Vec3(mVec[1] * v[2] - mVec[2] * v[1],
mVec[2] * v[0] - mVec[0] * v[2],
@@ -1206,13 +1364,26 @@
}
__hostdev__ T length() const { return Sqrt(this->lengthSqr()); }
__hostdev__ Vec3 operator-() const { return Vec3(-mVec[0], -mVec[1], -mVec[2]); }
- __hostdev__ Vec3 operator*(const Vec3& v) const { return Vec3(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2]); }
- __hostdev__ Vec3 operator/(const Vec3& v) const { return Vec3(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2]); }
- __hostdev__ Vec3 operator+(const Vec3& v) const { return Vec3(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2]); }
- __hostdev__ Vec3 operator-(const Vec3& v) const { return Vec3(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2]); }
- __hostdev__ Vec3 operator*(const T& s) const { return Vec3(s * mVec[0], s * mVec[1], s * mVec[2]); }
- __hostdev__ Vec3 operator/(const T& s) const { return (T(1) / s) * (*this); }
- __hostdev__ Vec3& operator+=(const Vec3& v)
+ __hostdev__ Vec3 operator*(__global__ const Vec3& v) const { return Vec3(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2]); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ Vec3 operator*(__local__ const Vec3& v) const { return Vec3(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2]); }
+#endif
+ __hostdev__ Vec3 operator/(__global__ const Vec3& v) const { return Vec3(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2]); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ Vec3 operator/(__local__ const Vec3& v) const { return Vec3(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2]); }
+#endif
+ __hostdev__ Vec3 operator+(__global__ const Vec3& v) const { return Vec3(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2]); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ Vec3 operator-(__local__ const Vec3& v) const { return Vec3(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2]); }
+ __hostdev__ Vec3 operator+(__local__ const Vec3& v) const { return Vec3(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2]); }
+#endif
+ __hostdev__ Vec3 operator-(__global__ const Vec3& v) const { return Vec3(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2]); }
+ __hostdev__ Vec3 operator*(__global__ const T& s) const { return Vec3(s * mVec[0], s * mVec[1], s * mVec[2]); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ Vec3 operator*(__local__ const T& s) const { return Vec3(s * mVec[0], s * mVec[1], s * mVec[2]); }
+#endif
+ __hostdev__ Vec3 operator/(__global__ const T& s) const { return (T(1) / s) * (*this); }
+ __hostdev__ __global__ Vec3& operator+=(__global__ const Vec3& v)
{
mVec[0] += v[0];
mVec[1] += v[1];
@@ -1219,7 +1390,7 @@
mVec[2] += v[2];
return *this;
}
- __hostdev__ Vec3& operator-=(const Vec3& v)
+ __hostdev__ __global__ Vec3& operator-=(__global__ const Vec3& v)
{
mVec[0] -= v[0];
mVec[1] -= v[1];
@@ -1226,7 +1397,7 @@
mVec[2] -= v[2];
return *this;
}
- __hostdev__ Vec3& operator*=(const T& s)
+ __hostdev__ __global__ Vec3& operator*=(__global__ const T& s)
{
mVec[0] *= s;
mVec[1] *= s;
@@ -1233,10 +1404,22 @@
mVec[2] *= s;
return *this;
}
- __hostdev__ Vec3& operator/=(const T& s) { return (*this) *= T(1) / s; }
- __hostdev__ Vec3& normalize() { return (*this) /= this->length(); }
+#if defined __KERNEL_METAL__
+ __hostdev__ __local__ Vec3& operator*=(__local__ const T& s)
+ {
+ mVec[0] *= s;
+ mVec[1] *= s;
+ mVec[2] *= s;
+ return *this;
+ }
+#endif
+ __hostdev__ __global__ Vec3& operator/=(__global__ const T& s) { return (*this) *= T(1) / s; }
+#if defined __KERNEL_METAL__
+ __hostdev__ __local__ Vec3& operator/=(__local__ const T& s) { return (*this) *= T(1) / s; }
+#endif
+ __hostdev__ __global__ Vec3& normalize() { return (*this) /= this->length(); }
/// @brief Perform a component-wise minimum with the other Coord.
- __hostdev__ Vec3& minComponent(const Vec3& other)
+ __hostdev__ __global__ Vec3& minComponent(__global__ const Vec3& other)
{
if (other[0] < mVec[0])
mVec[0] = other[0];
@@ -1248,7 +1431,7 @@
}
/// @brief Perform a component-wise maximum with the other Coord.
- __hostdev__ Vec3& maxComponent(const Vec3& other)
+ __hostdev__ __global__ Vec3& maxComponent(__global__ const Vec3& other)
{
if (other[0] > mVec[0])
mVec[0] = other[0];
@@ -1274,15 +1457,29 @@
}; // Vec3<T>
template<typename T1, typename T2>
-__hostdev__ inline Vec3<T2> operator*(T1 scalar, const Vec3<T2>& vec)
+__hostdev__ inline Vec3<T2> operator*(T1 scalar, __global__ const Vec3<T2>& vec)
{
return Vec3<T2>(scalar * vec[0], scalar * vec[1], scalar * vec[2]);
}
+#if defined(__KERNEL_METAL__)
template<typename T1, typename T2>
-__hostdev__ inline Vec3<T2> operator/(T1 scalar, const Vec3<T2>& vec)
+__hostdev__ inline Vec3<T2> operator*(T1 scalar, __local__ const Vec3<T2>& vec)
{
+ return Vec3<T2>(scalar * vec[0], scalar * vec[1], scalar * vec[2]);
+}
+#endif
+template<typename T1, typename T2>
+__hostdev__ inline Vec3<T2> operator/(T1 scalar, __global__ const Vec3<T2>& vec)
+{
return Vec3<T2>(scalar / vec[0], scalar / vec[1], scalar / vec[2]);
}
+#if defined(__KERNEL_METAL__)
+template<typename T1, typename T2>
+__hostdev__ inline Vec3<T2> operator/(T1 scalar, __local__ const Vec3<T2>& vec)
+{
+ return Vec3<T2>(scalar / vec[0], scalar / vec[1], scalar / vec[2]);
+}
+#endif
using Vec3R = Vec3<double>;
using Vec3d = Vec3<double>;
@@ -1304,7 +1501,7 @@
T mVec[4];
public:
- static const int SIZE = 4;
+ static __constant__ const int SIZE = 4;
using ValueType = T;
Vec4() = default;
__hostdev__ explicit Vec4(T x)
@@ -1316,14 +1513,14 @@
{
}
template<typename T2>
- __hostdev__ explicit Vec4(const Vec4<T2>& v)
+ __hostdev__ explicit Vec4(__global__ const Vec4<T2>& v)
: mVec{T(v[0]), T(v[1]), T(v[2]), T(v[3])}
{
}
- __hostdev__ bool operator==(const Vec4& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2] && mVec[3] == rhs[3]; }
- __hostdev__ bool operator!=(const Vec4& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2] || mVec[3] != rhs[3]; }
+ __hostdev__ bool operator==(__global__ const Vec4& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2] && mVec[3] == rhs[3]; }
+ __hostdev__ bool operator!=(__global__ const Vec4& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2] || mVec[3] != rhs[3]; }
template<typename Vec4T>
- __hostdev__ Vec4& operator=(const Vec4T& rhs)
+ __hostdev__ __global__ Vec4& operator=(__global__ const Vec4T& rhs)
{
mVec[0] = rhs[0];
mVec[1] = rhs[1];
@@ -1331,10 +1528,10 @@
mVec[3] = rhs[3];
return *this;
}
- __hostdev__ const T& operator[](int i) const { return mVec[i]; }
- __hostdev__ T& operator[](int i) { return mVec[i]; }
+ __hostdev__ __global__ const T& operator[](int i) const { return mVec[i]; }
+ __hostdev__ __global__ T& operator[](int i) { return mVec[i]; }
template<typename Vec4T>
- __hostdev__ T dot(const Vec4T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2] + mVec[3] * v[3]; }
+ __hostdev__ T dot(__global__ const Vec4T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2] + mVec[3] * v[3]; }
__hostdev__ T lengthSqr() const
{
return mVec[0] * mVec[0] + mVec[1] * mVec[1] + mVec[2] * mVec[2] + mVec[3] * mVec[3]; // 7 flops
@@ -1341,13 +1538,13 @@
}
__hostdev__ T length() const { return Sqrt(this->lengthSqr()); }
__hostdev__ Vec4 operator-() const { return Vec4(-mVec[0], -mVec[1], -mVec[2], -mVec[3]); }
- __hostdev__ Vec4 operator*(const Vec4& v) const { return Vec4(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2], mVec[3] * v[3]); }
- __hostdev__ Vec4 operator/(const Vec4& v) const { return Vec4(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2], mVec[3] / v[3]); }
- __hostdev__ Vec4 operator+(const Vec4& v) const { return Vec4(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2], mVec[3] + v[3]); }
- __hostdev__ Vec4 operator-(const Vec4& v) const { return Vec4(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2], mVec[3] - v[3]); }
- __hostdev__ Vec4 operator*(const T& s) const { return Vec4(s * mVec[0], s * mVec[1], s * mVec[2], s * mVec[3]); }
- __hostdev__ Vec4 operator/(const T& s) const { return (T(1) / s) * (*this); }
- __hostdev__ Vec4& operator+=(const Vec4& v)
+ __hostdev__ Vec4 operator*(__global__ const Vec4& v) const { return Vec4(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2], mVec[3] * v[3]); }
+ __hostdev__ Vec4 operator/(__global__ const Vec4& v) const { return Vec4(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2], mVec[3] / v[3]); }
+ __hostdev__ Vec4 operator+(__global__ const Vec4& v) const { return Vec4(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2], mVec[3] + v[3]); }
+ __hostdev__ Vec4 operator-(__global__ const Vec4& v) const { return Vec4(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2], mVec[3] - v[3]); }
+ __hostdev__ Vec4 operator*(__global__ const T& s) const { return Vec4(s * mVec[0], s * mVec[1], s * mVec[2], s * mVec[3]); }
+ __hostdev__ Vec4 operator/(__global__ const T& s) const { return (T(1) / s) * (*this); }
+ __hostdev__ __global__ Vec4& operator+=(__global__ const Vec4& v)
{
mVec[0] += v[0];
mVec[1] += v[1];
@@ -1355,7 +1552,7 @@
mVec[3] += v[3];
return *this;
}
- __hostdev__ Vec4& operator-=(const Vec4& v)
+ __hostdev__ __global__ Vec4& operator-=(__global__ const Vec4& v)
{
mVec[0] -= v[0];
mVec[1] -= v[1];
@@ -1363,7 +1560,7 @@
mVec[3] -= v[3];
return *this;
}
- __hostdev__ Vec4& operator*=(const T& s)
+ __hostdev__ __global__ Vec4& operator*=(__global__ const T& s)
{
mVec[0] *= s;
mVec[1] *= s;
@@ -1371,10 +1568,10 @@
mVec[3] *= s;
return *this;
}
- __hostdev__ Vec4& operator/=(const T& s) { return (*this) *= T(1) / s; }
- __hostdev__ Vec4& normalize() { return (*this) /= this->length(); }
+ __hostdev__ __global__ Vec4& operator/=(__global__ const T& s) { return (*this) *= T(1) / s; }
+ __hostdev__ __global__ Vec4& normalize() { return (*this) /= this->length(); }
/// @brief Perform a component-wise minimum with the other Coord.
- __hostdev__ Vec4& minComponent(const Vec4& other)
+ __hostdev__ __global__ Vec4& minComponent(__global__ const Vec4& other)
{
if (other[0] < mVec[0])
mVec[0] = other[0];
@@ -1388,7 +1585,7 @@
}
/// @brief Perform a component-wise maximum with the other Coord.
- __hostdev__ Vec4& maxComponent(const Vec4& other)
+ __hostdev__ __global__ Vec4& maxComponent(__global__ const Vec4& other)
{
if (other[0] > mVec[0])
mVec[0] = other[0];
@@ -1403,12 +1600,12 @@
}; // Vec4<T>
template<typename T1, typename T2>
-__hostdev__ inline Vec4<T2> operator*(T1 scalar, const Vec4<T2>& vec)
+__hostdev__ inline Vec4<T2> operator*(T1 scalar, __global__ const Vec4<T2>& vec)
{
return Vec4<T2>(scalar * vec[0], scalar * vec[1], scalar * vec[2], scalar * vec[3]);
}
template<typename T1, typename T2>
-__hostdev__ inline Vec4<T2> operator/(T1 scalar, const Vec3<T2>& vec)
+__hostdev__ inline Vec4<T2> operator/(T1 scalar, __global__ const Vec3<T2>& vec)
{
return Vec4<T2>(scalar / vec[0], scalar / vec[1], scalar / vec[2], scalar / vec[3]);
}
@@ -1428,23 +1625,23 @@
template<typename T>
struct TensorTraits<T, 0>
{
- static const int Rank = 0; // i.e. scalar
- static const bool IsScalar = true;
- static const bool IsVector = false;
- static const int Size = 1;
+ static __constant__ const int Rank = 0; // i.e. scalar
+ static __constant__ const bool IsScalar = true;
+ static __constant__ const bool IsVector = false;
+ static __constant__ const int Size = 1;
using ElementType = T;
- static T scalar(const T& s) { return s; }
+ static T scalar(__global__ const T& s) { return s; }
};
template<typename T>
struct TensorTraits<T, 1>
{
- static const int Rank = 1; // i.e. vector
- static const bool IsScalar = false;
- static const bool IsVector = true;
- static const int Size = T::SIZE;
+ static __constant__ const int Rank = 1; // i.e. vector
+ static __constant__ const bool IsScalar = false;
+ static __constant__ const bool IsVector = true;
+ static __constant__ const int Size = T::SIZE;
using ElementType = typename T::ValueType;
- static ElementType scalar(const T& v) { return v.length(); }
+ static ElementType scalar(__global__ const T& v) { return v.length(); }
};
// ----------------------------> FloatTraits <--------------------------------------
@@ -1528,71 +1725,80 @@
// ----------------------------> matMult <--------------------------------------
template<typename Vec3T>
-__hostdev__ inline Vec3T matMult(const float* mat, const Vec3T& xyz)
+__hostdev__ inline Vec3T matMult(__global__ const float* mat, __global__ const Vec3T& xyz)
{
return Vec3T(fmaf(xyz[0], mat[0], fmaf(xyz[1], mat[1], xyz[2] * mat[2])),
fmaf(xyz[0], mat[3], fmaf(xyz[1], mat[4], xyz[2] * mat[5])),
fmaf(xyz[0], mat[6], fmaf(xyz[1], mat[7], xyz[2] * mat[8]))); // 6 fmaf + 3 mult = 9 flops
}
-
+#if defined(__KERNEL_METAL__)
template<typename Vec3T>
-__hostdev__ inline Vec3T matMult(const double* mat, const Vec3T& xyz)
+__hostdev__ inline Vec3T matMult(__global__ const float* mat, __local__ const Vec3T& xyz)
{
+ return Vec3T(fmaf(xyz[0], mat[0], fmaf(xyz[1], mat[1], xyz[2] * mat[2])),
+ fmaf(xyz[0], mat[3], fmaf(xyz[1], mat[4], xyz[2] * mat[5])),
+ fmaf(xyz[0], mat[6], fmaf(xyz[1], mat[7], xyz[2] * mat[8]))); // 6 fmaf + 3 mult = 9 flops
+}
+#endif
+#ifndef __KERNEL_METAL__
+template<typename Vec3T>
+__hostdev__ inline Vec3T matMult(__global__ const double* mat, __global__ const Vec3T& xyz)
+{
return Vec3T(fma(static_cast<double>(xyz[0]), mat[0], fma(static_cast<double>(xyz[1]), mat[1], static_cast<double>(xyz[2]) * mat[2])),
fma(static_cast<double>(xyz[0]), mat[3], fma(static_cast<double>(xyz[1]), mat[4], static_cast<double>(xyz[2]) * mat[5])),
fma(static_cast<double>(xyz[0]), mat[6], fma(static_cast<double>(xyz[1]), mat[7], static_cast<double>(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops
}
-
+#endif
template<typename Vec3T>
-__hostdev__ inline Vec3T matMult(const float* mat, const float* vec, const Vec3T& xyz)
+__hostdev__ inline Vec3T matMult(__global__ const float* mat, __global__ const float* vec, __global__ const Vec3T& xyz)
{
return Vec3T(fmaf(xyz[0], mat[0], fmaf(xyz[1], mat[1], fmaf(xyz[2], mat[2], vec[0]))),
fmaf(xyz[0], mat[3], fmaf(xyz[1], mat[4], fmaf(xyz[2], mat[5], vec[1]))),
fmaf(xyz[0], mat[6], fmaf(xyz[1], mat[7], fmaf(xyz[2], mat[8], vec[2])))); // 9 fmaf = 9 flops
}
-
+#ifndef __KERNEL_METAL__
template<typename Vec3T>
-__hostdev__ inline Vec3T matMult(const double* mat, const double* vec, const Vec3T& xyz)
+__hostdev__ inline Vec3T matMult(__global__ const double* mat, __global__ const double* vec, __global__ const Vec3T& xyz)
{
return Vec3T(fma(static_cast<double>(xyz[0]), mat[0], fma(static_cast<double>(xyz[1]), mat[1], fma(static_cast<double>(xyz[2]), mat[2], vec[0]))),
fma(static_cast<double>(xyz[0]), mat[3], fma(static_cast<double>(xyz[1]), mat[4], fma(static_cast<double>(xyz[2]), mat[5], vec[1]))),
fma(static_cast<double>(xyz[0]), mat[6], fma(static_cast<double>(xyz[1]), mat[7], fma(static_cast<double>(xyz[2]), mat[8], vec[2])))); // 9 fma = 9 flops
}
-
+#endif
// matMultT: Multiply with the transpose:
template<typename Vec3T>
-__hostdev__ inline Vec3T matMultT(const float* mat, const Vec3T& xyz)
+__hostdev__ inline Vec3T matMultT(__global__ const float* mat, __global__ const Vec3T& xyz)
{
return Vec3T(fmaf(xyz[0], mat[0], fmaf(xyz[1], mat[3], xyz[2] * mat[6])),
fmaf(xyz[0], mat[1], fmaf(xyz[1], mat[4], xyz[2] * mat[7])),
fmaf(xyz[0], mat[2], fmaf(xyz[1], mat[5], xyz[2] * mat[8]))); // 6 fmaf + 3 mult = 9 flops
}
-
+#ifndef __KERNEL_METAL__
template<typename Vec3T>
-__hostdev__ inline Vec3T matMultT(const double* mat, const Vec3T& xyz)
+__hostdev__ inline Vec3T matMultT(__global__ const double* mat, __global__ const Vec3T& xyz)
{
return Vec3T(fma(static_cast<double>(xyz[0]), mat[0], fma(static_cast<double>(xyz[1]), mat[3], static_cast<double>(xyz[2]) * mat[6])),
fma(static_cast<double>(xyz[0]), mat[1], fma(static_cast<double>(xyz[1]), mat[4], static_cast<double>(xyz[2]) * mat[7])),
fma(static_cast<double>(xyz[0]), mat[2], fma(static_cast<double>(xyz[1]), mat[5], static_cast<double>(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops
}
-
+#endif
template<typename Vec3T>
-__hostdev__ inline Vec3T matMultT(const float* mat, const float* vec, const Vec3T& xyz)
+__hostdev__ inline Vec3T matMultT(__global__ const float* mat, __global__ const float* vec, __global__ const Vec3T& xyz)
{
return Vec3T(fmaf(xyz[0], mat[0], fmaf(xyz[1], mat[3], fmaf(xyz[2], mat[6], vec[0]))),
fmaf(xyz[0], mat[1], fmaf(xyz[1], mat[4], fmaf(xyz[2], mat[7], vec[1]))),
fmaf(xyz[0], mat[2], fmaf(xyz[1], mat[5], fmaf(xyz[2], mat[8], vec[2])))); // 9 fmaf = 9 flops
}
-
+#ifndef __KERNEL_METAL__
template<typename Vec3T>
-__hostdev__ inline Vec3T matMultT(const double* mat, const double* vec, const Vec3T& xyz)
+__hostdev__ inline Vec3T matMultT(__global__ const double* mat, __global__ const double* vec, __global__ const Vec3T& xyz)
{
return Vec3T(fma(static_cast<double>(xyz[0]), mat[0], fma(static_cast<double>(xyz[1]), mat[3], fma(static_cast<double>(xyz[2]), mat[6], vec[0]))),
fma(static_cast<double>(xyz[0]), mat[1], fma(static_cast<double>(xyz[1]), mat[4], fma(static_cast<double>(xyz[2]), mat[7], vec[1]))),
fma(static_cast<double>(xyz[0]), mat[2], fma(static_cast<double>(xyz[1]), mat[5], fma(static_cast<double>(xyz[2]), mat[8], vec[2])))); // 9 fma = 9 flops
}
-
+#endif
// ----------------------------> BBox <-------------------------------------
// Base-class for static polymorphism (cannot be constructed directly)
@@ -1600,15 +1806,27 @@
struct BaseBBox
{
Vec3T mCoord[2];
- __hostdev__ bool operator==(const BaseBBox& rhs) const { return mCoord[0] == rhs.mCoord[0] && mCoord[1] == rhs.mCoord[1]; };
- __hostdev__ bool operator!=(const BaseBBox& rhs) const { return mCoord[0] != rhs.mCoord[0] || mCoord[1] != rhs.mCoord[1]; };
- __hostdev__ const Vec3T& operator[](int i) const { return mCoord[i]; }
- __hostdev__ Vec3T& operator[](int i) { return mCoord[i]; }
- __hostdev__ Vec3T& min() { return mCoord[0]; }
- __hostdev__ Vec3T& max() { return mCoord[1]; }
- __hostdev__ const Vec3T& min() const { return mCoord[0]; }
- __hostdev__ const Vec3T& max() const { return mCoord[1]; }
- __hostdev__ Coord& translate(const Vec3T& xyz)
+ __hostdev__ bool operator==(__global__ const BaseBBox& rhs) const __global__ { return mCoord[0] == rhs.mCoord[0] && mCoord[1] == rhs.mCoord[1]; };
+ __hostdev__ bool operator!=(__global__ const BaseBBox& rhs) const __global__ { return mCoord[0] != rhs.mCoord[0] || mCoord[1] != rhs.mCoord[1]; };
+ __hostdev__ __global__ const Vec3T& operator[](int i) const __global__ { return mCoord[i]; }
+ __hostdev__ __global__ Vec3T& operator[](int i) __global__ { return mCoord[i]; }
+ __hostdev__ __global__ Vec3T& min() __global__ { return mCoord[0]; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __global__ Vec3T& min() __local__ { return mCoord[0]; }
+#endif
+ __hostdev__ __global__ Vec3T& max() __global__ { return mCoord[1]; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __global__ Vec3T& max() __local__ { return mCoord[1]; }
+#endif
+ __hostdev__ __global__ const Vec3T& min() const __global__ { return mCoord[0]; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __local__ const Vec3T& min() const __local__ { return mCoord[0]; }
+#endif
+ __hostdev__ __global__ const Vec3T& max() const __global__ { return mCoord[1]; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __local__ const Vec3T& max() const __local__ { return mCoord[1]; }
+#endif
+ __hostdev__ __global__ Coord& translate(__global__ const Vec3T& xyz) __global__
{
mCoord[0] += xyz;
mCoord[1] += xyz;
@@ -1615,7 +1833,7 @@
return *this;
}
// @brief Expand this bounding box to enclose point (i, j, k).
- __hostdev__ BaseBBox& expand(const Vec3T& xyz)
+ __hostdev__ __global__ BaseBBox& expand(__global__ const Vec3T& xyz) __global__
{
mCoord[0].minComponent(xyz);
mCoord[1].maxComponent(xyz);
@@ -1623,7 +1841,7 @@
}
/// @brief Intersect this bounding box with the given bounding box.
- __hostdev__ BaseBBox& intersect(const BaseBBox& bbox)
+ __hostdev__ __global__ BaseBBox& intersect(__global__ const BaseBBox& bbox) __global__
{
mCoord[0].maxComponent(bbox.min());
mCoord[1].minComponent(bbox.max());
@@ -1634,7 +1852,7 @@
//{
// return BaseBBox(mCoord[0].offsetBy(-padding),mCoord[1].offsetBy(padding));
//}
- __hostdev__ bool isInside(const Vec3T& xyz)
+ __hostdev__ bool isInside(__global__ const Vec3T& xyz)
{
if (xyz[0] < mCoord[0][0] || xyz[1] < mCoord[0][1] || xyz[2] < mCoord[0][2])
return false;
@@ -1642,10 +1860,20 @@
return false;
return true;
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isInside(__local__ const Vec3T& xyz)
+ {
+ if (xyz[0] < mCoord[0][0] || xyz[1] < mCoord[0][1] || xyz[2] < mCoord[0][2])
+ return false;
+ if (xyz[0] > mCoord[1][0] || xyz[1] > mCoord[1][1] || xyz[2] > mCoord[1][2])
+ return false;
+ return true;
+ }
+#endif
protected:
__hostdev__ BaseBBox() {}
- __hostdev__ BaseBBox(const Vec3T& min, const Vec3T& max)
+ __hostdev__ BaseBBox(__global__ const Vec3T& min, __global__ const Vec3T& max)
: mCoord{min, max}
{
}
@@ -1659,38 +1887,45 @@
/// @note Min is inclusive and max is exclusive. If min = max the dimension of
/// the bounding box is zero and therefore it is also empty.
template<typename Vec3T>
-struct BBox<Vec3T, true> : public BaseBBox<Vec3T>
+struct BBox<Vec3T, true>
+#if !defined(__KERNEL_METAL__)
+ : public BaseBBox<Vec3T>
+#endif
{
using Vec3Type = Vec3T;
using ValueType = typename Vec3T::ValueType;
static_assert(is_floating_point<ValueType>::value, "Expected a floating point coordinate type");
using BaseT = BaseBBox<Vec3T>;
+#if defined(__KERNEL_METAL__)
+ BaseBBox<Vec3T> mCoord;
+#else
using BaseT::mCoord;
+#endif
+
__hostdev__ BBox()
: BaseT(Vec3T( Maximum<typename Vec3T::ValueType>::value()),
Vec3T(-Maximum<typename Vec3T::ValueType>::value()))
{
}
- __hostdev__ BBox(const Vec3T& min, const Vec3T& max)
+ __hostdev__ BBox(__global__ const Vec3T& min, __global__ const Vec3T& max)
: BaseT(min, max)
{
}
- __hostdev__ BBox(const Coord& min, const Coord& max)
+ __hostdev__ BBox(__global__ const Coord& min, __global__ const Coord& max)
: BaseT(Vec3T(ValueType(min[0]), ValueType(min[1]), ValueType(min[2])),
Vec3T(ValueType(max[0] + 1), ValueType(max[1] + 1), ValueType(max[2] + 1)))
{
}
- __hostdev__ static BBox createCube(const Coord& min, typename Coord::ValueType dim)
+ __hostdev__ static BBox createCube(__global__ const Coord& min, typename Coord::ValueType dim)
{
return BBox(min, min.offsetBy(dim));
}
-
- __hostdev__ BBox(const BaseBBox<Coord>& bbox) : BBox(bbox[0], bbox[1]) {}
+ __hostdev__ BBox(__global__ const BaseBBox<Coord>& bbox) __global__ : BBox(bbox[0], bbox[1]) {}
__hostdev__ bool empty() const { return mCoord[0][0] >= mCoord[1][0] ||
mCoord[0][1] >= mCoord[1][1] ||
mCoord[0][2] >= mCoord[1][2]; }
__hostdev__ Vec3T dim() const { return this->empty() ? Vec3T(0) : this->max() - this->min(); }
- __hostdev__ bool isInside(const Vec3T& p) const
+ __hostdev__ bool isInside(__global__ const Vec3T& p) const
{
return p[0] > mCoord[0][0] && p[1] > mCoord[0][1] && p[2] > mCoord[0][2] &&
p[0] < mCoord[1][0] && p[1] < mCoord[1][1] && p[2] < mCoord[1][2];
@@ -1703,24 +1938,32 @@
/// @note Both min and max are INCLUDED in the bbox so dim = max - min + 1. So,
/// if min = max the bounding box contains exactly one point and dim = 1!
template<typename CoordT>
-struct BBox<CoordT, false> : public BaseBBox<CoordT>
+struct BBox<CoordT, false>
+#if !defined(__KERNEL_METAL__)
+ : public BaseBBox<CoordT>
+#endif
{
+
static_assert(is_same<int, typename CoordT::ValueType>::value, "Expected \"int\" coordinate type");
using BaseT = BaseBBox<CoordT>;
+#if defined(__KERNEL_METAL__)
+ BaseBBox<CoordT> mCoord;
+#else
using BaseT::mCoord;
+#endif
/// @brief Iterator over the domain covered by a BBox
/// @details z is the fastest-moving coordinate.
class Iterator
{
- const BBox& mBBox;
+ __global__ const BBox& mBBox;
CoordT mPos;
public:
- __hostdev__ Iterator(const BBox& b)
+ __hostdev__ Iterator(__global__ const BBox& b)
: mBBox(b)
, mPos(b.min())
{
}
- __hostdev__ Iterator& operator++()
+ __hostdev__ __global__ Iterator& operator++()
{
if (mPos[2] < mBBox[1][2]) {// this is the most common case
++mPos[2];
@@ -1734,7 +1977,7 @@
}
return *this;
}
- __hostdev__ Iterator operator++(int)
+ __hostdev__ Iterator operator++(int) __global__
{
auto tmp = *this;
++(*this);
@@ -1742,7 +1985,7 @@
}
/// @brief Return @c true if the iterator still points to a valid coordinate.
__hostdev__ operator bool() const { return mPos[0] <= mBBox[1][0]; }
- __hostdev__ const CoordT& operator*() const { return mPos; }
+ __hostdev__ __global__ const CoordT& operator*() const { return mPos; }
}; // Iterator
__hostdev__ Iterator begin() const { return Iterator{*this}; }
__hostdev__ BBox()
@@ -1749,13 +1992,13 @@
: BaseT(CoordT::max(), CoordT::min())
{
}
- __hostdev__ BBox(const CoordT& min, const CoordT& max)
+ __hostdev__ BBox(__global__ const CoordT& min, __global__ const CoordT& max)
: BaseT(min, max)
{
}
template<typename SplitT>
- __hostdev__ BBox(BBox& other, const SplitT&)
+ __hostdev__ BBox(__global__ BBox& other, __global__ const SplitT&)
: BaseT(other.mCoord[0], other.mCoord[1])
{
NANOVDB_ASSERT(this->is_divisible());
@@ -1764,7 +2007,7 @@
other.mCoord[0][n] = mCoord[1][n] + 1;
}
- __hostdev__ static BBox createCube(const CoordT& min, typename CoordT::ValueType dim)
+ __hostdev__ static BBox createCube(__global__ const CoordT& min, typename CoordT::ValueType dim)
{
return BBox(min, min.offsetBy(dim - 1));
}
@@ -1778,15 +2021,23 @@
mCoord[0][2] > mCoord[1][2]; }
__hostdev__ CoordT dim() const { return this->empty() ? Coord(0) : this->max() - this->min() + Coord(1); }
__hostdev__ uint64_t volume() const { auto d = this->dim(); return uint64_t(d[0])*uint64_t(d[1])*uint64_t(d[2]); }
- __hostdev__ bool isInside(const CoordT& p) const { return !(CoordT::lessThan(p, this->min()) || CoordT::lessThan(this->max(), p)); }
- /// @brief Return @c true if the given bounding box is inside this bounding box.
- __hostdev__ bool isInside(const BBox& b) const
+ __hostdev__ bool isInside(__global__ const CoordT& p) const { return !(CoordT::lessThan(p, this->min()) || CoordT::lessThan(this->max(), p)); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isInside(__local__ const CoordT& p) const { return !(CoordT::lessThan(p, this->min()) || CoordT::lessThan(this->max(), p)); }
+#endif
+ __hostdev__ bool isInside(__global__ const BBox& b) const
{
return !(CoordT::lessThan(b.min(), this->min()) || CoordT::lessThan(this->max(), b.max()));
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isInside(__local__ const BBox& b) const
+ {
+ return !(CoordT::lessThan(b.min(), this->min()) || CoordT::lessThan(this->max(), b.max()));
+ }
+#endif
/// @brief Return @c true if the given bounding box overlaps with this bounding box.
- __hostdev__ bool hasOverlap(const BBox& b) const
+ __hostdev__ bool hasOverlap(__global__ const BBox& b) const
{
return !(CoordT::lessThan(this->max(), b.min()) || CoordT::lessThan(b.max(), this->min()));
}
@@ -1826,6 +2077,8 @@
return static_cast<uint32_t>(index);
#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS)
return static_cast<uint32_t>(__builtin_ctzl(v));
+#elif defined(__KERNEL_METAL__)
+ return ctz(v);
#else
//#warning Using software implementation for FindLowestOn(uint32_t)
static const unsigned char DeBruijn[32] = {
@@ -1856,6 +2109,8 @@
return static_cast<uint32_t>(index);
#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS)
return sizeof(unsigned long) * 8 - 1 - __builtin_clzl(v);
+#elif defined(__KERNEL_METAL__)
+ return clz(v);
#else
//#warning Using software implementation for FindHighestOn(uint32_t)
static const unsigned char DeBruijn[32] = {
@@ -1884,6 +2139,8 @@
return static_cast<uint32_t>(index);
#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS)
return static_cast<uint32_t>(__builtin_ctzll(v));
+#elif defined(__KERNEL_METAL__)
+ return ctz(v);
#else
//#warning Using software implementation for FindLowestOn(uint64_t)
static const unsigned char DeBruijn[64] = {
@@ -1918,6 +2175,8 @@
return static_cast<uint32_t>(index);
#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS)
return sizeof(unsigned long) * 8 - 1 - __builtin_clzll(v);
+#elif defined(__KERNEL_METAL__)
+ return clz(v);
#else
const uint32_t* p = reinterpret_cast<const uint32_t*>(&v);
return p[1] ? 32u + FindHighestOn(p[1]) : FindHighestOn(p[0]);
@@ -1955,8 +2214,8 @@
template<uint32_t LOG2DIM>
class Mask
{
- static constexpr uint32_t SIZE = 1U << (3 * LOG2DIM); // Number of bits in mask
- static constexpr uint32_t WORD_COUNT = SIZE >> 6; // Number of 64 bit words
+ static __constant__ constexpr uint32_t SIZE = 1U << (3 * LOG2DIM); // Number of bits in mask
+ static __constant__ constexpr uint32_t WORD_COUNT = SIZE >> 6; // Number of 64 bit words
uint64_t mWords[WORD_COUNT];
public:
@@ -1973,7 +2232,7 @@
__hostdev__ uint32_t countOn() const
{
uint32_t sum = 0, n = WORD_COUNT;
- for (const uint64_t* w = mWords; n--; ++w)
+ for (__global__ const uint64_t* w = mWords; n--; ++w)
sum += CountOn(*w);
return sum;
}
@@ -1982,7 +2241,7 @@
inline __hostdev__ uint32_t countOn(uint32_t i) const
{
uint32_t n = i >> 6, sum = CountOn( mWords[n] & ((uint64_t(1) << (i & 63u))-1u) );
- for (const uint64_t* w = mWords; n--; ++w) sum += CountOn(*w);
+ for (__global__ const uint64_t* w = mWords; n--; ++w) sum += CountOn(*w);
return sum;
}
@@ -1990,13 +2249,21 @@
class Iterator
{
public:
- __hostdev__ Iterator() : mPos(Mask::SIZE), mParent(nullptr){}
- __hostdev__ Iterator(uint32_t pos, const Mask* parent) : mPos(pos), mParent(parent){}
- Iterator& operator=(const Iterator&) = default;
+ __hostdev__ Iterator()
+ : mPos(Mask::SIZE)
+ , mParent(nullptr)
+ {
+ }
+ __hostdev__ Iterator(uint32_t pos, __global__ const Mask* parent)
+ : mPos(pos)
+ , mParent(parent)
+ {
+ }
+ __global__ Iterator& operator=(__global__ const Iterator&) = default;
__hostdev__ uint32_t operator*() const { return mPos; }
__hostdev__ uint32_t pos() const { return mPos; }
__hostdev__ operator bool() const { return mPos != Mask::SIZE; }
- __hostdev__ Iterator& operator++()
+ __hostdev__ __global__ Iterator& operator++()
{
mPos = mParent->findNext<On>(mPos + 1);
return *this;
@@ -2010,7 +2277,7 @@
private:
uint32_t mPos;
- const Mask* mParent;
+ __global__ const Mask* mParent;
}; // Member class Iterator
using OnIterator = Iterator<true>;
@@ -2034,7 +2301,7 @@
}
/// @brief Copy constructor
- __hostdev__ Mask(const Mask& other)
+ __hostdev__ Mask(__global__ const Mask& other)
{
for (uint32_t i = 0; i < WORD_COUNT; ++i)
mWords[i] = other.mWords[i];
@@ -2042,29 +2309,29 @@
/// @brief Return a const reference to the <i>n</i>th word of the bit mask, for a word of arbitrary size.
template<typename WordT>
- __hostdev__ const WordT& getWord(int n) const
+ __hostdev__ __global__ const WordT& getWord(int n) const
{
NANOVDB_ASSERT(n * 8 * sizeof(WordT) < SIZE);
- return reinterpret_cast<const WordT*>(mWords)[n];
+ return reinterpret_cast<__global__ const WordT*>(mWords)[n];
}
/// @brief Return a reference to the <i>n</i>th word of the bit mask, for a word of arbitrary size.
template<typename WordT>
- __hostdev__ WordT& getWord(int n)
+ __hostdev__ __global__ WordT& getWord(int n)
{
NANOVDB_ASSERT(n * 8 * sizeof(WordT) < SIZE);
- return reinterpret_cast<WordT*>(mWords)[n];
+ return reinterpret_cast<__global__ WordT*>(mWords)[n];
}
/// @brief Assignment operator that works with openvdb::util::NodeMask
template<typename MaskT>
- __hostdev__ Mask& operator=(const MaskT& other)
+ __hostdev__ __global__ Mask& operator=(__global__ const MaskT& other)
{
static_assert(sizeof(Mask) == sizeof(MaskT), "Mismatching sizeof");
static_assert(WORD_COUNT == MaskT::WORD_COUNT, "Mismatching word count");
static_assert(LOG2DIM == MaskT::LOG2DIM, "Mismatching LOG2DIM");
- auto *src = reinterpret_cast<const uint64_t*>(&other);
- uint64_t *dst = mWords;
+ __global__ auto *src = reinterpret_cast<__global__ const uint64_t*>(&other);
+ __global__ uint64_t *dst = mWords;
for (uint32_t i = 0; i < WORD_COUNT; ++i) {
*dst++ = *src++;
}
@@ -2071,7 +2338,7 @@
return *this;
}
- __hostdev__ bool operator==(const Mask& other) const
+ __hostdev__ bool operator==(__global__ const Mask& other) const
{
for (uint32_t i = 0; i < WORD_COUNT; ++i) {
if (mWords[i] != other.mWords[i]) return false;
@@ -2079,16 +2346,18 @@
return true;
}
- __hostdev__ bool operator!=(const Mask& other) const { return !((*this) == other); }
+ __hostdev__ bool operator!=(__global__ const Mask& other) const { return !((*this) == other); }
/// @brief Return true if the given bit is set.
- __hostdev__ bool isOn(uint32_t n) const { return 0 != (mWords[n >> 6] & (uint64_t(1) << (n & 63))); }
-
+ __hostdev__ bool isOn(uint32_t n) const __global__ { return 0 != (mWords[n >> 6] & (uint64_t(1) << (n & 63))); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isOn(uint32_t n) const __local__ { return 0 != (mWords[n >> 6] & (uint64_t(1) << (n & 63))); }
+#endif
/// @brief Return true if the given bit is NOT set.
- __hostdev__ bool isOff(uint32_t n) const { return 0 == (mWords[n >> 6] & (uint64_t(1) << (n & 63))); }
+ __hostdev__ bool isOff(uint32_t n) const __global__ { return 0 == (mWords[n >> 6] & (uint64_t(1) << (n & 63))); }
/// @brief Return true if all the bits are set in this Mask.
- __hostdev__ bool isOn() const
+ __hostdev__ bool isOn() const __global__
{
for (uint32_t i = 0; i < WORD_COUNT; ++i)
if (mWords[i] != ~uint64_t(0))
@@ -2095,6 +2364,15 @@
return false;
return true;
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isOn() const __local__
+ {
+ for (uint32_t i = 0; i < WORD_COUNT; ++i)
+ if (mWords[i] != ~uint64_t(0))
+ return false;
+ return true;
+ }
+#endif
/// @brief Return true if none of the bits are set in this Mask.
__hostdev__ bool isOff() const
@@ -2115,7 +2393,7 @@
__hostdev__ void set(uint32_t n, bool On)
{
#if 1 // switch between branchless
- auto &word = mWords[n >> 6];
+ __global__ auto &word = mWords[n >> 6];
n &= 63;
word &= ~(uint64_t(1) << n);
word |= uint64_t(On) << n;
@@ -2149,40 +2427,40 @@
__hostdev__ void toggle()
{
uint32_t n = WORD_COUNT;
- for (auto* w = mWords; n--; ++w)
+ for (__global__ auto* w = mWords; n--; ++w)
*w = ~*w;
}
__hostdev__ void toggle(uint32_t n) { mWords[n >> 6] ^= uint64_t(1) << (n & 63); }
/// @brief Bitwise intersection
- __hostdev__ Mask& operator&=(const Mask& other)
+ __hostdev__ __global__ Mask& operator&=(__global__ const Mask& other)
{
- uint64_t *w1 = mWords;
- const uint64_t *w2 = other.mWords;
+ __global__ uint64_t *w1 = mWords;
+ __global__ const uint64_t *w2 = other.mWords;
for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 &= *w2;
return *this;
}
/// @brief Bitwise union
- __hostdev__ Mask& operator|=(const Mask& other)
+ __hostdev__ __global__ Mask& operator|=(__global__ const Mask& other)
{
- uint64_t *w1 = mWords;
- const uint64_t *w2 = other.mWords;
+ __global__ uint64_t *w1 = mWords;
+ __global__ const uint64_t *w2 = other.mWords;
for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 |= *w2;
return *this;
}
/// @brief Bitwise difference
- __hostdev__ Mask& operator-=(const Mask& other)
+ __hostdev__ __global__ Mask& operator-=(__global__ const Mask& other)
{
- uint64_t *w1 = mWords;
- const uint64_t *w2 = other.mWords;
+ __global__ uint64_t *w1 = mWords;
+ __global__ const uint64_t *w2 = other.mWords;
for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 &= ~*w2;
return *this;
}
/// @brief Bitwise XOR
- __hostdev__ Mask& operator^=(const Mask& other)
+ __hostdev__ __global__ Mask& operator^=(__global__ const Mask& other)
{
- uint64_t *w1 = mWords;
- const uint64_t *w2 = other.mWords;
+ __global__ uint64_t *w1 = mWords;
+ __global__ const uint64_t *w2 = other.mWords;
for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 ^= *w2;
return *this;
}
@@ -2194,7 +2472,7 @@
__hostdev__ uint32_t findFirst() const
{
uint32_t n = 0;
- const uint64_t* w = mWords;
+ __global__ const uint64_t* w = mWords;
for (; n<WORD_COUNT && !(On ? *w : ~*w); ++w, ++n);
return n==WORD_COUNT ? SIZE : (n << 6) + FindLowestOn(On ? *w : ~*w);
}
@@ -2233,53 +2511,73 @@
/// @brief Initialize the member data
template<typename Mat3T, typename Vec3T>
- __hostdev__ void set(const Mat3T& mat, const Mat3T& invMat, const Vec3T& translate, double taper);
+ __hostdev__ void set(__global__ const Mat3T& mat, __global__ const Mat3T& invMat, __global__ const Vec3T& translate, double taper) __global__;
/// @brief Initialize the member data
/// @note The last (4th) row of invMat is actually ignored.
template<typename Mat4T>
- __hostdev__ void set(const Mat4T& mat, const Mat4T& invMat, double taper) {this->set(mat, invMat, mat[3], taper);}
+ __hostdev__ void set(__global__ const Mat4T& mat, __global__ const Mat4T& invMat, double taper) __global__ {this->set(mat, invMat, mat[3], taper);}
template<typename Vec3T>
- __hostdev__ void set(double scale, const Vec3T &translation, double taper);
+ __hostdev__ void set(double scale, __global__ const Vec3T &translation, double taper) __global__;
template<typename Vec3T>
- __hostdev__ Vec3T applyMap(const Vec3T& xyz) const { return matMult(mMatD, mVecD, xyz); }
+ __hostdev__ Vec3T applyMap(__global__ const Vec3T& xyz) const { return matMult(mMatD, mVecD, xyz); }
template<typename Vec3T>
- __hostdev__ Vec3T applyMapF(const Vec3T& xyz) const { return matMult(mMatF, mVecF, xyz); }
+ __hostdev__ Vec3T applyMapF(__global__ const Vec3T& xyz) const { return matMult(mMatF, mVecF, xyz); }
template<typename Vec3T>
- __hostdev__ Vec3T applyJacobian(const Vec3T& xyz) const { return matMult(mMatD, xyz); }
+ __hostdev__ Vec3T applyJacobian(__global__ const Vec3T& xyz) const { return matMult(mMatD, xyz); }
template<typename Vec3T>
- __hostdev__ Vec3T applyJacobianF(const Vec3T& xyz) const { return matMult(mMatF, xyz); }
+ __hostdev__ Vec3T applyJacobianF(__global__ const Vec3T& xyz) const { return matMult(mMatF, xyz); }
template<typename Vec3T>
- __hostdev__ Vec3T applyInverseMap(const Vec3T& xyz) const
+ __hostdev__ Vec3T applyInverseMap(__global__ const Vec3T& xyz) const __global__
{
return matMult(mInvMatD, Vec3T(xyz[0] - mVecD[0], xyz[1] - mVecD[1], xyz[2] - mVecD[2]));
}
+#if defined(__KERNEL_METAL__)
template<typename Vec3T>
- __hostdev__ Vec3T applyInverseMapF(const Vec3T& xyz) const
+ __hostdev__ Vec3T applyInverseMap(__local__ const Vec3T& xyz) const __global__
{
+ return matMult(mInvMatD, Vec3T(xyz[0] - mVecD[0], xyz[1] - mVecD[1], xyz[2] - mVecD[2]));
+ }
+#endif
+ template<typename Vec3T>
+ __hostdev__ Vec3T applyInverseMapF(const __global__ Vec3T& xyz) const __global__
+ {
return matMult(mInvMatF, Vec3T(xyz[0] - mVecF[0], xyz[1] - mVecF[1], xyz[2] - mVecF[2]));
}
+#if defined(__KERNEL_METAL__)
+ template<typename Vec3T>
+ __hostdev__ Vec3T applyInverseMapF(const __local__ Vec3T& xyz) const __global__
+ {
+ return matMult(mInvMatF, Vec3T(xyz[0] - mVecF[0], xyz[1] - mVecF[1], xyz[2] - mVecF[2]));
+ }
+#endif
template<typename Vec3T>
- __hostdev__ Vec3T applyInverseJacobian(const Vec3T& xyz) const { return matMult(mInvMatD, xyz); }
+ __hostdev__ Vec3T applyInverseJacobian(__global__ const Vec3T& xyz) const __global__ { return matMult(mInvMatD, xyz); }
template<typename Vec3T>
- __hostdev__ Vec3T applyInverseJacobianF(const Vec3T& xyz) const { return matMult(mInvMatF, xyz); }
+ __hostdev__ Vec3T applyInverseJacobianF(__global__ const Vec3T& xyz) const __global__ { return matMult(mInvMatF, xyz); }
+#if defined(__KERNEL_METAL__)
+ template<typename Vec3T>
+ __hostdev__ Vec3T applyInverseJacobianF(__local__ const Vec3T& xyz) const __global__ { return matMult(mInvMatF, xyz); }
+#endif
template<typename Vec3T>
- __hostdev__ Vec3T applyIJT(const Vec3T& xyz) const { return matMultT(mInvMatD, xyz); }
+ __hostdev__ Vec3T applyIJT(__global__ const Vec3T& xyz) const { return matMultT(mInvMatD, xyz); }
template<typename Vec3T>
- __hostdev__ Vec3T applyIJTF(const Vec3T& xyz) const { return matMultT(mInvMatF, xyz); }
+ __hostdev__ Vec3T applyIJTF(__global__ const Vec3T& xyz) const { return matMultT(mInvMatF, xyz); }
}; // Map
template<typename Mat3T, typename Vec3T>
-__hostdev__ inline void Map::set(const Mat3T& mat, const Mat3T& invMat, const Vec3T& translate, double taper)
+__hostdev__ inline void Map::set(__global__ const Mat3T& mat, __global__ const Mat3T& invMat, __global__ const Vec3T& translate, double taper) __global__
{
- float *mf = mMatF, *vf = mVecF, *mif = mInvMatF;
- double *md = mMatD, *vd = mVecD, *mid = mInvMatD;
+ __global__ float * mf = mMatF, *vf = mVecF;
+ __global__ float* mif = mInvMatF;
+ __global__ double *md = mMatD, *vd = mVecD;
+ __global__ double* mid = mInvMatD;
mTaperF = static_cast<float>(taper);
mTaperD = taper;
for (int i = 0; i < 3; ++i) {
@@ -2295,8 +2593,19 @@
}
template<typename Vec3T>
-__hostdev__ inline void Map::set(double dx, const Vec3T &trans, double taper)
+__hostdev__ inline void Map::set(double dx, __global__ const Vec3T &trans, double taper) __global__
{
+#if defined __KERNEL_METAL__
+ const float mat[3][3] = {
+ {(float)dx, 0.0, 0.0}, // row 0
+ {0.0, (float)dx, 0.0}, // row 1
+ {0.0, 0.0, (float)dx}, // row 2
+ }, idx = 1.0/(float)dx, invMat[3][3] = {
+ {idx, 0.0, 0.0}, // row 0
+ {0.0, idx, 0.0}, // row 1
+ {0.0, 0.0, idx}, // row 2
+ };
+#else
const double mat[3][3] = {
{dx, 0.0, 0.0}, // row 0
{0.0, dx, 0.0}, // row 1
@@ -2306,6 +2615,7 @@
{0.0, idx, 0.0}, // row 1
{0.0, 0.0, idx}, // row 2
};
+#endif
this->set(mat, invMat, trans, taper);
}
@@ -2313,7 +2623,7 @@
struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridBlindMetaData
{
- static const int MaxNameSize = 256;// due to NULL termination the maximum length is one less!
+ static __constant__ const int MaxNameSize = 256;// due to NULL termination the maximum length is one less!
int64_t mByteOffset; // byte offset to the blind data, relative to the GridData.
uint64_t mElementCount; // number of elements, e.g. point count
uint32_t mFlags; // flags
@@ -2328,10 +2638,10 @@
return blindDataCount * sizeof(GridBlindMetaData);
}
- __hostdev__ void setBlindData(void *ptr) { mByteOffset = PtrDiff(ptr, this); }
+ __hostdev__ void setBlindData(__global__ void *ptr) __global__ { mByteOffset = PtrDiff(ptr, this); }
template <typename T>
- __hostdev__ const T* getBlindData() const { return PtrAdd<T>(this, mByteOffset); }
+ __hostdev__ __global__ const T* getBlindData() const { return PtrAdd<T>(this, mByteOffset); }
}; // GridBlindMetaData
@@ -2430,7 +2740,7 @@
/// @note No client code should (or can) interface with this struct so it can safely be ignored!
struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData
{// sizeof(GridData) = 672B
- static const int MaxNameSize = 256;// due to NULL termination the maximum length is one less
+ static __constant__ const int MaxNameSize = 256;// due to NULL termination the maximum length is one less
uint64_t mMagic; // 8B (0) magic to validate it is valid grid data.
uint64_t mChecksum; // 8B (8). Checksum of grid buffer.
Version mVersion;// 4B (16) major, minor, and patch version numbers
@@ -2450,8 +2760,8 @@
uint64_t mData1, mData2;// 2x8B (656) padding to 32 B alignment. mData1 is use for the total number of values indexed by an IndexGrid
// Set and unset various bit flags
- __hostdev__ void setFlagsOff() { mFlags = uint32_t(0); }
- __hostdev__ void setMinMaxOn(bool on = true)
+ __hostdev__ void setFlagsOff() __global__ { mFlags = uint32_t(0); }
+ __hostdev__ void setMinMaxOn(bool on = true) __global__
{
if (on) {
mFlags |= static_cast<uint32_t>(GridFlags::HasMinMax);
@@ -2459,7 +2769,7 @@
mFlags &= ~static_cast<uint32_t>(GridFlags::HasMinMax);
}
}
- __hostdev__ void setBBoxOn(bool on = true)
+ __hostdev__ void setBBoxOn(bool on = true) __global__
{
if (on) {
mFlags |= static_cast<uint32_t>(GridFlags::HasBBox);
@@ -2467,7 +2777,7 @@
mFlags &= ~static_cast<uint32_t>(GridFlags::HasBBox);
}
}
- __hostdev__ void setLongGridNameOn(bool on = true)
+ __hostdev__ void setLongGridNameOn(bool on = true) __global__
{
if (on) {
mFlags |= static_cast<uint32_t>(GridFlags::HasLongGridName);
@@ -2475,7 +2785,7 @@
mFlags &= ~static_cast<uint32_t>(GridFlags::HasLongGridName);
}
}
- __hostdev__ void setAverageOn(bool on = true)
+ __hostdev__ void setAverageOn(bool on = true) __global__
{
if (on) {
mFlags |= static_cast<uint32_t>(GridFlags::HasAverage);
@@ -2483,7 +2793,7 @@
mFlags &= ~static_cast<uint32_t>(GridFlags::HasAverage);
}
}
- __hostdev__ void setStdDeviationOn(bool on = true)
+ __hostdev__ void setStdDeviationOn(bool on = true) __global__
{
if (on) {
mFlags |= static_cast<uint32_t>(GridFlags::HasStdDeviation);
@@ -2491,7 +2801,7 @@
mFlags &= ~static_cast<uint32_t>(GridFlags::HasStdDeviation);
}
}
- __hostdev__ void setBreadthFirstOn(bool on = true)
+ __hostdev__ void setBreadthFirstOn(bool on = true) __global__
{
if (on) {
mFlags |= static_cast<uint32_t>(GridFlags::IsBreadthFirst);
@@ -2502,37 +2812,49 @@
// Affine transformations based on double precision
template<typename Vec3T>
- __hostdev__ Vec3T applyMap(const Vec3T& xyz) const { return mMap.applyMap(xyz); } // Pos: index -> world
+ __hostdev__ Vec3T applyMap(__global__ const Vec3T& xyz) const __global__ { return mMap.applyMap(xyz); } // Pos: index -> world
template<typename Vec3T>
- __hostdev__ Vec3T applyInverseMap(const Vec3T& xyz) const { return mMap.applyInverseMap(xyz); } // Pos: world -> index
+ __hostdev__ Vec3T applyInverseMap(__global__ const Vec3T& xyz) const __global__ { return mMap.applyInverseMap(xyz); } // Pos: world -> index
+#if defined(__KERNEL_METAL__)
template<typename Vec3T>
- __hostdev__ Vec3T applyJacobian(const Vec3T& xyz) const { return mMap.applyJacobian(xyz); } // Dir: index -> world
+ __hostdev__ Vec3T applyInverseMap(__local__ const Vec3T& xyz) const __global__ { return mMap.applyInverseMap(xyz); } // Pos: world -> index
+#endif
template<typename Vec3T>
- __hostdev__ Vec3T applyInverseJacobian(const Vec3T& xyz) const { return mMap.applyInverseJacobian(xyz); } // Dir: world -> index
+ __hostdev__ Vec3T applyJacobian(__global__ const Vec3T& xyz) const __global__ { return mMap.applyJacobian(xyz); } // Dir: index -> world
template<typename Vec3T>
- __hostdev__ Vec3T applyIJT(const Vec3T& xyz) const { return mMap.applyIJT(xyz); }
+ __hostdev__ Vec3T applyInverseJacobian(__global__ const Vec3T& xyz) const __global__ { return mMap.applyInverseJacobian(xyz); } // Dir: world -> index
+ template<typename Vec3T>
+ __hostdev__ Vec3T applyIJT(__global__ const Vec3T& xyz) const __global__ { return mMap.applyIJT(xyz); }
// Affine transformations based on single precision
template<typename Vec3T>
- __hostdev__ Vec3T applyMapF(const Vec3T& xyz) const { return mMap.applyMapF(xyz); } // Pos: index -> world
+ __hostdev__ Vec3T applyMapF(__global__ const Vec3T& xyz) const __global__ { return mMap.applyMapF(xyz); } // Pos: index -> world
template<typename Vec3T>
- __hostdev__ Vec3T applyInverseMapF(const Vec3T& xyz) const { return mMap.applyInverseMapF(xyz); } // Pos: world -> index
+ __hostdev__ Vec3T applyInverseMapF(__global__ const Vec3T& xyz) const __global__ { return mMap.applyInverseMapF(xyz); } // Pos: world -> index
+#if defined(__KERNEL_METAL__)
template<typename Vec3T>
- __hostdev__ Vec3T applyJacobianF(const Vec3T& xyz) const { return mMap.applyJacobianF(xyz); } // Dir: index -> world
+ __hostdev__ Vec3T applyInverseMapF(__local__ const Vec3T& xyz) const __global__ { return mMap.applyInverseMapF(xyz); } // Pos: world -> index
+#endif
template<typename Vec3T>
- __hostdev__ Vec3T applyInverseJacobianF(const Vec3T& xyz) const { return mMap.applyInverseJacobianF(xyz); } // Dir: world -> index
+ __hostdev__ Vec3T applyJacobianF(__global__ const Vec3T& xyz) const __global__ { return mMap.applyJacobianF(xyz); } // Dir: index -> world
template<typename Vec3T>
- __hostdev__ Vec3T applyIJTF(const Vec3T& xyz) const { return mMap.applyIJTF(xyz); }
+ __hostdev__ Vec3T applyInverseJacobianF(__global__ const Vec3T& xyz) const __global__ { return mMap.applyInverseJacobianF(xyz); } // Dir: world -> index
+#if defined(__KERNEL_METAL__)
+ template<typename Vec3T>
+ __hostdev__ Vec3T applyInverseJacobianF(__local__ const Vec3T& xyz) const __global__ { return mMap.applyInverseJacobianF(xyz); } // Dir: world -> index
+#endif
+ template<typename Vec3T>
+ __hostdev__ Vec3T applyIJTF(__global__ const Vec3T& xyz) const __global__ { return mMap.applyIJTF(xyz); }
// @brief Return a non-const void pointer to the tree
- __hostdev__ void* treePtr() { return this + 1; }
+ __hostdev__ __global__ void* treePtr() __global__ { return this + 1; }
// @brief Return a const void pointer to the tree
- __hostdev__ const void* treePtr() const { return this + 1; }
+ __hostdev__ __global__ const void* treePtr() const __global__ { return this + 1; }
/// @brief Returns a const reference to the blindMetaData at the specified linear offset.
///
/// @warning The linear offset is assumed to be in the valid range
- __hostdev__ const GridBlindMetaData* blindMetaData(uint32_t n) const
+ __hostdev__ __global__ const GridBlindMetaData* blindMetaData(uint32_t n) const __global__
{
NANOVDB_ASSERT(n < mBlindMetadataCount);
return PtrAdd<GridBlindMetaData>(this, mBlindMetadataOffset) + n;
@@ -2552,8 +2874,17 @@
///
/// @note This the API of this class to interface with client code
template<typename TreeT>
-class Grid : private GridData
+class Grid
+#if !defined(__KERNEL_METAL__)
+ : private GridData
+#endif
{
+#if defined(__KERNEL_METAL__)
+ GridData _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) DataType::v
+#endif
public:
using TreeType = TreeT;
using RootType = typename TreeT::RootType;
@@ -2566,183 +2897,195 @@
/// @brief Disallow constructions, copy and assignment
///
/// @note Only a Serializer, defined elsewhere, can instantiate this class
- Grid(const Grid&) = delete;
- Grid& operator=(const Grid&) = delete;
+ Grid(__global__ const Grid&) __global__ = delete;
+ __global__ Grid& operator=(__global__ const Grid&) __global__ = delete;
~Grid() = delete;
- __hostdev__ Version version() const { return DataType::mVersion; }
+ __hostdev__ Version version() const __global__ { return BASE(mVersion); }
- __hostdev__ DataType* data() { return reinterpret_cast<DataType*>(this); }
+ __hostdev__ __global__ DataType* data() __global__ { return reinterpret_cast<__global__ DataType*>(this); }
- __hostdev__ const DataType* data() const { return reinterpret_cast<const DataType*>(this); }
+ __hostdev__ __global__ const DataType* data() const __global__ { return reinterpret_cast<__global__ const DataType*>(this); }
/// @brief Return memory usage in bytes for this class only.
__hostdev__ static uint64_t memUsage() { return sizeof(GridData); }
/// @brief Return the memory footprint of the entire grid, i.e. including all nodes and blind data
- __hostdev__ uint64_t gridSize() const { return DataType::mGridSize; }
+ __hostdev__ uint64_t gridSize() const __global__ { return BASE(mGridSize); }
/// @brief Return index of this grid in the buffer
- __hostdev__ uint32_t gridIndex() const { return DataType::mGridIndex; }
+ __hostdev__ uint32_t gridIndex() const __global__ { return BASE(mGridIndex); }
/// @brief Return total number of grids in the buffer
- __hostdev__ uint32_t gridCount() const { return DataType::mGridCount; }
+ __hostdev__ uint32_t gridCount() const __global__ { return BASE(mGridCount); }
/// @brief @brief Return the total number of values indexed by this IndexGrid
///
/// @note This method is only defined for IndexGrid = NanoGrid<ValueIndex>
template <typename T = BuildType>
- __hostdev__ typename enable_if<is_same<T, ValueIndex>::value, uint64_t>::type valueCount() const {return DataType::mData1;}
+ __hostdev__ typename enable_if<is_same<T, ValueIndex>::value, uint64_t>::type valueCount() const {return BASE(mData1);}
/// @brief Return a const reference to the tree
- __hostdev__ const TreeT& tree() const { return *reinterpret_cast<const TreeT*>(this->treePtr()); }
+ __hostdev__ __global__ const TreeT& tree() const __global__ { return *reinterpret_cast<__global__ const TreeT*>(BASE(treePtr)()); }
/// @brief Return a non-const reference to the tree
- __hostdev__ TreeT& tree() { return *reinterpret_cast<TreeT*>(this->treePtr()); }
+ __hostdev__ __global__ TreeT& tree() __global__ { return *reinterpret_cast<__global__ TreeT*>(BASE(treePtr)()); }
/// @brief Return a new instance of a ReadAccessor used to access values in this grid
- __hostdev__ AccessorType getAccessor() const { return AccessorType(this->tree().root()); }
+ __hostdev__ AccessorType getAccessor() const __global__ { return AccessorType(this->tree().root()); }
/// @brief Return a const reference to the size of a voxel in world units
- __hostdev__ const Vec3R& voxelSize() const { return DataType::mVoxelSize; }
+ __hostdev__ const __global__ Vec3R& voxelSize() const __global__ { return BASE(mVoxelSize); }
/// @brief Return a const reference to the Map for this grid
- __hostdev__ const Map& map() const { return DataType::mMap; }
+ __hostdev__ const __global__ Map& map() const __global__ { return BASE(mMap); }
/// @brief world to index space transformation
template<typename Vec3T>
- __hostdev__ Vec3T worldToIndex(const Vec3T& xyz) const { return this->applyInverseMap(xyz); }
+ __hostdev__ Vec3T worldToIndex(__global__ const Vec3T& xyz) const __global__ { return BASE(applyInverseMap)(xyz); }
+#if defined(__KERNEL_METAL__)
+ template<typename Vec3T>
+ __hostdev__ Vec3T worldToIndex(__local__ const Vec3T& xyz) const __global__ { return BASE(applyInverseMap)(xyz); }
+#endif
+
/// @brief index to world space transformation
template<typename Vec3T>
- __hostdev__ Vec3T indexToWorld(const Vec3T& xyz) const { return this->applyMap(xyz); }
+ __hostdev__ Vec3T indexToWorld(__global__ const Vec3T& xyz) const __global__ { return this->applyMap(xyz); }
/// @brief transformation from index space direction to world space direction
/// @warning assumes dir to be normalized
template<typename Vec3T>
- __hostdev__ Vec3T indexToWorldDir(const Vec3T& dir) const { return this->applyJacobian(dir); }
+ __hostdev__ Vec3T indexToWorldDir(__global__ const Vec3T& dir) const __global__ { return this->applyJacobian(dir); }
/// @brief transformation from world space direction to index space direction
/// @warning assumes dir to be normalized
template<typename Vec3T>
- __hostdev__ Vec3T worldToIndexDir(const Vec3T& dir) const { return this->applyInverseJacobian(dir); }
+ __hostdev__ Vec3T worldToIndexDir(__global__ const Vec3T& dir) const __global__ { return this->applyInverseJacobian(dir); }
/// @brief transform the gradient from index space to world space.
/// @details Applies the inverse jacobian transform map.
template<typename Vec3T>
- __hostdev__ Vec3T indexToWorldGrad(const Vec3T& grad) const { return this->applyIJT(grad); }
+ __hostdev__ Vec3T indexToWorldGrad(__global__ const Vec3T& grad) const __global__ { return this->applyIJT(grad); }
/// @brief world to index space transformation
template<typename Vec3T>
- __hostdev__ Vec3T worldToIndexF(const Vec3T& xyz) const { return this->applyInverseMapF(xyz); }
+ __hostdev__ Vec3T worldToIndexF(__global__ const Vec3T& xyz) const __global__ { return BASE(applyInverseMapF)(xyz); }
+#if defined(__KERNEL_METAL__)
+ template<typename Vec3T>
+ __hostdev__ Vec3T worldToIndexF(__local__ const Vec3T& xyz) const __global__ { return BASE(applyInverseMapF)(xyz); }
+#endif
/// @brief index to world space transformation
template<typename Vec3T>
- __hostdev__ Vec3T indexToWorldF(const Vec3T& xyz) const { return this->applyMapF(xyz); }
+ __hostdev__ Vec3T indexToWorldF(__global__ const Vec3T& xyz) const __global__ { return this->applyMapF(xyz); }
/// @brief transformation from index space direction to world space direction
/// @warning assumes dir to be normalized
template<typename Vec3T>
- __hostdev__ Vec3T indexToWorldDirF(const Vec3T& dir) const { return this->applyJacobianF(dir); }
+ __hostdev__ Vec3T indexToWorldDirF(__global__ const Vec3T& dir) const __global__ { return this->applyJacobianF(dir); }
/// @brief transformation from world space direction to index space direction
/// @warning assumes dir to be normalized
template<typename Vec3T>
- __hostdev__ Vec3T worldToIndexDirF(const Vec3T& dir) const { return this->applyInverseJacobianF(dir); }
+ __hostdev__ Vec3T worldToIndexDirF(__global__ const Vec3T& dir) const __global__ { return BASE(applyInverseJacobianF)(dir); }
+#if defined(__KERNEL_METAL__)
+ template<typename Vec3T>
+ __hostdev__ Vec3T worldToIndexDirF(__local__ const Vec3T& dir) const __global__ { return BASE(applyInverseJacobianF)(dir); }
+#endif
/// @brief Transforms the gradient from index space to world space.
/// @details Applies the inverse jacobian transform map.
template<typename Vec3T>
- __hostdev__ Vec3T indexToWorldGradF(const Vec3T& grad) const { return DataType::applyIJTF(grad); }
+ __hostdev__ Vec3T indexToWorldGradF(__global__ const Vec3T& grad) const __global__ { return BASE(applyIJTF(grad)); }
/// @brief Computes a AABB of active values in world space
- __hostdev__ const BBox<Vec3R>& worldBBox() const { return DataType::mWorldBBox; }
+ __hostdev__ __global__ const BBox<Vec3R>& worldBBox() const __global__ { return BASE(mWorldBBox); }
/// @brief Computes a AABB of active values in index space
///
/// @note This method is returning a floating point bounding box and not a CoordBBox. This makes
/// it more useful for clipping rays.
- __hostdev__ const BBox<CoordType>& indexBBox() const { return this->tree().bbox(); }
+ __hostdev__ __global__ const BBox<CoordType>& indexBBox() const __global__ { return this->tree().bbox(); }
/// @brief Return the total number of active voxels in this tree.
- __hostdev__ uint64_t activeVoxelCount() const { return this->tree().activeVoxelCount(); }
+ __hostdev__ uint64_t activeVoxelCount() const __global__ { return this->tree().activeVoxelCount(); }
/// @brief Methods related to the classification of this grid
- __hostdev__ bool isValid() const { return DataType::mMagic == NANOVDB_MAGIC_NUMBER; }
- __hostdev__ const GridType& gridType() const { return DataType::mGridType; }
- __hostdev__ const GridClass& gridClass() const { return DataType::mGridClass; }
- __hostdev__ bool isLevelSet() const { return DataType::mGridClass == GridClass::LevelSet; }
- __hostdev__ bool isFogVolume() const { return DataType::mGridClass == GridClass::FogVolume; }
- __hostdev__ bool isStaggered() const { return DataType::mGridClass == GridClass::Staggered; }
- __hostdev__ bool isPointIndex() const { return DataType::mGridClass == GridClass::PointIndex; }
- __hostdev__ bool isGridIndex() const { return DataType::mGridClass == GridClass::IndexGrid; }
- __hostdev__ bool isPointData() const { return DataType::mGridClass == GridClass::PointData; }
- __hostdev__ bool isMask() const { return DataType::mGridClass == GridClass::Topology; }
- __hostdev__ bool isUnknown() const { return DataType::mGridClass == GridClass::Unknown; }
- __hostdev__ bool hasMinMax() const { return DataType::mFlags & static_cast<uint32_t>(GridFlags::HasMinMax); }
- __hostdev__ bool hasBBox() const { return DataType::mFlags & static_cast<uint32_t>(GridFlags::HasBBox); }
- __hostdev__ bool hasLongGridName() const { return DataType::mFlags & static_cast<uint32_t>(GridFlags::HasLongGridName); }
- __hostdev__ bool hasAverage() const { return DataType::mFlags & static_cast<uint32_t>(GridFlags::HasAverage); }
- __hostdev__ bool hasStdDeviation() const { return DataType::mFlags & static_cast<uint32_t>(GridFlags::HasStdDeviation); }
- __hostdev__ bool isBreadthFirst() const { return DataType::mFlags & static_cast<uint32_t>(GridFlags::IsBreadthFirst); }
+ __hostdev__ bool isValid() const __global__ { return BASE(mMagic) == NANOVDB_MAGIC_NUMBER; }
+ __hostdev__ const __global__ GridType& gridType() const __global__ { return BASE(mGridType); }
+ __hostdev__ const __global__ GridClass& gridClass() const __global__ { return BASE(mGridClass); }
+ __hostdev__ bool isLevelSet() const __global__ { return BASE(mGridClass) == GridClass::LevelSet; }
+ __hostdev__ bool isFogVolume() const __global__ { return BASE(mGridClass) == GridClass::FogVolume; }
+ __hostdev__ bool isStaggered() const __global__ { return BASE(mGridClass) == GridClass::Staggered; }
+ __hostdev__ bool isPointIndex() const __global__ { return BASE(mGridClass) == GridClass::PointIndex; }
+ __hostdev__ bool isGridIndex() const __global__ { return BASE(mGridClass) == GridClass::IndexGrid; }
+ __hostdev__ bool isPointData() const __global__ { return BASE(mGridClass) == GridClass::PointData; }
+ __hostdev__ bool isMask() const __global__ { return BASE(mGridClass) == GridClass::Topology; }
+ __hostdev__ bool isUnknown() const __global__ { return BASE(mGridClass) == GridClass::Unknown; }
+ __hostdev__ bool hasMinMax() const __global__ { return BASE(mFlags) & static_cast<uint32_t>(GridFlags::HasMinMax); }
+ __hostdev__ bool hasBBox() const __global__ { return BASE(mFlags) & static_cast<uint32_t>(GridFlags::HasBBox); }
+ __hostdev__ bool hasLongGridName() const __global__ { return BASE(mFlags) & static_cast<uint32_t>(GridFlags::HasLongGridName); }
+ __hostdev__ bool hasAverage() const __global__ { return BASE(mFlags) & static_cast<uint32_t>(GridFlags::HasAverage); }
+ __hostdev__ bool hasStdDeviation() const __global__ { return BASE(mFlags) & static_cast<uint32_t>(GridFlags::HasStdDeviation); }
+ __hostdev__ bool isBreadthFirst() const __global__ { return BASE(mFlags) & static_cast<uint32_t>(GridFlags::IsBreadthFirst); }
/// @brief return true if the specified node type is layed out breadth-first in memory and has a fixed size.
/// This allows for sequential access to the nodes.
template <typename NodeT>
- __hostdev__ bool isSequential() const { return NodeT::FIXED_SIZE && this->isBreadthFirst(); }
+ __hostdev__ bool isSequential() const __global__ { return NodeT::FIXED_SIZE && this->isBreadthFirst(); }
/// @brief return true if the specified node level is layed out breadth-first in memory and has a fixed size.
/// This allows for sequential access to the nodes.
template <int LEVEL>
- __hostdev__ bool isSequential() const { return NodeTrait<TreeT,LEVEL>::type::FIXED_SIZE && this->isBreadthFirst(); }
+ __hostdev__ bool isSequential() const __global__ { return NodeTrait<TreeT,LEVEL>::type::FIXED_SIZE && this->isBreadthFirst(); }
/// @brief Return a c-string with the name of this grid
- __hostdev__ const char* gridName() const
+ __hostdev__ __global__ const char* gridName() const __global__
{
if (this->hasLongGridName()) {
NANOVDB_ASSERT(DataType::mBlindMetadataCount>0);
- const auto &metaData = this->blindMetaData(DataType::mBlindMetadataCount-1);// always the last
+ __global__ const auto &metaData = this->blindMetaData(BASE(mBlindMetadataCount)-1);// always the last
NANOVDB_ASSERT(metaData.mDataClass == GridBlindDataClass::GridName);
return metaData.template getBlindData<const char>();
}
- return DataType::mGridName;
+ return BASE(mGridName);
}
/// @brief Return a c-string with the name of this grid, truncated to 255 characters
- __hostdev__ const char* shortGridName() const { return DataType::mGridName; }
-
+ __hostdev__ __global__ const char* shortGridName() const __global__ { return BASE(mGridName); }
/// @brief Return checksum of the grid buffer.
- __hostdev__ uint64_t checksum() const { return DataType::mChecksum; }
+ __hostdev__ uint64_t checksum() const __global__ { return BASE(mChecksum); }
/// @brief Return true if this grid is empty, i.e. contains no values or nodes.
- __hostdev__ bool isEmpty() const { return this->tree().isEmpty(); }
+ __hostdev__ bool isEmpty() const __global__ { return this->tree().isEmpty(); }
/// @brief Return the count of blind-data encoded in this grid
- __hostdev__ uint32_t blindDataCount() const { return DataType::mBlindMetadataCount; }
+ __hostdev__ uint32_t blindDataCount() const __global__ { return BASE(mBlindMetadataCount); }
/// @brief Return the index of the blind data with specified semantic if found, otherwise -1.
- __hostdev__ int findBlindDataForSemantic(GridBlindDataSemantic semantic) const;
+ __hostdev__ int findBlindDataForSemantic(GridBlindDataSemantic semantic) const __global__;
/// @brief Returns a const pointer to the blindData at the specified linear offset.
///
/// @warning Point might be NULL and the linear offset is assumed to be in the valid range
- __hostdev__ const void* blindData(uint32_t n) const
+ __hostdev__ __global__ const void* blindData(uint32_t n) const __global__
{
- if (DataType::mBlindMetadataCount == 0u) {
+ if (BASE(mBlindMetadataCount) == 0u) {
return nullptr;
}
NANOVDB_ASSERT(n < DataType::mBlindMetadataCount);
return this->blindMetaData(n).template getBlindData<void>();
}
+
+ __hostdev__ __global__ const GridBlindMetaData& blindMetaData(uint32_t n) const __global__ { return *BASE(blindMetaData)(n); }
- __hostdev__ const GridBlindMetaData& blindMetaData(uint32_t n) const { return *DataType::blindMetaData(n); }
-
private:
static_assert(sizeof(GridData) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(GridData) is misaligned");
}; // Class Grid
template<typename TreeT>
-__hostdev__ int Grid<TreeT>::findBlindDataForSemantic(GridBlindDataSemantic semantic) const
+__hostdev__ int Grid<TreeT>::findBlindDataForSemantic(GridBlindDataSemantic semantic) const __global__
{
for (uint32_t i = 0, n = this->blindDataCount(); i < n; ++i)
if (this->blindMetaData(i).mSemantic == semantic)
@@ -2762,14 +3105,14 @@
uint64_t mVoxelCount;// 8B, total number of active voxels in the root and all its child nodes.
// No padding since it's always 32B aligned
template <typename RootT>
- __hostdev__ void setRoot(const RootT* root) { mNodeOffset[3] = PtrDiff(root, this); }
+ __hostdev__ void setRoot(__global__ const RootT* root) __global__ { mNodeOffset[3] = PtrDiff(root, this); }
template <typename RootT>
- __hostdev__ RootT* getRoot() { return PtrAdd<RootT>(this, mNodeOffset[3]); }
+ __hostdev__ __global__ RootT* getRoot() __global__ { return PtrAdd<RootT>(this, mNodeOffset[3]); }
template <typename RootT>
- __hostdev__ const RootT* getRoot() const { return PtrAdd<RootT>(this, mNodeOffset[3]); }
+ __hostdev__ __global__ const RootT* getRoot() const __global__ { return PtrAdd<RootT>(this, mNodeOffset[3]); }
template <typename NodeT>
- __hostdev__ void setFirstNode(const NodeT* node)
+ __hostdev__ void setFirstNode(__global__ const NodeT* node) __global__
{
mNodeOffset[NodeT::LEVEL] = node ? PtrDiff(node, this) : 0;
}
@@ -2795,8 +3138,17 @@
/// @brief VDB Tree, which is a thin wrapper around a RootNode.
template<typename RootT>
-class Tree : private TreeData<RootT::LEVEL>
+class Tree
+#if !defined(__KERNEL_METAL__)
+ : private TreeData<RootT::LEVEL>
+#endif
{
+#if defined(__KERNEL_METAL__)
+ TreeData<RootT::LEVEL> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) DataType::v
+#endif
static_assert(RootT::LEVEL == 3, "Tree depth is not supported");
static_assert(RootT::ChildNodeType::LOG2DIM == 5, "Tree configuration is not supported");
static_assert(RootT::ChildNodeType::ChildNodeType::LOG2DIM == 4, "Tree configuration is not supported");
@@ -2817,47 +3169,54 @@
using Node0 = LeafNodeType;
/// @brief This class cannot be constructed or deleted
- Tree() = delete;
- Tree(const Tree&) = delete;
- Tree& operator=(const Tree&) = delete;
- ~Tree() = delete;
+ Tree() __global__ = delete;
+ Tree(__global__ const Tree&) __global__ = delete;
+ __global__ Tree& operator=(__global__ const Tree&) __global__ = delete;
+ ~Tree() __global__ = delete;
- __hostdev__ DataType* data() { return reinterpret_cast<DataType*>(this); }
+ __hostdev__ __global__ DataType* data() __global__ { return reinterpret_cast<__global__ DataType*>(this); }
- __hostdev__ const DataType* data() const { return reinterpret_cast<const DataType*>(this); }
+ __hostdev__ __global__ const DataType* data() const __global__ { return reinterpret_cast<__global__ const DataType*>(this); }
/// @brief return memory usage in bytes for the class
__hostdev__ static uint64_t memUsage() { return sizeof(DataType); }
- __hostdev__ RootT& root() { return *DataType::template getRoot<RootT>(); }
+ __hostdev__ __global__ RootT& root() __global__ { return *BASE(template) getRoot<RootT>(); }
- __hostdev__ const RootT& root() const { return *DataType::template getRoot<RootT>(); }
+ __hostdev__ __global__ const RootT& root() const __global__ { return *BASE(template) getRoot<RootT>(); }
- __hostdev__ AccessorType getAccessor() const { return AccessorType(this->root()); }
+ __hostdev__ AccessorType getAccessor() const __global__ { return AccessorType(this->root()); }
/// @brief Return the value of the given voxel (regardless of state or location in the tree.)
- __hostdev__ ValueType getValue(const CoordType& ijk) const { return this->root().getValue(ijk); }
+ __hostdev__ ValueType getValue(__global__ const CoordType& ijk) const __global__ { return this->root().getValue(ijk); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(__local__ const CoordType& ijk) const __global__ { return this->root().getValue(ijk); }
+#endif
/// @brief Return the active state of the given voxel (regardless of state or location in the tree.)
- __hostdev__ bool isActive(const CoordType& ijk) const { return this->root().isActive(ijk); }
+ __hostdev__ bool isActive(__global__ const CoordType& ijk) const __global__ { return this->root().isActive(ijk); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isActive(__local__ const CoordType& ijk) const __global__ { return this->root().isActive(ijk); }
+ __hostdev__ bool isActive(__local__ const CoordType& ijk) const __local__ { return this->root().isActive(ijk); }
+#endif
/// @brief Return true if this tree is empty, i.e. contains no values or nodes
- __hostdev__ bool isEmpty() const { return this->root().isEmpty(); }
+ __hostdev__ bool isEmpty() const __global__ { return this->root().isEmpty(); }
/// @brief Combines the previous two methods in a single call
- __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->root().probeValue(ijk, v); }
+ __hostdev__ bool probeValue(__global__ const CoordType& ijk, __global__ ValueType& v) const { return this->root().probeValue(ijk, v); }
/// @brief Return a const reference to the background value.
- __hostdev__ const ValueType& background() const { return this->root().background(); }
+ __hostdev__ __global__ const ValueType& background() const __global__ { return this->root().background(); }
/// @brief Sets the extrema values of all the active values in this tree, i.e. in all nodes of the tree
- __hostdev__ void extrema(ValueType& min, ValueType& max) const;
+ __hostdev__ void extrema(__global__ ValueType& min, __global__ ValueType& max) const __global__;
/// @brief Return a const reference to the index bounding box of all the active values in this tree, i.e. in all nodes of the tree
- __hostdev__ const BBox<CoordType>& bbox() const { return this->root().bbox(); }
+ __hostdev__ __global__ const BBox<CoordType>& bbox() const __global__ { return this->root().bbox(); }
/// @brief Return the total number of active voxels in this tree.
- __hostdev__ uint64_t activeVoxelCount() const { return DataType::mVoxelCount; }
+ __hostdev__ uint64_t activeVoxelCount() const __global__ { return BASE(mVoxelCount); }
/// @brief Return the total number of active tiles at the specified level of the tree.
///
@@ -2864,23 +3223,23 @@
/// @details level = 1,2,3 corresponds to active tile count in lower internal nodes, upper
/// internal nodes, and the root level. Note active values at the leaf level are
/// referred to as active voxels (see activeVoxelCount defined above).
- __hostdev__ const uint32_t& activeTileCount(uint32_t level) const
+ __hostdev__ __global__ const uint32_t& activeTileCount(uint32_t level) const __global__
{
NANOVDB_ASSERT(level > 0 && level <= 3);// 1, 2, or 3
- return DataType::mTileCount[level - 1];
+ return BASE(mTileCount)[level - 1];
}
template<typename NodeT>
- __hostdev__ uint32_t nodeCount() const
+ __hostdev__ uint32_t nodeCount() const __global__
{
static_assert(NodeT::LEVEL < 3, "Invalid NodeT");
- return DataType::mNodeCount[NodeT::LEVEL];
+ return BASE(mNodeCount)[NodeT::LEVEL];
}
- __hostdev__ uint32_t nodeCount(int level) const
+ __hostdev__ uint32_t nodeCount(int level) const __global__
{
NANOVDB_ASSERT(level < 3);
- return DataType::mNodeCount[level];
+ return BASE(mNodeCount)[level];
}
/// @brief return a pointer to the first node of the specified type
@@ -2887,9 +3246,9 @@
///
/// @warning Note it may return NULL if no nodes exist
template <typename NodeT>
- __hostdev__ NodeT* getFirstNode()
+ __hostdev__ __global__ NodeT* getFirstNode() __global__
{
- const uint64_t offset = DataType::mNodeOffset[NodeT::LEVEL];
+ const uint64_t offset = BASE(mNodeOffset)[NodeT::LEVEL];
return offset>0 ? PtrAdd<NodeT>(this, offset) : nullptr;
}
@@ -2897,9 +3256,9 @@
///
/// @warning Note it may return NULL if no nodes exist
template <typename NodeT>
- __hostdev__ const NodeT* getFirstNode() const
+ __hostdev__ __global__ const NodeT* getFirstNode() const __global__
{
- const uint64_t offset = DataType::mNodeOffset[NodeT::LEVEL];
+ const uint64_t offset = BASE(mNodeOffset)[NodeT::LEVEL];
return offset>0 ? PtrAdd<NodeT>(this, offset) : nullptr;
}
@@ -2907,8 +3266,8 @@
///
/// @warning Note it may return NULL if no nodes exist
template <int LEVEL>
- __hostdev__ typename NodeTrait<RootT, LEVEL>::type*
- getFirstNode()
+ __hostdev__ __global__ typename NodeTrait<RootT, LEVEL>::type*
+ getFirstNode() __global__
{
return this->template getFirstNode<typename NodeTrait<RootT,LEVEL>::type>();
}
@@ -2917,27 +3276,28 @@
///
/// @warning Note it may return NULL if no nodes exist
template <int LEVEL>
- __hostdev__ const typename NodeTrait<RootT, LEVEL>::type*
- getFirstNode() const
+ __hostdev__ __global__ const typename NodeTrait<RootT, LEVEL>::type*
+ getFirstNode() const __global__
{
return this->template getFirstNode<typename NodeTrait<RootT,LEVEL>::type>();
}
/// @brief Template specializations of getFirstNode
- __hostdev__ LeafNodeType* getFirstLeaf() {return this->getFirstNode<LeafNodeType>();}
- __hostdev__ const LeafNodeType* getFirstLeaf() const {return this->getFirstNode<LeafNodeType>();}
- __hostdev__ typename NodeTrait<RootT, 1>::type* getFirstLower() {return this->getFirstNode<1>();}
- __hostdev__ const typename NodeTrait<RootT, 1>::type* getFirstLower() const {return this->getFirstNode<1>();}
- __hostdev__ typename NodeTrait<RootT, 2>::type* getFirstUpper() {return this->getFirstNode<2>();}
- __hostdev__ const typename NodeTrait<RootT, 2>::type* getFirstUpper() const {return this->getFirstNode<2>();}
+ __hostdev__ __global__ LeafNodeType* getFirstLeaf() {return this->getFirstNode<LeafNodeType>();}
+ __hostdev__ __global__ const LeafNodeType* getFirstLeaf() const {return this->getFirstNode<LeafNodeType>();}
+ __hostdev__ __global__ typename NodeTrait<RootT, 1>::type* getFirstLower() {return this->getFirstNode<1>();}
+ __hostdev__ __global__ const typename NodeTrait<RootT, 1>::type* getFirstLower() const {return this->getFirstNode<1>();}
+ __hostdev__ __global__ typename NodeTrait<RootT, 2>::type* getFirstUpper() {return this->getFirstNode<2>();}
+ __hostdev__ __global__ const typename NodeTrait<RootT, 2>::type* getFirstUpper() const {return this->getFirstNode<2>();}
private:
static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(TreeData) is misaligned");
+#undef BASE
}; // Tree class
template<typename RootT>
-__hostdev__ void Tree<RootT>::extrema(ValueType& min, ValueType& max) const
+__hostdev__ void Tree<RootT>::extrema(__global__ ValueType& min, __global__ ValueType& max) const __global__
{
min = this->root().minimum();
max = this->root().maximum();
@@ -2955,13 +3315,13 @@
using BuildT = typename ChildT::BuildType;// in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool
using CoordT = typename ChildT::CoordType;
using StatsT = typename ChildT::FloatType;
- static constexpr bool FIXED_SIZE = false;
+ static __constant__ constexpr bool FIXED_SIZE = false;
/// @brief Return a key based on the coordinates of a voxel
#ifdef USE_SINGLE_ROOT_KEY
using KeyT = uint64_t;
template <typename CoordType>
- __hostdev__ static KeyT CoordToKey(const CoordType& ijk)
+ __hostdev__ static KeyT CoordToKey(__global__ const CoordType& ijk)
{
static_assert(sizeof(CoordT) == sizeof(CoordType), "Mismatching sizeof");
static_assert(32 - ChildT::TOTAL <= 21, "Cannot use 64 bit root keys");
@@ -2969,9 +3329,20 @@
(KeyT(uint32_t(ijk[1]) >> ChildT::TOTAL) << 21) | // y is the middle 21 bits
(KeyT(uint32_t(ijk[0]) >> ChildT::TOTAL) << 42); // x is the upper 21 bits
}
- __hostdev__ static CoordT KeyToCoord(const KeyT& key)
+#if defined(__KERNEL_METAL__)
+ template <typename CoordType>
+ __hostdev__ static KeyT CoordToKey(__local__ const CoordType& ijk)
{
- static constexpr uint64_t MASK = (1u << 21) - 1;
+ static_assert(sizeof(CoordT) == sizeof(CoordType), "Mismatching sizeof");
+ static_assert(32 - ChildT::TOTAL <= 21, "Cannot use 64 bit root keys");
+ return (KeyT(uint32_t(ijk[2]) >> ChildT::TOTAL)) | // z is the lower 21 bits
+ (KeyT(uint32_t(ijk[1]) >> ChildT::TOTAL) << 21) | // y is the middle 21 bits
+ (KeyT(uint32_t(ijk[0]) >> ChildT::TOTAL) << 42); // x is the upper 21 bits
+ }
+#endif
+ static __constant__ constexpr uint64_t MASK = (1u << 21) - 1;
+ __hostdev__ static CoordT KeyToCoord(__global__ const KeyT& key)
+ {
return CoordT(((key >> 42) & MASK) << ChildT::TOTAL,
((key >> 21) & MASK) << ChildT::TOTAL,
(key & MASK) << ChildT::TOTAL);
@@ -2978,8 +3349,8 @@
}
#else
using KeyT = CoordT;
- __hostdev__ static KeyT CoordToKey(const CoordT& ijk) { return ijk & ~ChildT::MASK; }
- __hostdev__ static CoordT KeyToCoord(const KeyT& key) { return key; }
+ __hostdev__ static KeyT CoordToKey(__global__ const CoordT& ijk) { return ijk & ~ChildT::MASK; }
+ __hostdev__ static CoordT KeyToCoord(__global__ const KeyT& key) { return key; }
#endif
BBox<CoordT> mBBox; // 24B. AABB of active values in index space.
uint32_t mTableSize; // 4B. number of tiles and child pointers in the root node
@@ -3000,13 +3371,13 @@
struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) Tile
{
template <typename CoordType>
- __hostdev__ void setChild(const CoordType& k, const ChildT *ptr, const RootData *data)
+ __hostdev__ void setChild(__global__ const CoordType& k, __global__ const ChildT *ptr, __global__ const RootData *data)
{
key = CoordToKey(k);
child = PtrDiff(ptr, data);
}
template <typename CoordType, typename ValueType>
- __hostdev__ void setValue(const CoordType& k, bool s, const ValueType &v)
+ __hostdev__ void setValue(__global__ const CoordType& k, bool s, __global__ const ValueType &v)
{
key = CoordToKey(k);
state = s;
@@ -3013,10 +3384,10 @@
value = v;
child = 0;
}
- __hostdev__ bool isChild() const { return child!=0; }
- __hostdev__ bool isValue() const { return child==0; }
- __hostdev__ bool isActive() const { return child==0 && state; }
- __hostdev__ CoordT origin() const { return KeyToCoord(key); }
+ __hostdev__ bool isChild() const __global__ { return child!=0; }
+ __hostdev__ bool isValue() const __global__ { return child==0; }
+ __hostdev__ bool isActive() const __global__ { return child==0 && state; }
+ __hostdev__ CoordT origin() const __global__ { return KeyToCoord(key); }
KeyT key; // USE_SINGLE_ROOT_KEY ? 8B : 12B
int64_t child; // 8B. signed byte offset from this node to the child node. 0 means it is a constant tile, so use value.
uint32_t state; // 4B. state of tile value
@@ -3026,53 +3397,64 @@
/// @brief Returns a non-const reference to the tile at the specified linear offset.
///
/// @warning The linear offset is assumed to be in the valid range
- __hostdev__ const Tile* tile(uint32_t n) const
+ __hostdev__ __global__ const Tile* tile(uint32_t n) const
{
NANOVDB_ASSERT(n < mTableSize);
- return reinterpret_cast<const Tile*>(this + 1) + n;
+ return reinterpret_cast<__global__ const Tile*>(this + 1) + n;
}
- __hostdev__ Tile* tile(uint32_t n)
+ __hostdev__ __global__ Tile* tile(uint32_t n)
{
NANOVDB_ASSERT(n < mTableSize);
- return reinterpret_cast<Tile*>(this + 1) + n;
+ return reinterpret_cast<__global__ Tile*>(this + 1) + n;
}
/// @brief Returns a const reference to the child node in the specified tile.
///
/// @warning A child node is assumed to exist in the specified tile
- __hostdev__ ChildT* getChild(const Tile* tile)
+ __hostdev__ __global__ ChildT* getChild(__global__ const Tile* tile) __global__
{
NANOVDB_ASSERT(tile->child);
return PtrAdd<ChildT>(this, tile->child);
}
- __hostdev__ const ChildT* getChild(const Tile* tile) const
+ __hostdev__ __global__ const ChildT* getChild(__global__ const Tile* tile) const __global__
{
NANOVDB_ASSERT(tile->child);
return PtrAdd<ChildT>(this, tile->child);
}
- __hostdev__ const ValueT& getMin() const { return mMinimum; }
- __hostdev__ const ValueT& getMax() const { return mMaximum; }
- __hostdev__ const StatsT& average() const { return mAverage; }
- __hostdev__ const StatsT& stdDeviation() const { return mStdDevi; }
+ __hostdev__ __global__ const ValueT& getMin() const { return mMinimum; }
+ __hostdev__ __global__ const ValueT& getMax() const { return mMaximum; }
+ __hostdev__ __global__ const StatsT& average() const { return mAverage; }
+ __hostdev__ __global__ const StatsT& stdDeviation() const { return mStdDevi; }
- __hostdev__ void setMin(const ValueT& v) { mMinimum = v; }
- __hostdev__ void setMax(const ValueT& v) { mMaximum = v; }
- __hostdev__ void setAvg(const StatsT& v) { mAverage = v; }
- __hostdev__ void setDev(const StatsT& v) { mStdDevi = v; }
+ __hostdev__ void setMin(__global__ const ValueT& v) { mMinimum = v; }
+ __hostdev__ void setMax(__global__ const ValueT& v) { mMaximum = v; }
+ __hostdev__ void setAvg(__global__ const StatsT& v) { mAverage = v; }
+ __hostdev__ void setDev(__global__ const StatsT& v) { mStdDevi = v; }
/// @brief This class cannot be constructed or deleted
RootData() = delete;
- RootData(const RootData&) = delete;
- RootData& operator=(const RootData&) = delete;
+ RootData(__global__ const RootData&) = delete;
+ __global__ RootData& operator=(__global__ const RootData&) = delete;
~RootData() = delete;
}; // RootData
/// @brief Top-most node of the VDB tree structure.
template<typename ChildT>
-class RootNode : private RootData<ChildT>
+class RootNode
+#if !defined(__KERNEL_METAL__)
+ : private RootData<ChildT>
+#endif
{
public:
+#if defined(__KERNEL_METAL__)
+
+ RootData<ChildT> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) DataType::v
+#endif
+
using DataType = RootData<ChildT>;
using LeafNodeType = typename ChildT::LeafNodeType;
using ChildNodeType = ChildT;
@@ -3086,27 +3468,27 @@
using BBoxType = BBox<CoordType>;
using AccessorType = DefaultReadAccessor<BuildType>;
using Tile = typename DataType::Tile;
- static constexpr bool FIXED_SIZE = DataType::FIXED_SIZE;
+ static __constant__ constexpr bool FIXED_SIZE = DataType::FIXED_SIZE;
- static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf
+ static __constant__ constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf
class ChildIterator
{
- const DataType *mParent;
- uint32_t mPos, mSize;
+ __global__ const DataType *mParent;
+ uint32_t mPos, mSize;
public:
__hostdev__ ChildIterator() : mParent(nullptr), mPos(0), mSize(0) {}
- __hostdev__ ChildIterator(const RootNode *parent) : mParent(parent->data()), mPos(0), mSize(parent->tileCount()) {
+ __hostdev__ ChildIterator(__global__ const RootNode *parent) : mParent(parent->data()), mPos(0), mSize(parent->tileCount()) {
NANOVDB_ASSERT(mParent);
while (mPos<mSize && !mParent->tile(mPos)->isChild()) ++mPos;
}
- ChildIterator& operator=(const ChildIterator&) = default;
- __hostdev__ const ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mParent->getChild(mParent->tile(mPos));}
- __hostdev__ const ChildT* operator->() const {NANOVDB_ASSERT(*this); return mParent->getChild(mParent->tile(mPos));}
+ __global__ ChildIterator& operator=(__global__ const ChildIterator&) = default;
+ __hostdev__ __global__ const ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mParent->getChild(mParent->tile(mPos));}
+ __hostdev__ __global__ const ChildT* operator->() const {NANOVDB_ASSERT(*this); return mParent->getChild(mParent->tile(mPos));}
__hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); mParent->tile(mPos)->origin();}
__hostdev__ operator bool() const {return mPos < mSize;}
__hostdev__ uint32_t pos() const {return mPos;}
- __hostdev__ ChildIterator& operator++() {
+ __hostdev__ __global__ ChildIterator& operator++() {
NANOVDB_ASSERT(mParent);
++mPos;
while (mPos < mSize && mParent->tile(mPos)->isValue()) ++mPos;
@@ -3123,21 +3505,21 @@
class ValueIterator
{
- const DataType *mParent;
- uint32_t mPos, mSize;
+ __global__ const DataType *mParent;
+ uint32_t mPos, mSize;
public:
__hostdev__ ValueIterator() : mParent(nullptr), mPos(0), mSize(0) {}
- __hostdev__ ValueIterator(const RootNode *parent) : mParent(parent->data()), mPos(0), mSize(parent->tileCount()){
+ __hostdev__ ValueIterator(__global__ const RootNode *parent) : mParent(parent->data()), mPos(0), mSize(parent->tileCount()){
NANOVDB_ASSERT(mParent);
while (mPos < mSize && mParent->tile(mPos)->isChild()) ++mPos;
}
- ValueIterator& operator=(const ValueIterator&) = default;
+ __global__ ValueIterator& operator=(__global__ const ValueIterator&) = default;
__hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->tile(mPos)->value;}
__hostdev__ bool isActive() const {NANOVDB_ASSERT(*this); return mParent->tile(mPos)->state;}
__hostdev__ operator bool() const {return mPos < mSize;}
__hostdev__ uint32_t pos() const {return mPos;}
__hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); mParent->tile(mPos)->origin();}
- __hostdev__ ValueIterator& operator++() {
+ __hostdev__ __global__ ValueIterator& operator++() {
NANOVDB_ASSERT(mParent);
++mPos;
while (mPos < mSize && mParent->tile(mPos)->isChild()) ++mPos;
@@ -3154,20 +3536,20 @@
class ValueOnIterator
{
- const DataType *mParent;
+ __global__ const DataType *mParent;
uint32_t mPos, mSize;
public:
__hostdev__ ValueOnIterator() : mParent(nullptr), mPos(0), mSize(0) {}
- __hostdev__ ValueOnIterator(const RootNode *parent) : mParent(parent->data()), mPos(0), mSize(parent->tileCount()){
+ __hostdev__ ValueOnIterator(__global__ const RootNode *parent) : mParent(parent->data()), mPos(0), mSize(parent->tileCount()){
NANOVDB_ASSERT(mParent);
while (mPos < mSize && !mParent->tile(mPos)->isActive()) ++mPos;
}
- ValueOnIterator& operator=(const ValueOnIterator&) = default;
+ __global__ ValueOnIterator& operator=(__global__ const ValueOnIterator&) = default;
__hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->tile(mPos)->value;}
__hostdev__ operator bool() const {return mPos < mSize;}
__hostdev__ uint32_t pos() const {return mPos;}
__hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); mParent->tile(mPos)->origin();}
- __hostdev__ ValueOnIterator& operator++() {
+ __hostdev__ __global__ ValueOnIterator& operator++() {
NANOVDB_ASSERT(mParent);
++mPos;
while (mPos < mSize && !mParent->tile(mPos)->isActive()) ++mPos;
@@ -3183,75 +3565,107 @@
ValueOnIterator beginValueOn() const {return ValueOnIterator(this);}
/// @brief This class cannot be constructed or deleted
- RootNode() = delete;
- RootNode(const RootNode&) = delete;
- RootNode& operator=(const RootNode&) = delete;
- ~RootNode() = delete;
+ RootNode() __global__ = delete;
+ RootNode(__global__ const RootNode&) __global__ = delete;
+ __global__ RootNode& operator=(__global__ const RootNode&) __global__ = delete;
+ ~RootNode() __global__ = delete;
- __hostdev__ AccessorType getAccessor() const { return AccessorType(*this); }
+ __hostdev__ AccessorType getAccessor() const __global__ { return AccessorType(*this); }
- __hostdev__ DataType* data() { return reinterpret_cast<DataType*>(this); }
+ __hostdev__ __global__ DataType* data() __global__ { return reinterpret_cast<__global__ DataType*>(this); }
- __hostdev__ const DataType* data() const { return reinterpret_cast<const DataType*>(this); }
+ __hostdev__ __global__ const DataType* data() const __global__ { return reinterpret_cast<__global__ const DataType*>(this); }
/// @brief Return a const reference to the index bounding box of all the active values in this tree, i.e. in all nodes of the tree
- __hostdev__ const BBoxType& bbox() const { return DataType::mBBox; }
+ __hostdev__ __global__ const BBoxType& bbox() const __global__ { return BASE(mBBox); }
/// @brief Return the total number of active voxels in the root and all its child nodes.
/// @brief Return a const reference to the background value, i.e. the value associated with
/// any coordinate location that has not been set explicitly.
- __hostdev__ const ValueType& background() const { return DataType::mBackground; }
+ __hostdev__ __global__ const ValueType& background() const __global__ { return DataType::mBackground; }
/// @brief Return the number of tiles encoded in this root node
- __hostdev__ const uint32_t& tileCount() const { return DataType::mTableSize; }
+ __hostdev__ __global__ const uint32_t& tileCount() const __global__ { return DataType::mTableSize; }
/// @brief Return a const reference to the minimum active value encoded in this root node and any of its child nodes
- __hostdev__ const ValueType& minimum() const { return this->getMin(); }
+ __hostdev__ __global__ const ValueType& minimum() const __global__ { return this->getMin(); }
/// @brief Return a const reference to the maximum active value encoded in this root node and any of its child nodes
- __hostdev__ const ValueType& maximum() const { return this->getMax(); }
+ __hostdev__ __global__ const ValueType& maximum() const __global__ { return this->getMax(); }
/// @brief Return a const reference to the average of all the active values encoded in this root node and any of its child nodes
- __hostdev__ const FloatType& average() const { return DataType::mAverage; }
+ __hostdev__ __global__ const FloatType& average() const __global__ { return DataType::mAverage; }
/// @brief Return the variance of all the active values encoded in this root node and any of its child nodes
- __hostdev__ FloatType variance() const { return DataType::mStdDevi * DataType::mStdDevi; }
+ __hostdev__ FloatType variance() const __global__ { return DataType::mStdDevi * DataType::mStdDevi; }
/// @brief Return a const reference to the standard deviation of all the active values encoded in this root node and any of its child nodes
- __hostdev__ const FloatType& stdDeviation() const { return DataType::mStdDevi; }
+ __hostdev__ __global__ const FloatType& stdDeviation() const __global__ { return DataType::mStdDevi; }
/// @brief Return the expected memory footprint in bytes with the specified number of tiles
__hostdev__ static uint64_t memUsage(uint32_t tableSize) { return sizeof(RootNode) + tableSize * sizeof(Tile); }
/// @brief Return the actual memory footprint of this root node
- __hostdev__ uint64_t memUsage() const { return sizeof(RootNode) + DataType::mTableSize * sizeof(Tile); }
+ __hostdev__ uint64_t memUsage() const __global__ { return sizeof(RootNode) + DataType::mTableSize * sizeof(Tile); }
/// @brief Return the value of the given voxel
- __hostdev__ ValueType getValue(const CoordType& ijk) const
+ __hostdev__ ValueType getValue(__global__ const CoordType& ijk) const __global__
{
- if (const Tile* tile = this->probeTile(ijk)) {
+ if (__global__ const Tile* tile = this->probeTile(ijk)) {
return tile->isChild() ? this->getChild(tile)->getValue(ijk) : tile->value;
}
return DataType::mBackground;
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(__local__ const CoordType& ijk) const __global__
+ {
+ if (__global__ const Tile* tile = this->findTile(ijk)) {
+ return tile->isChild() ? this->getChild(tile)->getValue(ijk) : tile->value;
+ }
+ return DataType::mBackground;
+ }
+ __hostdev__ ValueType getValue(__local__ const CoordType& ijk) const __local__
+ {
+ if (__global__ const Tile* tile = this->findTile(ijk)) {
+ return tile->isChild() ? this->getChild(tile)->getValue(ijk) : tile->value;
+ }
+ return DataType::mBackground;
+ }
+#endif
- __hostdev__ bool isActive(const CoordType& ijk) const
+ __hostdev__ bool isActive(__global__ const CoordType& ijk) const __global__
{
- if (const Tile* tile = this->probeTile(ijk)) {
- return tile->isChild() ? this->getChild(tile)->isActive(ijk) : tile->state;
+ if (__global__ const Tile* tile = this->findTile(ijk)) {
+ return tile->isChild() ? BASE(getChild)(tile)->isActive(ijk) : tile->state;
}
return false;
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isActive(__local__ const CoordType& ijk) const __global__
+ {
+ if (__global__ const Tile* tile = this->findTile(ijk)) {
+ return tile->isChild() ? BASE(getChild)(tile)->isActive(ijk) : tile->state;
+ }
+ return false;
+ }
+ __hostdev__ bool isActive(__local__ const CoordType& ijk) const __local__
+ {
+ if (__global__ const Tile* tile = this->findTile(ijk)) {
+ return tile->isChild() ? BASE(getChild)(tile)->isActive(ijk) : tile->state;
+ }
+ return false;
+ }
+#endif
/// @brief Return true if this RootNode is empty, i.e. contains no values or nodes
- __hostdev__ bool isEmpty() const { return DataType::mTableSize == uint32_t(0); }
+ __hostdev__ bool isEmpty() const __global__ { return BASE(mTableSize) == uint32_t(0); }
- __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const
+ __hostdev__ bool probeValue(__global__ const CoordType& ijk, __global__ ValueType& v) const __global__
{
- if (const Tile* tile = this->probeTile(ijk)) {
+ if (__global__ const Tile* tile = this->probeTile(ijk)) {
if (tile->isChild()) {
- const auto *child = this->getChild(tile);
+ __global__ const auto *child = this->getChild(tile);
return child->probeValue(ijk, v);
}
v = tile->value;
@@ -3260,20 +3674,35 @@
v = DataType::mBackground;
return false;
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool probeValue(__local__ const CoordType& ijk, __local__ ValueType& v) const __global__
+ {
+ if (__global__ const Tile* tile = this->findTile(ijk)) {
+ if (tile->isChild()) {
+ __global__ const auto *child = BASE(getChild)(tile);
+ return child->probeValue(ijk, v);
+ }
+ v = tile->value;
+ return tile->state;
+ }
+ v = BASE(mBackground);
+ return false;
+ }
+#endif
- __hostdev__ const LeafNodeType* probeLeaf(const CoordType& ijk) const
+ __hostdev__ __global__ const LeafNodeType* probeLeaf(__global__ const CoordType& ijk) const
{
- const Tile* tile = this->probeTile(ijk);
+ __global__ const Tile* tile = this->probeTile(ijk);
if (tile && tile->isChild()) {
- const auto *child = this->getChild(tile);
+ const __global__ auto *child = this->getChild(tile);
return child->probeLeaf(ijk);
}
return nullptr;
}
- __hostdev__ const ChildNodeType* probeChild(const CoordType& ijk) const
+ __hostdev__ __global__ const ChildNodeType* probeChild(__global__ const CoordType& ijk) const
{
- const Tile* tile = this->probeTile(ijk);
+ __global__ const Tile* tile = this->probeTile(ijk);
if (tile && tile->isChild()) {
return this->getChild(tile);
}
@@ -3280,13 +3709,14 @@
return nullptr;
}
+
/// @brief Find and return a Tile of this root node
- __hostdev__ const Tile* probeTile(const CoordType& ijk) const
+ __hostdev__ __global__ const Tile* probeTile(__global__ const CoordType& ijk) const __global__
{
- const Tile* tiles = reinterpret_cast<const Tile*>(this + 1);
- const auto key = DataType::CoordToKey(ijk);
+ __global__ const Tile* tiles = reinterpret_cast<__global__ const Tile*>(this + 1);
+ const auto key = BASE(CoordToKey)(ijk);
#if 1 // switch between linear and binary seach
- for (uint32_t i = 0; i < DataType::mTableSize; ++i) {
+ for (uint32_t i = 0; i < BASE(mTableSize); ++i) {
if (tiles[i].key == key) return &tiles[i];
}
#else// do not enable binary search if tiles are not guaranteed to be sorted!!!!!!
@@ -3306,6 +3736,33 @@
#endif
return nullptr;
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __global__ const Tile* findTile(__local__ const CoordType& ijk) const __global__
+ {
+ __global__ const Tile* tiles = reinterpret_cast<__global__ const Tile*>(this + 1);
+ const auto key = BASE(CoordToKey)(ijk);
+#if 1 // switch between linear and binary search
+ for (uint32_t i = 0; i < BASE(mTableSize); ++i) {
+ if (tiles[i].key == key) return &tiles[i];
+ }
+#else// do not enable binary search if tiles are not guaranteed to be sorted!!!!!!
+ // binary-search of pre-sorted elements
+ int32_t low = 0, high = DataType::mTableSize; // low is inclusive and high is exclusive
+ while (low != high) {
+ int mid = low + ((high - low) >> 1);
+ const Tile* tile = &tiles[mid];
+ if (tile->key == key) {
+ return tile;
+ } else if (tile->key < key) {
+ low = mid + 1;
+ } else {
+ high = mid;
+ }
+ }
+#endif
+ return nullptr;
+ }
+#endif
private:
static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(RootData) is misaligned");
@@ -3319,12 +3776,12 @@
/// @brief Private method to return node information and update a ReadAccessor
template<typename AccT>
- __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& ijk, const AccT& acc) const
+ __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(__global__ const CoordType& ijk, __global__ const AccT& acc) const
{
using NodeInfoT = typename AccT::NodeInfo;
- if (const Tile* tile = this->probeTile(ijk)) {
+ if (__global__ const Tile* tile = this->probeTile(ijk)) {
if (tile->isChild()) {
- const auto *child = this->getChild(tile);
+ __global__ const auto *child = this->getChild(tile);
acc.insert(ijk, child);
return child->getNodeInfoAndCache(ijk, acc);
}
@@ -3337,11 +3794,11 @@
/// @brief Private method to return a voxel value and update a ReadAccessor
template<typename AccT>
- __hostdev__ ValueType getValueAndCache(const CoordType& ijk, const AccT& acc) const
+ __hostdev__ ValueType getValueAndCache(__global__ const CoordType& ijk, __global__ const AccT& acc) const __global__
{
- if (const Tile* tile = this->probeTile(ijk)) {
+ if (__global__ const Tile* tile = this->probeTile(ijk)) {
if (tile->isChild()) {
- const auto *child = this->getChild(tile);
+ __global__ const auto *child = this->getChild(tile);
acc.insert(ijk, child);
return child->getValueAndCache(ijk, acc);
}
@@ -3349,25 +3806,66 @@
}
return DataType::mBackground;
}
+#if defined(__KERNEL_METAL__)
+ template<typename AccT>
+ __hostdev__ ValueType getValueAndCache(__local__ const CoordType& ijk, __local__ const AccT& acc) const __global__
+ {
+ if (__global__ const Tile* tile = this->findTile(ijk)) {
+ if (tile->isChild()) {
+ __global__ const auto *child = BASE(getChild)(tile);
+ acc.insert(ijk, child);
+ return child->getValueAndCache(ijk, acc);
+ }
+ return tile->value;
+ }
+ return BASE(mBackground);
+ }
+ template<typename AccT>
+ __hostdev__ ValueType getValueAndCache(__local__ const CoordType& ijk, __local__ const AccT& acc) const __local__
+ {
+ if (__global__ const Tile* tile = this->findTile(ijk)) {
+ if (tile->isChild()) {
+ __global__ const auto *child = BASE(getChild)(tile);
+ acc.insert(ijk, child);
+ return child->getValueAndCache(ijk, acc);
+ }
+ return tile->value;
+ }
+ return BASE(mBackground);
+ }
+#endif
template<typename AccT>
- __hostdev__ bool isActiveAndCache(const CoordType& ijk, const AccT& acc) const
+ __hostdev__ bool isActiveAndCache(__global__ const CoordType& ijk, __global__ const AccT& acc) const
{
- const Tile* tile = this->probeTile(ijk);
+ __global__ const Tile* tile = this->probeTile(ijk);
if (tile && tile->isChild()) {
- const auto *child = this->getChild(tile);
+ __global__ const auto *child = BASE(getChild)(tile);
acc.insert(ijk, child);
return child->isActiveAndCache(ijk, acc);
}
return false;
}
+#if defined(__KERNEL_METAL__)
+ template<typename AccT>
+ __hostdev__ bool isActiveAndCache(__local__ const CoordType& ijk, __local__ const AccT& acc) const __global__
+ {
+ __global__ const Tile* tile = this->findTile(ijk);
+ if (tile && tile->isChild()) {
+ __global__ const auto *child = BASE(getChild)(tile);
+ acc.insert(ijk, child);
+ return child->isActiveAndCache(ijk, acc);
+ }
+ return false;
+ }
+#endif
template<typename AccT>
- __hostdev__ bool probeValueAndCache(const CoordType& ijk, ValueType& v, const AccT& acc) const
+ __hostdev__ bool probeValueAndCache(__global__ const CoordType& ijk, __global__ ValueType& v, __global__ const AccT& acc) const
{
- if (const Tile* tile = this->probeTile(ijk)) {
+ if (__global__ const Tile* tile = this->probeTile(ijk)) {
if (tile->isChild()) {
- const auto *child = this->getChild(tile);
+ __global__ const auto *child = BASE(getChild)(tile);
acc.insert(ijk, child);
return child->probeValueAndCache(ijk, v, acc);
}
@@ -3379,11 +3877,11 @@
}
template<typename AccT>
- __hostdev__ const LeafNodeType* probeLeafAndCache(const CoordType& ijk, const AccT& acc) const
+ __hostdev__ __global__ const LeafNodeType* probeLeafAndCache(__global__ const CoordType& ijk, __global__ const AccT& acc) const
{
- const Tile* tile = this->probeTile(ijk);
+ __global__ const Tile* tile = this->probeTile(ijk);
if (tile && tile->isChild()) {
- const auto *child = this->getChild(tile);
+ __global__ const auto *child = BASE(getChild)(tile);
acc.insert(ijk, child);
return child->probeLeafAndCache(ijk, acc);
}
@@ -3391,11 +3889,11 @@
}
template<typename RayT, typename AccT>
- __hostdev__ uint32_t getDimAndCache(const CoordType& ijk, const RayT& ray, const AccT& acc) const
+ __hostdev__ uint32_t getDimAndCache(__global__ const CoordType& ijk, __global__ const RayT& ray, __global__ const AccT& acc) const __global__
{
- if (const Tile* tile = this->probeTile(ijk)) {
+ if (__global__ const Tile* tile = this->probeTile(ijk)) {
if (tile->isChild()) {
- const auto *child = this->getChild(tile);
+ __global__ const auto *child = BASE(getChild)(tile);
acc.insert(ijk, child);
return child->getDimAndCache(ijk, ray, acc);
}
@@ -3403,7 +3901,23 @@
}
return ChildNodeType::dim(); // background
}
+#if defined(__KERNEL_METAL__)
+ template<typename RayT, typename AccT>
+ __hostdev__ uint32_t getDimAndCache(__local__ const CoordType& ijk, __local__ const RayT& ray, __local__ const AccT& acc) const __global__
+ {
+ if (__global__ const Tile* tile = this->probeTile(ijk)) {
+ if (tile->isChild()) {
+ __global__ const auto *child = BASE(getChild)(tile);
+ acc.insert(ijk, child);
+ return child->getDimAndCache(ijk, ray, acc);
+ }
+ return 1 << ChildT::TOTAL; //tile value
+ }
+ return ChildNodeType::dim(); // background
+ }
+#endif
+#undef BASE
}; // RootNode class
// After the RootNode the memory layout is assumed to be the sorted Tiles
@@ -3421,7 +3935,7 @@
using StatsT = typename ChildT::FloatType;
using CoordT = typename ChildT::CoordType;
using MaskT = typename ChildT::template MaskType<LOG2DIM>;
- static constexpr bool FIXED_SIZE = true;
+ static __constant__ constexpr bool FIXED_SIZE = true;
union Tile
{
@@ -3429,8 +3943,8 @@
int64_t child;//signed 64 bit byte offset relative to the InternalData!!
/// @brief This class cannot be constructed or deleted
Tile() = delete;
- Tile(const Tile&) = delete;
- Tile& operator=(const Tile&) = delete;
+ Tile(__global__ const Tile&) = delete;
+ __global__ Tile& operator=(__global__ const Tile&) = delete;
~Tile() = delete;
};
@@ -3456,7 +3970,7 @@
__hostdev__ static uint64_t memUsage() { return sizeof(InternalData); }
- __hostdev__ void setChild(uint32_t n, const void *ptr)
+ __hostdev__ void setChild(uint32_t n, __global__ const void *ptr)
{
NANOVDB_ASSERT(mChildMask.isOn(n));
mTable[n].child = PtrDiff(ptr, this);
@@ -3463,7 +3977,7 @@
}
template <typename ValueT>
- __hostdev__ void setValue(uint32_t n, const ValueT &v)
+ __hostdev__ void setValue(uint32_t n, __global__ const ValueT &v)
{
NANOVDB_ASSERT(!mChildMask.isOn(n));
mTable[n].value = v;
@@ -3470,18 +3984,18 @@
}
/// @brief Returns a pointer to the child node at the specifed linear offset.
- __hostdev__ ChildT* getChild(uint32_t n)
+ __hostdev__ __global__ ChildT* getChild(uint32_t n) __global__
{
NANOVDB_ASSERT(mChildMask.isOn(n));
return PtrAdd<ChildT>(this, mTable[n].child);
}
- __hostdev__ const ChildT* getChild(uint32_t n) const
+ __hostdev__ __global__ const ChildT* getChild(uint32_t n) const __global__
{
NANOVDB_ASSERT(mChildMask.isOn(n));
return PtrAdd<ChildT>(this, mTable[n].child);
}
- __hostdev__ ValueT getValue(uint32_t n) const
+ __hostdev__ ValueT getValue(uint32_t n) const __global__
{
NANOVDB_ASSERT(!mChildMask.isOn(n));
return mTable[n].value;
@@ -3496,29 +4010,38 @@
__hostdev__ bool isChild(uint32_t n) const {return mChildMask.isOn(n);}
template <typename T>
- __hostdev__ void setOrigin(const T& ijk) { mBBox[0] = ijk; }
+ __hostdev__ void setOrigin(__global__ const T& ijk) { mBBox[0] = ijk; }
- __hostdev__ const ValueT& getMin() const { return mMinimum; }
- __hostdev__ const ValueT& getMax() const { return mMaximum; }
- __hostdev__ const StatsT& average() const { return mAverage; }
- __hostdev__ const StatsT& stdDeviation() const { return mStdDevi; }
+ __hostdev__ __global__ const ValueT& getMin() const { return mMinimum; }
+ __hostdev__ __global__ const ValueT& getMax() const { return mMaximum; }
+ __hostdev__ __global__ const StatsT& average() const { return mAverage; }
+ __hostdev__ __global__ const StatsT& stdDeviation() const { return mStdDevi; }
- __hostdev__ void setMin(const ValueT& v) { mMinimum = v; }
- __hostdev__ void setMax(const ValueT& v) { mMaximum = v; }
- __hostdev__ void setAvg(const StatsT& v) { mAverage = v; }
- __hostdev__ void setDev(const StatsT& v) { mStdDevi = v; }
+ __hostdev__ void setMin(__global__ const ValueT& v) { mMinimum = v; }
+ __hostdev__ void setMax(__global__ const ValueT& v) { mMaximum = v; }
+ __hostdev__ void setAvg(__global__ const StatsT& v) { mAverage = v; }
+ __hostdev__ void setDev(__global__ const StatsT& v) { mStdDevi = v; }
/// @brief This class cannot be constructed or deleted
InternalData() = delete;
- InternalData(const InternalData&) = delete;
- InternalData& operator=(const InternalData&) = delete;
+ InternalData(__global__ const InternalData&) = delete;
+ __global__ InternalData& operator=(__global__ const InternalData&) = delete;
~InternalData() = delete;
}; // InternalData
/// @brief Internal nodes of a VDB treedim(),
template<typename ChildT, uint32_t Log2Dim = ChildT::LOG2DIM + 1>
-class InternalNode : private InternalData<ChildT, Log2Dim>
+class InternalNode
+#if !defined(__KERNEL_METAL__)
+ : private InternalData<ChildT, Log2Dim>
+#endif
{
+#if defined(__KERNEL_METAL__)
+ InternalData<ChildT, Log2Dim> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) DataType::v
+#endif
public:
using DataType = InternalData<ChildT, Log2Dim>;
using ValueType = typename DataType::ValueT;
@@ -3527,31 +4050,40 @@
using LeafNodeType = typename ChildT::LeafNodeType;
using ChildNodeType = ChildT;
using CoordType = typename ChildT::CoordType;
- static constexpr bool FIXED_SIZE = DataType::FIXED_SIZE;
+ static __constant__ constexpr bool FIXED_SIZE = DataType::FIXED_SIZE;
template<uint32_t LOG2>
using MaskType = typename ChildT::template MaskType<LOG2>;
template<bool On>
using MaskIterT = typename Mask<Log2Dim>::template Iterator<On>;
- static constexpr uint32_t LOG2DIM = Log2Dim;
- static constexpr uint32_t TOTAL = LOG2DIM + ChildT::TOTAL; // dimension in index space
- static constexpr uint32_t DIM = 1u << TOTAL; // number of voxels along each axis of this node
- static constexpr uint32_t SIZE = 1u << (3 * LOG2DIM); // number of tile values (or child pointers)
- static constexpr uint32_t MASK = (1u << TOTAL) - 1u;
- static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf
- static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node
+ static __constant__ constexpr uint32_t LOG2DIM = Log2Dim;
+ static __constant__ constexpr uint32_t TOTAL = LOG2DIM + ChildT::TOTAL; // dimension in index space
+ static __constant__ constexpr uint32_t DIM = 1u << TOTAL; // number of voxels along each axis of this node
+ static __constant__ constexpr uint32_t SIZE = 1u << (3 * LOG2DIM); // number of tile values (or child pointers)
+ static __constant__ constexpr uint32_t MASK = (1u << TOTAL) - 1u;
+ static __constant__ constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf
+ static __constant__ constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node
/// @brief Visits child nodes of this node only
- class ChildIterator : public MaskIterT<true>
+ class ChildIterator
+#if !defined (__KERNEL_METAL__)
+ : public MaskIterT<true>
+#endif
{
+#if defined (__KERNEL_METAL__)
+ MaskIterT<true> BaseT;
+#define BASE(v) BaseT.v
+#else
using BaseT = MaskIterT<true>;
- const DataType *mParent;
+#define BASE(v) BaseT::v
+#endif
+ __global__ const DataType *mParent;
public:
__hostdev__ ChildIterator() : BaseT(), mParent(nullptr) {}
- __hostdev__ ChildIterator(const InternalNode* parent) : BaseT(parent->data()->mChildMask.beginOn()), mParent(parent->data()) {}
- ChildIterator& operator=(const ChildIterator&) = default;
- __hostdev__ const ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mParent->getChild(BaseT::pos());}
- __hostdev__ const ChildT* operator->() const {NANOVDB_ASSERT(*this); return mParent->getChild(BaseT::pos());}
+ __hostdev__ ChildIterator(__global__ const InternalNode* parent) : BaseT(parent->data()->mChildMask.beginOn()), mParent(parent->data()) {}
+ __global__ ChildIterator& operator=(__global__ const ChildIterator&) = default;
+ __hostdev__ __global__ const ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mParent->getChild(BASE(pos)());}
+ __hostdev__ __global__ const ChildT* operator->() const {NANOVDB_ASSERT(*this); return mParent->getChild(BASE(pos)());}
__hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); return (*this)->origin();}
}; // Member class ChildIterator
@@ -3558,45 +4090,69 @@
ChildIterator beginChild() const {return ChildIterator(this);}
/// @brief Visits all tile values in this node, i.e. both inactive and active tiles
- class ValueIterator : public MaskIterT<false>
+ class ValueIterator
+#if !defined (__KERNEL_METAL__)
+ : public MaskIterT<false>
+#endif
{
+#if defined (__KERNEL_METAL__)
+ MaskIterT<false> BaseT;
+#define BASE(v) BaseT.v
+#else
using BaseT = MaskIterT<false>;
- const InternalNode *mParent;
+#define BASE(v) BaseT::v
+#endif
+ __global__ const InternalNode *mParent;
public:
__hostdev__ ValueIterator() : BaseT(), mParent(nullptr) {}
- __hostdev__ ValueIterator(const InternalNode* parent) : BaseT(parent->data()->mChildMask.beginOff()), mParent(parent) {}
- ValueIterator& operator=(const ValueIterator&) = default;
- __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->data()->getValue(BaseT::pos());}
- __hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); return mParent->localToGlobalCoord(BaseT::pos());}
- __hostdev__ bool isActive() const { NANOVDB_ASSERT(*this); return mParent->data()->isActive(BaseT::mPos);}
+ __hostdev__ ValueIterator(__global__ const InternalNode* parent) : BaseT(parent->data()->mChildMask.beginOff()), mParent(parent) {}
+ __global__ ValueIterator& operator=(__global__ const ValueIterator&) = default;
+ __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->data()->getValue(BASE(pos)());}
+ __hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); return mParent->localToGlobalCoord(BASE(pos)());}
+ __hostdev__ bool isActive() const { NANOVDB_ASSERT(*this); return mParent->data()->isActive(BASE(mPos));}
}; // Member class ValueIterator
ValueIterator beginValue() const {return ValueIterator(this);}
/// @brief Visits active tile values of this node only
- class ValueOnIterator : public MaskIterT<true>
+ class ValueOnIterator
+#if !defined (__KERNEL_METAL__)
+ : public MaskIterT<true>
+#endif
{
+#if defined (__KERNEL_METAL__)
+ MaskIterT<true> BaseT;
+#define BASE(v) BaseT.v
+#else
using BaseT = MaskIterT<true>;
- const InternalNode *mParent;
+#define BASE(v) BaseT::v
+#endif
+ __global__ const InternalNode *mParent;
public:
__hostdev__ ValueOnIterator() : BaseT(), mParent(nullptr) {}
- __hostdev__ ValueOnIterator(const InternalNode* parent) : BaseT(parent->data()->mValueMask.beginOn()), mParent(parent) {}
- ValueOnIterator& operator=(const ValueOnIterator&) = default;
- __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->data()->getValue(BaseT::pos());}
- __hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); return mParent->localToGlobalCoord(BaseT::pos());}
+ __hostdev__ ValueOnIterator(__global__ const InternalNode* parent) : BaseT(parent->data()->mValueMask.beginOn()), mParent(parent) {}
+ __global__ ValueOnIterator& operator=(__global__ const ValueOnIterator&) = default;
+ __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->data()->getValue(BASE(pos)());}
+ __hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); return mParent->localToGlobalCoord(BASE(pos)());}
}; // Member class ValueOnIterator
ValueOnIterator beginValueOn() const {return ValueOnIterator(this);}
+#if defined(__KERNEL_METAL__)
+#define BASE(v) _base.v
+#else
+#define BASE(v) DataType::v
+#endif
+
/// @brief This class cannot be constructed or deleted
- InternalNode() = delete;
- InternalNode(const InternalNode&) = delete;
- InternalNode& operator=(const InternalNode&) = delete;
+ InternalNode() __global__ = delete;
+ InternalNode(__global__ const InternalNode&) __global__ = delete;
+ __global__ InternalNode& operator=(__global__ const InternalNode&) __global__ = delete;
~InternalNode() = delete;
- __hostdev__ DataType* data() { return reinterpret_cast<DataType*>(this); }
+ __hostdev__ __global__ DataType* data() __global__ { return reinterpret_cast<__global__ DataType*>(this); }
- __hostdev__ const DataType* data() const { return reinterpret_cast<const DataType*>(this); }
+ __hostdev__ __global__ const DataType* data() const __global__ { return reinterpret_cast<__global__ const DataType*>(this); }
/// @brief Return the dimension, in voxel units, of this internal node (typically 8*16 or 8*16*32)
__hostdev__ static uint32_t dim() { return 1u << TOTAL; }
@@ -3605,47 +4161,66 @@
__hostdev__ static size_t memUsage() { return DataType::memUsage(); }
/// @brief Return a const reference to the bit mask of active voxels in this internal node
- __hostdev__ const MaskType<LOG2DIM>& valueMask() const { return DataType::mValueMask; }
+ __hostdev__ __global__ const MaskType<LOG2DIM>& valueMask() const __global__ { return BASE(mValueMask); }
/// @brief Return a const reference to the bit mask of child nodes in this internal node
- __hostdev__ const MaskType<LOG2DIM>& childMask() const { return DataType::mChildMask; }
+ __hostdev__ __global__ const MaskType<LOG2DIM>& childMask() const __global__ { return DataType::mChildMask; }
/// @brief Return the origin in index space of this leaf node
- __hostdev__ CoordType origin() const { return DataType::mBBox.min() & ~MASK; }
+ __hostdev__ CoordType origin() const __global__ { return DataType::mBBox.min() & ~MASK; }
/// @brief Return a const reference to the minimum active value encoded in this internal node and any of its child nodes
- __hostdev__ const ValueType& minimum() const { return this->getMin(); }
+ __hostdev__ __global__ const ValueType& minimum() const __global__ { return this->getMin(); }
/// @brief Return a const reference to the maximum active value encoded in this internal node and any of its child nodes
- __hostdev__ const ValueType& maximum() const { return this->getMax(); }
+ __hostdev__ __global__ const ValueType& maximum() const __global__ { return this->getMax(); }
/// @brief Return a const reference to the average of all the active values encoded in this internal node and any of its child nodes
- __hostdev__ const FloatType& average() const { return DataType::mAverage; }
+ __hostdev__ __global__ const FloatType& average() const __global__ { return DataType::mAverage; }
/// @brief Return the variance of all the active values encoded in this internal node and any of its child nodes
- __hostdev__ FloatType variance() const { return DataType::mStdDevi*DataType::mStdDevi; }
+ __hostdev__ FloatType variance() const __global__ { return DataType::mStdDevi*DataType::mStdDevi; }
/// @brief Return a const reference to the standard deviation of all the active values encoded in this internal node and any of its child nodes
- __hostdev__ const FloatType& stdDeviation() const { return DataType::mStdDevi; }
+ __hostdev__ __global__ const FloatType& stdDeviation() const __global__ { return DataType::mStdDevi; }
/// @brief Return a const reference to the bounding box in index space of active values in this internal node and any of its child nodes
- __hostdev__ const BBox<CoordType>& bbox() const { return DataType::mBBox; }
+ __hostdev__ __global__ const BBox<CoordType>& bbox() const __global__ { return DataType::mBBox; }
/// @brief Return the value of the given voxel
- __hostdev__ ValueType getValue(const CoordType& ijk) const
+ __hostdev__ ValueType getValue(__global__ const CoordType& ijk) const __global__
{
const uint32_t n = CoordToOffset(ijk);
return DataType::mChildMask.isOn(n) ? this->getChild(n)->getValue(ijk) : DataType::getValue(n);
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(__local__ const CoordType& ijk) const __global__
+ {
+ const uint32_t n = CoordToOffset(ijk);
+ return DataType::mChildMask.isOn(n) ? this->getChild(n)->getValue(ijk) : DataType::mTable[n].value;
+ }
+#endif
- __hostdev__ bool isActive(const CoordType& ijk) const
+ __hostdev__ bool isActive(__global__ const CoordType& ijk) const __global__
{
const uint32_t n = CoordToOffset(ijk);
return DataType::mChildMask.isOn(n) ? this->getChild(n)->isActive(ijk) : DataType::isActive(n);
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isActive(__local__ const CoordType& ijk) const __global__
+ {
+ const uint32_t n = CoordToOffset(ijk);
+ return DataType::mChildMask.isOn(n) ? this->getChild(n)->isActive(ijk) : DataType::isActive(n);
+ }
+ __hostdev__ bool isActive(__local__ const CoordType& ijk) const __local__
+ {
+ const uint32_t n = CoordToOffset(ijk);
+ return DataType::mChildMask.isOn(n) ? this->getChild(n)->isActive(ijk) : DataType::isActive(n);
+ }
+#endif
/// @brief return the state and updates the value of the specified voxel
- __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const
+ __hostdev__ bool probeValue(__global__ const CoordType& ijk, __global__ ValueType& v) const __global__
{
const uint32_t n = CoordToOffset(ijk);
if (DataType::mChildMask.isOn(n))
@@ -3653,8 +4228,18 @@
v = DataType::getValue(n);
return DataType::isActive(n);
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool probeValue(__local__ const CoordType& ijk, __local__ ValueType& v) const __global__
+ {
+ const uint32_t n = CoordToOffset(ijk);
+ if (DataType::mChildMask.isOn(n))
+ return this->getChild(n)->probeValue(ijk, v);
+ v = DataType::getValue(n);
+ return DataType::isActive(n);
+ }
+#endif
- __hostdev__ const LeafNodeType* probeLeaf(const CoordType& ijk) const
+ __hostdev__ __global__ const LeafNodeType* probeLeaf(__global__ const CoordType& ijk) const __global__
{
const uint32_t n = CoordToOffset(ijk);
if (DataType::mChildMask.isOn(n))
@@ -3662,7 +4247,7 @@
return nullptr;
}
- __hostdev__ const ChildNodeType* probeChild(const CoordType& ijk) const
+ __hostdev__ __global__ const ChildNodeType* probeChild(__global__ const CoordType& ijk) const __global__
{
const uint32_t n = CoordToOffset(ijk);
return DataType::mChildMask.isOn(n) ? this->getChild(n) : nullptr;
@@ -3669,7 +4254,7 @@
}
/// @brief Return the linear offset corresponding to the given coordinate
- __hostdev__ static uint32_t CoordToOffset(const CoordType& ijk)
+ __hostdev__ static uint32_t CoordToOffset(__global__ const CoordType& ijk)
{
#if 0
return (((ijk[0] & MASK) >> ChildT::TOTAL) << (2 * LOG2DIM)) +
@@ -3681,6 +4266,20 @@
((ijk[2] & MASK) >> ChildT::TOTAL);
#endif
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ static uint32_t CoordToOffset(__local__ const CoordType& ijk)
+ {
+#if 0
+ return (((ijk[0] & MASK) >> ChildT::TOTAL) << (2 * LOG2DIM)) +
+ (((ijk[1] & MASK) >> ChildT::TOTAL) << (LOG2DIM)) +
+ ((ijk[2] & MASK) >> ChildT::TOTAL);
+#else
+ return (((ijk[0] & MASK) >> ChildT::TOTAL) << (2 * LOG2DIM)) |
+ (((ijk[1] & MASK) >> ChildT::TOTAL) << (LOG2DIM)) |
+ ((ijk[2] & MASK) >> ChildT::TOTAL);
+#endif
+ }
+#endif
/// @return the local coordinate of the n'th tile or child node
__hostdev__ static Coord OffsetToLocalCoord(uint32_t n)
@@ -3691,13 +4290,13 @@
}
/// @brief modifies local coordinates to global coordinates of a tile or child node
- __hostdev__ void localToGlobalCoord(Coord& ijk) const
+ __hostdev__ void localToGlobalCoord(__global__ Coord& ijk) const __global__
{
ijk <<= ChildT::TOTAL;
ijk += this->origin();
}
- __hostdev__ Coord offsetToGlobalCoord(uint32_t n) const
+ __hostdev__ Coord offsetToGlobalCoord(uint32_t n) const __global__
{
Coord ijk = InternalNode::OffsetToLocalCoord(n);
this->localToGlobalCoord(ijk);
@@ -3705,13 +4304,24 @@
}
/// @brief Return true if this node or any of its child nodes contain active values
- __hostdev__ bool isActive() const
+ __hostdev__ bool isActive() const __global__
{
return DataType::mFlags & uint32_t(2);
}
+#if defined(__KERNEL_METAL__)
+ /// @brief Return true if this node or any of its child nodes contain active values
+ __hostdev__ bool isActive() const __local__
+ {
+ return DataType::mFlags & uint32_t(2);
+ }
+#endif
private:
+#if !defined(__KERNEL_METAL__)
static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(InternalData) is misaligned");
+#else
+ static_assert(sizeof(_base) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(InternalData) is misaligned");
+#endif
//static_assert(offsetof(DataType, mTable) % 32 == 0, "InternalData::mTable is misaligned");
template<typename, int, int, int>
@@ -3724,18 +4334,30 @@
/// @brief Private read access method used by the ReadAccessor
template<typename AccT>
- __hostdev__ ValueType getValueAndCache(const CoordType& ijk, const AccT& acc) const
+ __hostdev__ ValueType getValueAndCache(__global__ const CoordType& ijk, __global__ const AccT& acc) const __global__
{
const uint32_t n = CoordToOffset(ijk);
if (!DataType::mChildMask.isOn(n))
- return DataType::getValue(n);
- const ChildT* child = this->getChild(n);
+ return BASE(getValue)(n);
+ __global__ const ChildT* child = BASE(getChild)(n);
acc.insert(ijk, child);
return child->getValueAndCache(ijk, acc);
}
+#if defined(__KERNEL_METAL__)
+ template<typename AccT>
+ __hostdev__ ValueType getValueAndCache(__local__ const CoordType& ijk, __local__ const AccT& acc) const __global__
+ {
+ const uint32_t n = CoordToOffset(ijk);
+ if (!BASE(mChildMask).isOn(n))
+ return BASE(getValue)(n);
+ __global__ const ChildT* child = BASE(getChild)(n);
+ acc.insert(ijk, child);
+ return child->getValueAndCache(ijk, acc);
+ }
+#endif
template<typename AccT>
- __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& ijk, const AccT& acc) const
+ __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(__global__ const CoordType& ijk, __global__ const AccT& acc) const __global__
{
using NodeInfoT = typename AccT::NodeInfo;
const uint32_t n = CoordToOffset(ijk);
@@ -3743,24 +4365,36 @@
return NodeInfoT{LEVEL, this->dim(), this->minimum(), this->maximum(), this->average(),
this->stdDeviation(), this->bbox()[0], this->bbox()[1]};
}
- const ChildT* child = this->getChild(n);
+ __global__ const ChildT* child = BASE(getChild)(n);
acc.insert(ijk, child);
return child->getNodeInfoAndCache(ijk, acc);
}
template<typename AccT>
- __hostdev__ bool isActiveAndCache(const CoordType& ijk, const AccT& acc) const
+ __hostdev__ bool isActiveAndCache(__global__ const CoordType& ijk, __global__ const AccT& acc) const __global__
{
const uint32_t n = CoordToOffset(ijk);
if (!DataType::mChildMask.isOn(n))
return DataType::isActive(n);
- const ChildT* child = this->getChild(n);
+ __global__ const ChildT* child = BASE(getChild)(n);
acc.insert(ijk, child);
return child->isActiveAndCache(ijk, acc);
}
+#if defined(__KERNEL_METAL__)
+ template<typename AccT>
+ __hostdev__ bool isActiveAndCache(__local__ const CoordType& ijk, __local__ const AccT& acc) const __global__
+ {
+ const uint32_t n = CoordToOffset(ijk);
+ if (!BASE(mChildMask).isOn(n))
+ return BASE(mValueMask).isOn(n);
+ __global__ const ChildT* child = BASE(getChild)(n);
+ acc.insert(ijk, child);
+ return child->isActiveAndCache(ijk, acc);
+ }
+#endif
template<typename AccT>
- __hostdev__ bool probeValueAndCache(const CoordType& ijk, ValueType& v, const AccT& acc) const
+ __hostdev__ bool probeValueAndCache(__global__ const CoordType& ijk, __global__ ValueType& v, __global__ const AccT& acc) const __global__
{
const uint32_t n = CoordToOffset(ijk);
if (!DataType::mChildMask.isOn(n)) {
@@ -3767,24 +4401,24 @@
v = DataType::getValue(n);
return DataType::isActive(n);
}
- const ChildT* child = this->getChild(n);
+ __global__ const ChildT* child = BASE(getChild)(n);
acc.insert(ijk, child);
return child->probeValueAndCache(ijk, v, acc);
}
template<typename AccT>
- __hostdev__ const LeafNodeType* probeLeafAndCache(const CoordType& ijk, const AccT& acc) const
+ __hostdev__ __global__ const LeafNodeType* probeLeafAndCache(__global__ const CoordType& ijk, __global__ const AccT& acc) const __global__
{
const uint32_t n = CoordToOffset(ijk);
if (!DataType::mChildMask.isOn(n))
return nullptr;
- const ChildT* child = this->getChild(n);
+ __global__ const ChildT* child = BASE(getChild)(n);
acc.insert(ijk, child);
return child->probeLeafAndCache(ijk, acc);
}
template<typename RayT, typename AccT>
- __hostdev__ uint32_t getDimAndCache(const CoordType& ijk, const RayT& ray, const AccT& acc) const
+ __hostdev__ uint32_t getDimAndCache(__global__ const CoordType& ijk, __global__ const RayT& ray, __global__ const AccT& acc) const __global__
{
if (DataType::mFlags & uint32_t(1u)) return this->dim(); // skip this node if the 1st bit is set
//if (!ray.intersects( this->bbox() )) return 1<<TOTAL;
@@ -3791,13 +4425,31 @@
const uint32_t n = CoordToOffset(ijk);
if (DataType::mChildMask.isOn(n)) {
- const ChildT* child = this->getChild(n);
+ __global__ const ChildT* child = BASE(getChild)(n);
acc.insert(ijk, child);
return child->getDimAndCache(ijk, ray, acc);
}
return ChildNodeType::dim(); // tile value
}
+#if defined(__KERNEL_METAL__)
+ template<typename RayT, typename AccT>
+ __hostdev__ uint32_t getDimAndCache(__local__ const CoordType& ijk, __local__ const RayT& ray, __local__ const AccT& acc) const __global__
+ {
+ if (BASE(mFlags) & uint32_t(1))
+ return this->dim(); // skip this node if the 1st bit is set
+ //if (!ray.intersects( this->bbox() )) return 1<<TOTAL;
+ const uint32_t n = CoordToOffset(ijk);
+ if (BASE(mChildMask).isOn(n)) {
+ __global__ const ChildT* child = BASE(getChild)(n);
+ acc.insert(ijk, child);
+ return child->getDimAndCache(ijk, ray, acc);
+ }
+ return ChildNodeType::dim(); // tile value
+ }
+#endif
+
+#undef BASE
}; // InternalNode class
// --------------------------> LeafNode <------------------------------------
@@ -3814,7 +4466,7 @@
using BuildType = ValueT;
using FloatType = typename FloatTraits<ValueT>::FloatType;
using ArrayType = ValueT;// type used for the internal mValue array
- static constexpr bool FIXED_SIZE = true;
+ static __constant__ constexpr bool FIXED_SIZE = true;
CoordT mBBoxMin; // 12B.
uint8_t mBBoxDif[3]; // 3B.
@@ -3826,7 +4478,7 @@
FloatType mAverage; // typically 4B, average of all the active values in this node and its child nodes
FloatType mStdDevi; // typically 4B, standard deviation of all the active values in this node and its child nodes
alignas(32) ValueType mValues[1u << 3 * LOG2DIM];
-
+
/// @brief Return padding of this class in bytes, due to aliasing and 32B alignment
///
/// @note The extra bytes are not necessarily at the end, but can come from aliasing of individual data members.
@@ -3838,32 +4490,35 @@
__hostdev__ static uint64_t memUsage() { return sizeof(LeafData); }
//__hostdev__ const ValueType* values() const { return mValues; }
- __hostdev__ ValueType getValue(uint32_t i) const { return mValues[i]; }
- __hostdev__ void setValueOnly(uint32_t offset, const ValueType& value) { mValues[offset] = value; }
- __hostdev__ void setValue(uint32_t offset, const ValueType& value)
+ __hostdev__ ValueType getValue(uint32_t i) const __global__ { return mValues[i]; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(uint32_t i) const __local__ { return mValues[i]; }
+#endif
+ __hostdev__ void setValueOnly(uint32_t offset, __global__ const ValueType& value) __global__ { mValues[offset] = value; }
+ __hostdev__ void setValue(uint32_t offset, __global__ const ValueType& value) __global__
{
mValueMask.setOn(offset);
mValues[offset] = value;
}
- __hostdev__ ValueType getMin() const { return mMinimum; }
- __hostdev__ ValueType getMax() const { return mMaximum; }
- __hostdev__ FloatType getAvg() const { return mAverage; }
- __hostdev__ FloatType getDev() const { return mStdDevi; }
+ __hostdev__ ValueType getMin() const __global__ { return mMinimum; }
+ __hostdev__ ValueType getMax() const __global__ { return mMaximum; }
+ __hostdev__ FloatType getAvg() const __global__ { return mAverage; }
+ __hostdev__ FloatType getDev() const __global__ { return mStdDevi; }
- __hostdev__ void setMin(const ValueType& v) { mMinimum = v; }
- __hostdev__ void setMax(const ValueType& v) { mMaximum = v; }
- __hostdev__ void setAvg(const FloatType& v) { mAverage = v; }
- __hostdev__ void setDev(const FloatType& v) { mStdDevi = v; }
+ __hostdev__ void setMin(__global__ const ValueType& v) __global__ { mMinimum = v; }
+ __hostdev__ void setMax(__global__ const ValueType& v) __global__ { mMaximum = v; }
+ __hostdev__ void setAvg(__global__ const FloatType& v) __global__ { mAverage = v; }
+ __hostdev__ void setDev(__global__ const FloatType& v) __global__ { mStdDevi = v; }
template <typename T>
- __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; }
+ __hostdev__ void setOrigin(__global__ const T& ijk) __global__ { mBBoxMin = ijk; }
/// @brief This class cannot be constructed or deleted
- LeafData() = delete;
- LeafData(const LeafData&) = delete;
- LeafData& operator=(const LeafData&) = delete;
- ~LeafData() = delete;
+ LeafData() __global__ = delete;
+ LeafData(__global__ const LeafData&) __global__ = delete;
+ __global__ LeafData& operator=(__global__ const LeafData&) __global__ = delete;
+ ~LeafData() __global__ = delete;
}; // LeafData<ValueT>
/// @brief Base-class for quantized float leaf nodes
@@ -3892,7 +4547,7 @@
__hostdev__ static constexpr uint32_t padding() {
return sizeof(LeafFnBase) - (12 + 3 + 1 + sizeof(MaskT<LOG2DIM>) + 2*4 + 4*2);
}
- __hostdev__ void init(float min, float max, uint8_t bitWidth)
+ __hostdev__ void init(float min, float max, uint8_t bitWidth) __global__
{
mMinimum = min;
mQuantum = (max - min)/float((1 << bitWidth)-1);
@@ -3899,32 +4554,32 @@
}
/// @brief return the quantized minimum of the active values in this node
- __hostdev__ float getMin() const { return mMin*mQuantum + mMinimum; }
+ __hostdev__ float getMin() const __global__ { return mMin*mQuantum + mMinimum; }
/// @brief return the quantized maximum of the active values in this node
- __hostdev__ float getMax() const { return mMax*mQuantum + mMinimum; }
+ __hostdev__ float getMax() const __global__ { return mMax*mQuantum + mMinimum; }
/// @brief return the quantized average of the active values in this node
- __hostdev__ float getAvg() const { return mAvg*mQuantum + mMinimum; }
+ __hostdev__ float getAvg() const __global__ { return mAvg*mQuantum + mMinimum; }
/// @brief return the quantized standard deviation of the active values in this node
/// @note 0 <= StdDev <= max-min or 0 <= StdDev/(max-min) <= 1
- __hostdev__ float getDev() const { return mDev*mQuantum; }
+ __hostdev__ float getDev() const __global__ { return mDev*mQuantum; }
/// @note min <= X <= max or 0 <= (X-min)/(min-max) <= 1
- __hostdev__ void setMin(float min) { mMin = uint16_t((min - mMinimum)/mQuantum + 0.5f); }
+ __hostdev__ void setMin(float min) __global__ { mMin = uint16_t((min - mMinimum)/mQuantum + 0.5f); }
/// @note min <= X <= max or 0 <= (X-min)/(min-max) <= 1
- __hostdev__ void setMax(float max) { mMax = uint16_t((max - mMinimum)/mQuantum + 0.5f); }
+ __hostdev__ void setMax(float max) __global__ { mMax = uint16_t((max - mMinimum)/mQuantum + 0.5f); }
/// @note min <= avg <= max or 0 <= (avg-min)/(min-max) <= 1
- __hostdev__ void setAvg(float avg) { mAvg = uint16_t((avg - mMinimum)/mQuantum + 0.5f); }
+ __hostdev__ void setAvg(float avg) __global__ { mAvg = uint16_t((avg - mMinimum)/mQuantum + 0.5f); }
/// @note 0 <= StdDev <= max-min or 0 <= StdDev/(max-min) <= 1
- __hostdev__ void setDev(float dev) { mDev = uint16_t(dev/mQuantum + 0.5f); }
+ __hostdev__ void setDev(float dev) __global__ { mDev = uint16_t(dev/mQuantum + 0.5f); }
template <typename T>
- __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; }
+ __hostdev__ void setOrigin(__global__ const T& ijk) __global__ { mBBoxMin = ijk; }
};// LeafFnBase
/// @brief Stuct with all the member data of the LeafNode (useful during serialization of an openvdb LeafNode)
@@ -3932,12 +4587,24 @@
/// @note No client code should (or can) interface with this struct so it can safely be ignored!
template<typename CoordT, template<uint32_t> class MaskT, uint32_t LOG2DIM>
struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<Fp4, CoordT, MaskT, LOG2DIM>
+#if !defined(__KERNEL_METAL__)
: public LeafFnBase<CoordT, MaskT, LOG2DIM>
+#endif
{
+#if defined(__KERNEL_METAL__)
+ LeafFnBase<CoordT, MaskT, LOG2DIM> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) BaseT::v
+#endif
using BaseT = LeafFnBase<CoordT, MaskT, LOG2DIM>;
using BuildType = Fp4;
using ArrayType = uint8_t;// type used for the internal mValue array
- static constexpr bool FIXED_SIZE = true;
+#if defined(__KERNEL_METAL__)
+ using ValueType = typename BaseT::ValueType;
+ using FloatType = typename BaseT::FloatType;
+#endif
+ static __constant__ constexpr bool FIXED_SIZE = true;
alignas(32) uint8_t mCode[1u << (3 * LOG2DIM - 1)];// LeafFnBase is 32B aligned and so is mCode
__hostdev__ static constexpr uint64_t memUsage() { return sizeof(LeafData); }
@@ -3947,31 +4614,53 @@
}
__hostdev__ static constexpr uint8_t bitWidth() { return 4u; }
- __hostdev__ float getValue(uint32_t i) const
+#if 1 // NOTE(review): was __KERNEL_METAL__-only; non-Metal builds also need Fp4 getValue (__global__ expands to nothing off Metal)
+ __hostdev__ float getValue(uint32_t i) const __global__
{
#if 0
const uint8_t c = mCode[i>>1];
return ( (i&1) ? c >> 4 : c & uint8_t(15) )*BaseT::mQuantum + BaseT::mMinimum;
#else
- return ((mCode[i>>1] >> ((i&1)<<2)) & uint8_t(15))*BaseT::mQuantum + BaseT::mMinimum;
+ return ((mCode[i>>1] >> ((i&1)<<2)) & uint8_t(15))*BASE(mQuantum) + BASE(mMinimum);
#endif
}
+#endif
+#if defined(__KERNEL_METAL__)
+__hostdev__ float getValue(uint32_t i) const __local__
+ {
+#if 0
+ const uint8_t c = mCode[i>>1];
+ return ( (i&1) ? c >> 4 : c & uint8_t(15) )*BaseT::mQuantum + BaseT::mMinimum;
+#else
+ return ((mCode[i>>1] >> ((i&1)<<2)) & uint8_t(15))*BASE(mQuantum) + BASE(mMinimum);
+#endif
+ }
+#endif
/// @brief This class cannot be constructed or deleted
- LeafData() = delete;
- LeafData(const LeafData&) = delete;
- LeafData& operator=(const LeafData&) = delete;
- ~LeafData() = delete;
+ LeafData() __global__ = delete;
+ LeafData(__global__ const LeafData&) __global__ = delete;
+ __global__ LeafData& operator=(__global__ const LeafData&) __global__ = delete;
+ ~LeafData() __global__ = delete;
+#undef BASE
}; // LeafData<Fp4>
template<typename CoordT, template<uint32_t> class MaskT, uint32_t LOG2DIM>
struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<Fp8, CoordT, MaskT, LOG2DIM>
+#if !defined(__KERNEL_METAL__)
: public LeafFnBase<CoordT, MaskT, LOG2DIM>
+#endif
{
+#if defined(__KERNEL_METAL__)
+ LeafFnBase<CoordT, MaskT, LOG2DIM> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) BaseT::v
+#endif
using BaseT = LeafFnBase<CoordT, MaskT, LOG2DIM>;
using BuildType = Fp8;
using ArrayType = uint8_t;// type used for the internal mValue array
- static constexpr bool FIXED_SIZE = true;
+ static __constant__ constexpr bool FIXED_SIZE = true;
alignas(32) uint8_t mCode[1u << 3 * LOG2DIM];
__hostdev__ static constexpr int64_t memUsage() { return sizeof(LeafData); }
__hostdev__ static constexpr uint32_t padding() {
@@ -3980,25 +4669,44 @@
}
__hostdev__ static constexpr uint8_t bitWidth() { return 8u; }
- __hostdev__ float getValue(uint32_t i) const
+ __hostdev__ float getValue(uint32_t i) const __global__
{
return mCode[i]*BaseT::mQuantum + BaseT::mMinimum;// code * (max-min)/255 + min
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ float getValue(uint32_t i) const __local__
+ {
+ return mCode[i]*BASE(mQuantum) + BASE(mMinimum);// code * (max-min)/255 + min
+ }
+#endif
/// @brief This class cannot be constructed or deleted
- LeafData() = delete;
- LeafData(const LeafData&) = delete;
- LeafData& operator=(const LeafData&) = delete;
- ~LeafData() = delete;
+ LeafData() __global__ = delete;
+ LeafData(__global__ const LeafData&) __global__ = delete;
+ __global__ LeafData& operator=(__global__ const LeafData&) __global__ = delete;
+ ~LeafData() __global__ = delete;
+#undef BASE
}; // LeafData<Fp8>
template<typename CoordT, template<uint32_t> class MaskT, uint32_t LOG2DIM>
struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<Fp16, CoordT, MaskT, LOG2DIM>
+#if !defined(__KERNEL_METAL__)
: public LeafFnBase<CoordT, MaskT, LOG2DIM>
+#endif
{
+#if defined(__KERNEL_METAL__)
+ LeafFnBase<CoordT, MaskT, LOG2DIM> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) BaseT::v
+#endif
using BaseT = LeafFnBase<CoordT, MaskT, LOG2DIM>;
using BuildType = Fp16;
using ArrayType = uint16_t;// type used for the internal mValue array
- static constexpr bool FIXED_SIZE = true;
+#if defined(__KERNEL_METAL__)
+ using ValueType = typename BaseT::ValueType;
+ using FloatType = typename BaseT::FloatType;
+#endif
+ static __constant__ constexpr bool FIXED_SIZE = true;
alignas(32) uint16_t mCode[1u << 3 * LOG2DIM];
__hostdev__ static constexpr uint64_t memUsage() { return sizeof(LeafData); }
@@ -4008,37 +4716,95 @@
}
__hostdev__ static constexpr uint8_t bitWidth() { return 16u; }
- __hostdev__ float getValue(uint32_t i) const
+ __hostdev__ float getValue(uint32_t i) const __global__
{
- return mCode[i]*BaseT::mQuantum + BaseT::mMinimum;// code * (max-min)/65535 + min
+ return mCode[i]*BASE(mQuantum) + BASE(mMinimum);// code * (max-min)/65535 + min
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ float getValue(uint32_t i) const __local__
+ {
+ return mCode[i]*BASE(mQuantum) + BASE(mMinimum);// code * (max-min)/65535 + min
+ }
+#endif
/// @brief This class cannot be constructed or deleted
- LeafData() = delete;
- LeafData(const LeafData&) = delete;
- LeafData& operator=(const LeafData&) = delete;
- ~LeafData() = delete;
+ LeafData() __global__ = delete;
+ LeafData(__global__ const LeafData&) __global__ = delete;
+ __global__ LeafData& operator=(__global__ const LeafData&) __global__ = delete;
+ ~LeafData() __global__ = delete;
+#undef BASE
}; // LeafData<Fp16>
template<typename CoordT, template<uint32_t> class MaskT, uint32_t LOG2DIM>
struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<FpN, CoordT, MaskT, LOG2DIM>
+#if !defined(__KERNEL_METAL__)
: public LeafFnBase<CoordT, MaskT, LOG2DIM>
+#endif
{// this class has no data members, however every instance is immediately followed
// bitWidth*64 bytes. Since its base class is 32B aligned so are the bitWidth*64 bytes
+#if defined(__KERNEL_METAL__)
+ LeafFnBase<CoordT, MaskT, LOG2DIM> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) BaseT::v
+#endif
using BaseT = LeafFnBase<CoordT, MaskT, LOG2DIM>;
using BuildType = FpN;
- static constexpr bool FIXED_SIZE = false;
+ static __constant__ constexpr bool FIXED_SIZE = false;
+#if defined(__KERNEL_METAL__)
+ using ValueType = typename BaseT::ValueType;
+ using FloatType = typename BaseT::FloatType;
+#endif
__hostdev__ static constexpr uint32_t padding() {
static_assert(BaseT::padding()==0, "expected no padding in LeafFnBase");
return 0;
}
- __hostdev__ uint8_t bitWidth() const { return 1 << (BaseT::mFlags >> 5); }// 4,8,16,32 = 2^(2,3,4,5)
- __hostdev__ size_t memUsage() const { return sizeof(*this) + this->bitWidth()*64; }
+ __hostdev__ uint8_t bitWidth() const __global__ { return 1 << (BaseT::mFlags >> 5); }// 4,8,16,32 = 2^(2,3,4,5)
+ __hostdev__ size_t memUsage() const __global__ { return sizeof(*this) + this->bitWidth()*64; }
__hostdev__ static size_t memUsage(uint32_t bitWidth) { return 96u + bitWidth*64; }
- __hostdev__ float getValue(uint32_t i) const
+ __hostdev__ float getValue(uint32_t i) const __global__
{
#ifdef NANOVDB_FPN_BRANCHLESS// faster
+ const int b = BASE(mFlags) >> 5;// b = 0, 1, 2, 3, 4 corresponding to 1, 2, 4, 8, 16 bits
+#if 0// use LUT
+ uint16_t code = reinterpret_cast<const uint16_t*>(this + 1)[i >> (4 - b)];
+ const static uint8_t shift[5] = {15, 7, 3, 1, 0};
+ const static uint16_t mask[5] = {1, 3, 15, 255, 65535};
+ code >>= (i & shift[b]) << b;
+ code &= mask[b];
+#else// no LUT
+ uint32_t code = reinterpret_cast<__global__ const uint32_t*>(this + 1)[i >> (5 - b)];
+ //code >>= (i & ((16 >> b) - 1)) << b;
+ code >>= (i & ((32 >> b) - 1)) << b;
+ code &= (1 << (1 << b)) - 1;
+#endif
+#else// use branched version (slow)
+ float code;
+ __global__ auto *values = reinterpret_cast<__global__ const uint8_t*>(this+1);
+ switch (BaseT::mFlags >> 5) {
+ case 0u:// 1 bit float
+ code = float((values[i>>3] >> (i&7) ) & uint8_t(1));
+ break;
+ case 1u:// 2 bits float
+ code = float((values[i>>2] >> ((i&3)<<1)) & uint8_t(3));
+ break;
+ case 2u:// 4 bits float
+ code = float((values[i>>1] >> ((i&1)<<2)) & uint8_t(15));
+ break;
+ case 3u:// 8 bits float
+ code = float(values[i]);
+ break;
+ default:// 16 bits float
+ code = float(reinterpret_cast<__global__ const uint16_t*>(values)[i]);
+ }
+#endif
+ return float(code) * BASE(mQuantum) + BASE(mMinimum);// code * (max-min)/UNITS + min
+ }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ float getValue(uint32_t i) const __local__
+ {
+#ifdef NANOVDB_FPN_BRANCHLESS// faster
const int b = BaseT::mFlags >> 5;// b = 0, 1, 2, 3, 4 corresponding to 1, 2, 4, 8, 16 bits
#if 0// use LUT
uint16_t code = reinterpret_cast<const uint16_t*>(this + 1)[i >> (4 - b)];
@@ -4047,7 +4813,7 @@
code >>= (i & shift[b]) << b;
code &= mask[b];
#else// no LUT
- uint32_t code = reinterpret_cast<const uint32_t*>(this + 1)[i >> (5 - b)];
+ uint32_t code = reinterpret_cast<__global__ const uint32_t*>(this + 1)[i >> (5 - b)];
//code >>= (i & ((16 >> b) - 1)) << b;
code >>= (i & ((32 >> b) - 1)) << b;
code &= (1 << (1 << b)) - 1;
@@ -4054,7 +4820,7 @@
#endif
#else// use branched version (slow)
float code;
- auto *values = reinterpret_cast<const uint8_t*>(this+1);
+ __global__ auto *values = reinterpret_cast<__global__ const uint8_t*>(this+1);
switch (BaseT::mFlags >> 5) {
case 0u:// 1 bit float
code = float((values[i>>3] >> (i&7) ) & uint8_t(1));
@@ -4074,12 +4840,15 @@
#endif
return float(code) * BaseT::mQuantum + BaseT::mMinimum;// code * (max-min)/UNITS + min
}
+#endif
/// @brief This class cannot be constructed or deleted
- LeafData() = delete;
- LeafData(const LeafData&) = delete;
- LeafData& operator=(const LeafData&) = delete;
- ~LeafData() = delete;
+ LeafData() __global__ = delete;
+ LeafData(__global__ const LeafData&) __global__ = delete;
+ __global__ LeafData& operator=(__global__ const LeafData&) __global__ = delete;
+ ~LeafData() __global__ = delete;
+
+#undef BASE
}; // LeafData<FpN>
// Partial template specialization of LeafData with bool
@@ -4092,7 +4861,7 @@
using BuildType = bool;
using FloatType = bool;// dummy value type
using ArrayType = MaskT<LOG2DIM>;// type used for the internal mValue array
- static constexpr bool FIXED_SIZE = true;
+ static __constant__ constexpr bool FIXED_SIZE = true;
CoordT mBBoxMin; // 12B.
uint8_t mBBoxDif[3]; // 3B.
@@ -4104,31 +4873,34 @@
__hostdev__ static constexpr uint32_t padding() {return sizeof(LeafData) - 12u - 3u - 1u - 2*sizeof(MaskT<LOG2DIM>) - 16u;}
__hostdev__ static uint64_t memUsage() { return sizeof(LeafData); }
- //__hostdev__ const ValueType* values() const { return nullptr; }
- __hostdev__ bool getValue(uint32_t i) const { return mValues.isOn(i); }
- __hostdev__ bool getMin() const { return false; }// dummy
- __hostdev__ bool getMax() const { return false; }// dummy
- __hostdev__ bool getAvg() const { return false; }// dummy
- __hostdev__ bool getDev() const { return false; }// dummy
- __hostdev__ void setValue(uint32_t offset, bool v)
+ //__hostdev__ __global__ const ValueType* values() const __global__ { return nullptr; }
+ __hostdev__ bool getValue(uint32_t i) const __global__ { return mValues.isOn(i); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool getValue(uint32_t i) const __local__ { return mValues.isOn(i); }
+#endif
+ __hostdev__ bool getMin() const __global__ { return false; }// dummy
+ __hostdev__ bool getMax() const __global__ { return false; }// dummy
+ __hostdev__ bool getAvg() const __global__ { return false; }// dummy
+ __hostdev__ bool getDev() const __global__ { return false; }// dummy
+ __hostdev__ void setValue(uint32_t offset, bool v) __global__
{
mValueMask.setOn(offset);
mValues.set(offset, v);
}
- __hostdev__ void setMin(const bool&) {}// no-op
- __hostdev__ void setMax(const bool&) {}// no-op
- __hostdev__ void setAvg(const bool&) {}// no-op
- __hostdev__ void setDev(const bool&) {}// no-op
+ __hostdev__ void setMin(__global__ const bool&) __global__ {}// no-op
+ __hostdev__ void setMax(__global__ const bool&) __global__ {}// no-op
+ __hostdev__ void setAvg(__global__ const bool&) __global__ {}// no-op
+ __hostdev__ void setDev(__global__ const bool&) __global__ {}// no-op
template <typename T>
- __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; }
+ __hostdev__ void setOrigin(__global__ const T& ijk) __global__ { mBBoxMin = ijk; }
/// @brief This class cannot be constructed or deleted
- LeafData() = delete;
- LeafData(const LeafData&) = delete;
- LeafData& operator=(const LeafData&) = delete;
- ~LeafData() = delete;
+ LeafData() __global__ = delete;
+ LeafData(__global__ const LeafData&) __global__ = delete;
+ __global__ LeafData& operator=(__global__ const LeafData&) __global__ = delete;
+ ~LeafData() __global__ = delete;
}; // LeafData<bool>
// Partial template specialization of LeafData with ValueMask
@@ -4141,7 +4913,7 @@
using BuildType = ValueMask;
using FloatType = bool;// dummy value type
using ArrayType = void;// type used for the internal mValue array - void means missing
- static constexpr bool FIXED_SIZE = true;
+ static __constant__ constexpr bool FIXED_SIZE = true;
CoordT mBBoxMin; // 12B.
uint8_t mBBoxDif[3]; // 3B.
@@ -4156,29 +4928,32 @@
}
//__hostdev__ const ValueType* values() const { return nullptr; }
- __hostdev__ bool getValue(uint32_t i) const { return mValueMask.isOn(i); }
- __hostdev__ bool getMin() const { return false; }// dummy
- __hostdev__ bool getMax() const { return false; }// dummy
- __hostdev__ bool getAvg() const { return false; }// dummy
- __hostdev__ bool getDev() const { return false; }// dummy
- __hostdev__ void setValue(uint32_t offset, bool)
+ __hostdev__ bool getValue(uint32_t i) const __global__ { return mValueMask.isOn(i); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool getValue(uint32_t i) const __local__ { return mValueMask.isOn(i); }
+#endif
+ __hostdev__ bool getMin() const __global__ { return false; }// dummy
+ __hostdev__ bool getMax() const __global__ { return false; }// dummy
+ __hostdev__ bool getAvg() const __global__ { return false; }// dummy
+ __hostdev__ bool getDev() const __global__ { return false; }// dummy
+ __hostdev__ void setValue(uint32_t offset, bool) __global__
{
mValueMask.setOn(offset);
}
- __hostdev__ void setMin(const ValueType&) {}// no-op
- __hostdev__ void setMax(const ValueType&) {}// no-op
- __hostdev__ void setAvg(const FloatType&) {}// no-op
- __hostdev__ void setDev(const FloatType&) {}// no-op
+ __hostdev__ void setMin(__global__ const ValueType&) __global__ {}// no-op
+ __hostdev__ void setMax(__global__ const ValueType&) __global__ {}// no-op
+ __hostdev__ void setAvg(__global__ const FloatType&) __global__ {}// no-op
+ __hostdev__ void setDev(__global__ const FloatType&) __global__ {}// no-op
template <typename T>
- __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; }
+ __hostdev__ void setOrigin(__global__ const T& ijk) __global__ { mBBoxMin = ijk; }
/// @brief This class cannot be constructed or deleted
- LeafData() = delete;
- LeafData(const LeafData&) = delete;
- LeafData& operator=(const LeafData&) = delete;
- ~LeafData() = delete;
+ LeafData() __global__ = delete;
+ LeafData(__global__ const LeafData&) __global__ = delete;
+ __global__ LeafData& operator=(__global__ const LeafData&) __global__ = delete;
+ ~LeafData() __global__ = delete;
}; // LeafData<ValueMask>
// Partial template specialization of LeafData with ValueIndex
@@ -4191,7 +4966,7 @@
using BuildType = ValueIndex;
using FloatType = uint64_t;
using ArrayType = void;// type used for the internal mValue array - void means missing
- static constexpr bool FIXED_SIZE = true;
+ static __constant__ constexpr bool FIXED_SIZE = true;
CoordT mBBoxMin; // 12B.
uint8_t mBBoxDif[3]; // 3B.
@@ -4208,16 +4983,16 @@
__hostdev__ static uint64_t memUsage() { return sizeof(LeafData); }
- __hostdev__ uint64_t getMin() const { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 0; }
- __hostdev__ uint64_t getMax() const { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 1; }
- __hostdev__ uint64_t getAvg() const { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 2; }
- __hostdev__ uint64_t getDev() const { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 3; }
+ __hostdev__ uint64_t getMin() const __global__ { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 0; }
+ __hostdev__ uint64_t getMax() const __global__ { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 1; }
+ __hostdev__ uint64_t getAvg() const __global__ { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 2; }
+ __hostdev__ uint64_t getDev() const __global__ { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 3; }
__hostdev__ void setValue(uint32_t offset, uint64_t)
{
mValueMask.setOn(offset);
}
- __hostdev__ uint64_t getValue(uint32_t i) const
+ __hostdev__ uint64_t getValue(uint32_t i) const __global__
{
if (mFlags & uint8_t(16u)) {// if 4th bit is set only active voxels are indexed
return mValueMask.isOn(i) ? mValueOff + mValueMask.countOn(i) : 0;// 0 is background
@@ -4224,23 +4999,31 @@
}
return mValueOff + i;// dense array of active and inactive voxels
}
-
+#if defined(__KERNEL_METAL__)
+ __hostdev__ uint64_t getValue(uint32_t i) const __local__
+ {
+ if (mFlags & uint8_t(16u)) {// if 4th bit is set only active voxels are indexed
+ return mValueMask.isOn(i) ? mValueOff + mValueMask.countOn(i) : 0;// 0 is background
+ }
+ return mValueOff + i;// dense array of active and inactive voxels
+ }
+#endif
template <typename T>
- __hostdev__ void setMin(const T &min, T *p) { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 0] = min; }
+ __hostdev__ void setMin(__global__ const T &min, __global__ T *p) __global__ { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 0] = min; }
template <typename T>
- __hostdev__ void setMax(const T &max, T *p) { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 1] = max; }
+ __hostdev__ void setMax(__global__ const T &max, __global__ T *p) __global__ { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 1] = max; }
template <typename T>
- __hostdev__ void setAvg(const T &avg, T *p) { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 2] = avg; }
+ __hostdev__ void setAvg(__global__ const T &avg, __global__ T *p) __global__ { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 2] = avg; }
template <typename T>
- __hostdev__ void setDev(const T &dev, T *p) { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 3] = dev; }
+ __hostdev__ void setDev(__global__ const T &dev, __global__ T *p) __global__ { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 3] = dev; }
template <typename T>
- __hostdev__ void setOrigin(const T &ijk) { mBBoxMin = ijk; }
+ __hostdev__ void setOrigin(__global__ const T &ijk) __global__ { mBBoxMin = ijk; }
/// @brief This class cannot be constructed or deleted
- LeafData() = delete;
- LeafData(const LeafData&) = delete;
- LeafData& operator=(const LeafData&) = delete;
- ~LeafData() = delete;
+ LeafData() __global__ = delete;
+ LeafData(__global__ const LeafData&) __global__ = delete;
+ __global__ LeafData& operator=(__global__ const LeafData&) __global__ = delete;
+ ~LeafData() __global__ = delete;
}; // LeafData<ValueIndex>
/// @brief Leaf nodes of the VDB tree. (defaults to 8x8x8 = 512 voxels)
@@ -4248,13 +5031,22 @@
typename CoordT = Coord,
template<uint32_t> class MaskT = Mask,
uint32_t Log2Dim = 3>
-class LeafNode : private LeafData<BuildT, CoordT, MaskT, Log2Dim>
+class LeafNode
+#if !defined(__KERNEL_METAL__)
+ : private LeafData<BuildT, CoordT, MaskT, Log2Dim>
+#endif
{
+#if defined(__KERNEL_METAL__)
+ LeafData<BuildT, CoordT, MaskT, Log2Dim> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) DataType::v
+#endif
public:
struct ChildNodeType
{
- static constexpr uint32_t TOTAL = 0;
- static constexpr uint32_t DIM = 1;
+ static __constant__ constexpr uint32_t TOTAL = 0;
+ static __constant__ constexpr uint32_t DIM = 1;
__hostdev__ static uint32_t dim() { return 1u; }
}; // Voxel
using LeafNodeType = LeafNode<BuildT, CoordT, MaskT, Log2Dim>;
@@ -4263,7 +5055,7 @@
using FloatType = typename DataType::FloatType;
using BuildType = typename DataType::BuildType;
using CoordType = CoordT;
- static constexpr bool FIXED_SIZE = DataType::FIXED_SIZE;
+ static __constant__ constexpr bool FIXED_SIZE = DataType::FIXED_SIZE;
template<uint32_t LOG2>
using MaskType = MaskT<LOG2>;
template<bool ON>
@@ -4270,31 +5062,49 @@
using MaskIterT = typename Mask<Log2Dim>::template Iterator<ON>;
/// @brief Visits all active values in a leaf node
- class ValueOnIterator : public MaskIterT<true>
+ class ValueOnIterator
+#if !defined (__KERNEL_METAL__)
+ : public MaskIterT<true>
+#endif
{
+#if defined(__KERNEL_METAL__)
+ MaskIterT<true> BaseT;
+#define BASE(v) BaseT.v
+#else
using BaseT = MaskIterT<true>;
- const LeafNode *mParent;
+#define BASE(v) BaseT::v
+#endif
+ __global__ const LeafNode *mParent;
public:
__hostdev__ ValueOnIterator() : BaseT(), mParent(nullptr) {}
- __hostdev__ ValueOnIterator(const LeafNode* parent) : BaseT(parent->data()->mValueMask.beginOn()), mParent(parent) {}
- ValueOnIterator& operator=(const ValueOnIterator&) = default;
- __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->getValue(BaseT::pos());}
- __hostdev__ CoordT getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());}
+ __hostdev__ ValueOnIterator(__global__ const LeafNode* parent) : BaseT(parent->data()->mValueMask.beginOn()), mParent(parent) {}
+ __global__ ValueOnIterator& operator=(__global__ const ValueOnIterator&) = default;
+ __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->getValue(BASE(pos)());}
+ __hostdev__ CoordT getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BASE(pos)());}
}; // Member class ValueOnIterator
ValueOnIterator beginValueOn() const {return ValueOnIterator(this);}
/// @brief Visits all inactive values in a leaf node
- class ValueOffIterator : public MaskIterT<false>
+ class ValueOffIterator
+#if !defined (__KERNEL_METAL__)
+ : public MaskIterT<false>
+#endif
{
+#if defined(__KERNEL_METAL__)
+ MaskIterT<false> BaseT;
+#define BASE(v) BaseT.v
+#else
using BaseT = MaskIterT<false>;
- const LeafNode *mParent;
+#define BASE(v) BaseT::v
+#endif
+ __global__ const LeafNode *mParent;
public:
__hostdev__ ValueOffIterator() : BaseT(), mParent(nullptr) {}
- __hostdev__ ValueOffIterator(const LeafNode* parent) : BaseT(parent->data()->mValueMask.beginOff()), mParent(parent) {}
- ValueOffIterator& operator=(const ValueOffIterator&) = default;
- __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->getValue(BaseT::pos());}
- __hostdev__ CoordT getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());}
+ __hostdev__ ValueOffIterator(__global__ const LeafNode* parent) : BaseT(parent->data()->mValueMask.beginOff()), mParent(parent) {}
+ __global__ ValueOffIterator& operator=(__global__ const ValueOffIterator&) = default;
+ __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->getValue(BASE(pos)());}
+ __hostdev__ CoordT getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BASE(pos)());}
}; // Member class ValueOffIterator
ValueOffIterator beginValueOff() const {return ValueOffIterator(this);}
@@ -4302,17 +5112,17 @@
/// @brief Visits all values in a leaf node, i.e. both active and inactive values
class ValueIterator
{
- const LeafNode *mParent;
+ __global__ const LeafNode *mParent;
uint32_t mPos;
public:
__hostdev__ ValueIterator() : mParent(nullptr), mPos(1u << 3 * Log2Dim) {}
- __hostdev__ ValueIterator(const LeafNode* parent) : mParent(parent), mPos(0) {NANOVDB_ASSERT(parent);}
- ValueIterator& operator=(const ValueIterator&) = default;
+ __hostdev__ ValueIterator(__global__ const LeafNode* parent) : mParent(parent), mPos(0) {NANOVDB_ASSERT(parent);}
+ __global__ ValueIterator& operator=(__global__ const ValueIterator&) = default;
__hostdev__ ValueType operator*() const { NANOVDB_ASSERT(*this); return mParent->getValue(mPos);}
__hostdev__ CoordT getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(mPos);}
__hostdev__ bool isActive() const { NANOVDB_ASSERT(*this); return mParent->isActive(mPos);}
__hostdev__ operator bool() const {return mPos < (1u << 3 * Log2Dim);}
- __hostdev__ ValueIterator& operator++() {++mPos; return *this;}
+ __hostdev__ __global__ ValueIterator& operator++() {++mPos; return *this;}
__hostdev__ ValueIterator operator++(int) {
auto tmp = *this;
++(*this);
@@ -4320,43 +5130,49 @@
}
}; // Member class ValueIterator
+#if defined(__KERNEL_METAL__)
+#define BASE(v) _base.v
+#else
+#define BASE(v) DataType::v
+#endif
+
ValueIterator beginValue() const {return ValueIterator(this);}
static_assert(is_same<ValueType,typename BuildToValueMap<BuildType>::Type>::value, "Mismatching BuildType");
- static constexpr uint32_t LOG2DIM = Log2Dim;
- static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes
- static constexpr uint32_t DIM = 1u << TOTAL; // number of voxels along each axis of this node
- static constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node
- static constexpr uint32_t MASK = (1u << LOG2DIM) - 1u; // mask for bit operations
- static constexpr uint32_t LEVEL = 0; // level 0 = leaf
- static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node
+ static __constant__ constexpr uint32_t LOG2DIM = Log2Dim;
+ static __constant__ constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes
+ static __constant__ constexpr uint32_t DIM = 1u << TOTAL; // number of voxels along each axis of this node
+ static __constant__ constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node
+ static __constant__ constexpr uint32_t MASK = (1u << LOG2DIM) - 1u; // mask for bit operations
+ static __constant__ constexpr uint32_t LEVEL = 0; // level 0 = leaf
+ static __constant__ constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node
- __hostdev__ DataType* data() { return reinterpret_cast<DataType*>(this); }
+ __hostdev__ __global__ DataType* data() __global__ { return reinterpret_cast<__global__ DataType*>(this); }
- __hostdev__ const DataType* data() const { return reinterpret_cast<const DataType*>(this); }
+ __hostdev__ __global__ const DataType* data() __global__ const { return reinterpret_cast<__global__ const DataType*>(this); }
/// @brief Return a const reference to the bit mask of active voxels in this leaf node
- __hostdev__ const MaskType<LOG2DIM>& valueMask() const { return DataType::mValueMask; }
+ __hostdev__ __global__ const MaskType<LOG2DIM>& valueMask() const __global__ { return DataType::mValueMask; }
/// @brief Return a const reference to the minimum active value encoded in this leaf node
- __hostdev__ ValueType minimum() const { return this->getMin(); }
+ __hostdev__ ValueType minimum() const __global__ { return this->getMin(); }
/// @brief Return a const reference to the maximum active value encoded in this leaf node
- __hostdev__ ValueType maximum() const { return this->getMax(); }
+ __hostdev__ ValueType maximum() const __global__ { return this->getMax(); }
/// @brief Return a const reference to the average of all the active values encoded in this leaf node
- __hostdev__ FloatType average() const { return DataType::getAvg(); }
+ __hostdev__ FloatType average() const __global__ { return DataType::getAvg(); }
/// @brief Return the variance of all the active values encoded in this leaf node
- __hostdev__ FloatType variance() const { return DataType::getDev()*DataType::getDev(); }
+ __hostdev__ FloatType variance() const __global__ { return DataType::getDev()*DataType::getDev(); }
/// @brief Return a const reference to the standard deviation of all the active values encoded in this leaf node
- __hostdev__ FloatType stdDeviation() const { return DataType::getDev(); }
+ __hostdev__ FloatType stdDeviation() const __global__ { return DataType::getDev(); }
- __hostdev__ uint8_t flags() const { return DataType::mFlags; }
+ __hostdev__ uint8_t flags() const __global__ { return DataType::mFlags; }
/// @brief Return the origin in index space of this leaf node
- __hostdev__ CoordT origin() const { return DataType::mBBoxMin & ~MASK; }
+ __hostdev__ CoordT origin() const __global__ { return DataType::mBBoxMin & ~MASK; }
__hostdev__ static CoordT OffsetToLocalCoord(uint32_t n)
{
@@ -4366,9 +5182,9 @@
}
/// @brief Converts (in place) a local index coordinate to a global index coordinate
- __hostdev__ void localToGlobalCoord(Coord& ijk) const { ijk += this->origin(); }
+ __hostdev__ void localToGlobalCoord(__global__ Coord& ijk) const __global__ { ijk += this->origin(); }
- __hostdev__ CoordT offsetToGlobalCoord(uint32_t n) const
+ __hostdev__ CoordT offsetToGlobalCoord(uint32_t n) const __global__
{
return OffsetToLocalCoord(n) + this->origin();
}
@@ -4377,7 +5193,7 @@
__hostdev__ static uint32_t dim() { return 1u << LOG2DIM; }
/// @brief Return the bounding box in index space of active values in this leaf node
- __hostdev__ BBox<CoordT> bbox() const
+ __hostdev__ BBox<CoordT> bbox() const __global__
{
BBox<CoordT> bbox(DataType::mBBoxMin, DataType::mBBoxMin);
if ( this->hasBBox() ) {
@@ -4399,54 +5215,85 @@
__hostdev__ uint64_t memUsage() { return DataType::memUsage(); }
/// @brief This class cannot be constructed or deleted
- LeafNode() = delete;
- LeafNode(const LeafNode&) = delete;
- LeafNode& operator=(const LeafNode&) = delete;
- ~LeafNode() = delete;
+ LeafNode() __global__ = delete;
+ LeafNode(__global__ const LeafNode&) __global__ = delete;
+ __global__ LeafNode& operator=(__global__ const LeafNode&) __global__ = delete;
+ ~LeafNode() __global__ = delete;
/// @brief Return the voxel value at the given offset.
- __hostdev__ ValueType getValue(uint32_t offset) const { return DataType::getValue(offset); }
+ __hostdev__ ValueType getValue(uint32_t offset) const __global__ { return DataType::getValue(offset); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(uint32_t offset) const __local__ { return DataType::getValue(offset); }
+#endif
+
/// @brief Return the voxel value at the given coordinate.
- __hostdev__ ValueType getValue(const CoordT& ijk) const { return DataType::getValue(CoordToOffset(ijk)); }
+ __hostdev__ ValueType getValue(__global__ const CoordT& ijk) const __global__ { return BASE(getValue)(CoordToOffset(ijk)); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(__local__ const CoordT& ijk) const __global__ { return BASE(getValue)(CoordToOffset(ijk)); }
+ __hostdev__ ValueType getValue(__local__ const CoordT& ijk) const __local__ { return BASE(getValue)(CoordToOffset(ijk)); }
+#endif
/// @brief Sets the value at the specified location and activate its state.
///
/// @note This is safe since it does not change the topology of the tree (unlike setValue methods on the other nodes)
- __hostdev__ void setValue(const CoordT& ijk, const ValueType& v) { DataType::setValue(CoordToOffset(ijk), v); }
+ __hostdev__ void setValue(__global__ const CoordT& ijk, __global__ const ValueType& v) __global__ { DataType::setValue(CoordToOffset(ijk), v); }
/// @brief Sets the value at the specified location but leaves its state unchanged.
///
/// @note This is safe since it does not change the topology of the tree (unlike setValue methods on the other nodes)
- __hostdev__ void setValueOnly(uint32_t offset, const ValueType& v) { DataType::setValueOnly(offset, v); }
- __hostdev__ void setValueOnly(const CoordT& ijk, const ValueType& v) { DataType::setValueOnly(CoordToOffset(ijk), v); }
+ __hostdev__ void setValueOnly(uint32_t offset, __global__ const ValueType& v) __global__ { DataType::setValueOnly(offset, v); }
+ __hostdev__ void setValueOnly(__global__ const CoordT& ijk, __global__ const ValueType& v) __global__ { DataType::setValueOnly(CoordToOffset(ijk), v); }
/// @brief Return @c true if the voxel value at the given coordinate is active.
- __hostdev__ bool isActive(const CoordT& ijk) const { return DataType::mValueMask.isOn(CoordToOffset(ijk)); }
- __hostdev__ bool isActive(uint32_t n) const { return DataType::mValueMask.isOn(n); }
+ __hostdev__ bool isActive(__global__ const CoordT& ijk) const __global__ { return BASE(mValueMask).isOn(CoordToOffset(ijk)); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isActive(__local__ const CoordT& ijk) const __global__ { return BASE(mValueMask).isOn(CoordToOffset(ijk)); }
+ __hostdev__ bool isActive(__local__ const CoordT& ijk) const __local__ { return BASE(mValueMask).isOn(CoordToOffset(ijk)); }
+#endif
+ __hostdev__ bool isActive(uint32_t n) const __global__ { return BASE(mValueMask).isOn(n); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isActive(uint32_t n) const __local__ { return BASE(mValueMask).isOn(n); }
+#endif
+
/// @brief Return @c true if any of the voxel value are active in this leaf node.
- __hostdev__ bool isActive() const
+ __hostdev__ bool isActive() const __global__
{
//NANOVDB_ASSERT( bool(DataType::mFlags & uint8_t(2)) != DataType::mValueMask.isOff() );
//return DataType::mFlags & uint8_t(2);
return !DataType::mValueMask.isOff();
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isActive() const __local__
+ {
+        //NANOVDB_ASSERT( bool(DataType::mFlags & uint8_t(2)) != BASE(mValueMask).isOff() );
+        return !BASE(mValueMask).isOff();
+ }
+#endif
__hostdev__ bool hasBBox() const {return DataType::mFlags & uint8_t(2);}
/// @brief Return @c true if the voxel value at the given coordinate is active and updates @c v with the value.
- __hostdev__ bool probeValue(const CoordT& ijk, ValueType& v) const
+ __hostdev__ bool probeValue(__global__ const CoordT& ijk, __global__ ValueType& v) const __global__
{
const uint32_t n = CoordToOffset(ijk);
v = DataType::getValue(n);
- return DataType::mValueMask.isOn(n);
+ return BASE(mValueMask).isOn(n);
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool probeValue(__local__ const CoordT& ijk, __local__ ValueType& v) const __global__
+ {
+ const uint32_t n = CoordToOffset(ijk);
+ v = BASE(getValue)(n);
+ return BASE(mValueMask).isOn(n);
+ }
+#endif
- __hostdev__ const LeafNode* probeLeaf(const CoordT&) const { return this; }
+ __hostdev__ __global__ const LeafNode* probeLeaf(__global__ const CoordT&) const __global__ { return this; }
/// @brief Return the linear offset corresponding to the given coordinate
- __hostdev__ static uint32_t CoordToOffset(const CoordT& ijk)
+ __hostdev__ static uint32_t CoordToOffset(__global__ const CoordT& ijk)
{
#if 0
return ((ijk[0] & MASK) << (2 * LOG2DIM)) + ((ijk[1] & MASK) << LOG2DIM) + (ijk[2] & MASK);
@@ -4454,6 +5301,16 @@
return ((ijk[0] & MASK) << (2 * LOG2DIM)) | ((ijk[1] & MASK) << LOG2DIM) | (ijk[2] & MASK);
#endif
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ static uint32_t CoordToOffset(__local__ const CoordT& ijk)
+ {
+ #if 0
+ return ((ijk[0] & MASK) << (2 * LOG2DIM)) + ((ijk[1] & MASK) << LOG2DIM) + (ijk[2] & MASK);
+ #else
+ return ((ijk[0] & MASK) << (2 * LOG2DIM)) | ((ijk[1] & MASK) << LOG2DIM) | (ijk[2] & MASK);
+ #endif
+ }
+#endif
/// @brief Updates the local bounding box of active voxels in this node. Return true if bbox was updated.
///
@@ -4461,8 +5318,9 @@
///
/// @details This method is based on few (intrinsic) bit operations and hence is relatively fast.
/// However, it should only only be called of either the value mask has changed or if the
+
/// active bounding box is still undefined. e.g. during construction of this node.
- __hostdev__ bool updateBBox();
+ __hostdev__ bool updateBBox() __global__;
private:
static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(LeafData) is misaligned");
@@ -4478,11 +5336,15 @@
/// @brief Private method to return a voxel value and update a (dummy) ReadAccessor
template<typename AccT>
- __hostdev__ ValueType getValueAndCache(const CoordT& ijk, const AccT&) const { return this->getValue(ijk); }
+ __hostdev__ ValueType getValueAndCache(__global__ const CoordT& ijk, __global__ const AccT&) const __global__ { return this->getValue(ijk); }
+#if defined(__KERNEL_METAL__)
+ template<typename AccT>
+ __hostdev__ ValueType getValueAndCache(__local__ const CoordT& ijk, __local__ const AccT&) const __global__ { return this->getValue(ijk); }
+#endif
/// @brief Return the node information.
template<typename AccT>
- __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& /*ijk*/, const AccT& /*acc*/) const {
+ __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(__global__ const CoordType& /*ijk*/, __global__ const AccT& /*acc*/) const __global__ {
using NodeInfoT = typename AccT::NodeInfo;
return NodeInfoT{LEVEL, this->dim(), this->minimum(), this->maximum(),
this->average(), this->stdDeviation(), this->bbox()[0], this->bbox()[1]};
@@ -4489,16 +5351,20 @@
}
template<typename AccT>
- __hostdev__ bool isActiveAndCache(const CoordT& ijk, const AccT&) const { return this->isActive(ijk); }
+ __hostdev__ bool isActiveAndCache(__global__ const CoordT& ijk, __global__ const AccT&) const __global__ { return this->isActive(ijk); }
+#if defined(__KERNEL_METAL__)
+ template<typename AccT>
+ __hostdev__ bool isActiveAndCache(__local__ const CoordT& ijk, __local__ const AccT&) const __global__ { return this->isActive(ijk); }
+#endif
template<typename AccT>
- __hostdev__ bool probeValueAndCache(const CoordT& ijk, ValueType& v, const AccT&) const { return this->probeValue(ijk, v); }
+ __hostdev__ bool probeValueAndCache(__global__ const CoordT& ijk, __global__ ValueType& v, __global__ const AccT&) const __global__ { return this->probeValue(ijk, v); }
template<typename AccT>
- __hostdev__ const LeafNode* probeLeafAndCache(const CoordT&, const AccT&) const { return this; }
+ __hostdev__ __global__ const LeafNode* probeLeafAndCache(__global__ const CoordT&, __global__ const AccT&) const __global__ { return this; }
template<typename RayT, typename AccT>
- __hostdev__ uint32_t getDimAndCache(const CoordT&, const RayT& /*ray*/, const AccT&) const
+ __hostdev__ uint32_t getDimAndCache(__global__ const CoordT&, __global__ const RayT& /*ray*/, __global__ const AccT&) const __global__
{
if (DataType::mFlags & uint8_t(1u)) return this->dim(); // skip this node if the 1st bit is set
@@ -4505,11 +5371,21 @@
//if (!ray.intersects( this->bbox() )) return 1 << LOG2DIM;
return ChildNodeType::dim();
}
-
+#if defined(__KERNEL_METAL__)
+ template<typename RayT, typename AccT>
+ __hostdev__ uint32_t getDimAndCache(__local__ const CoordT&, __local__ const RayT& /*ray*/, __local__ const AccT&) const __global__
+ {
+ if (BASE(mFlags) & uint8_t(1))
+ return this->dim(); // skip this node if first bit is set
+ //if (!ray.intersects( this->bbox() )) return 1 << LOG2DIM;
+ return ChildNodeType::dim();
+ }
+#endif
+#undef BASE
}; // LeafNode class
template<typename ValueT, typename CoordT, template<uint32_t> class MaskT, uint32_t LOG2DIM>
-__hostdev__ inline bool LeafNode<ValueT, CoordT, MaskT, LOG2DIM>::updateBBox()
+__hostdev__ inline bool LeafNode<ValueT, CoordT, MaskT, LOG2DIM>::updateBBox() __global__
{
static_assert(LOG2DIM == 3, "LeafNode::updateBBox: only supports LOGDIM = 3!");
if (DataType::mValueMask.isOff()) {
@@ -4516,11 +5392,21 @@
DataType::mFlags &= ~uint8_t(2);// set 2nd bit off, which indicates that this nodes has no bbox
return false;
}
+#if defined(__KERNEL_METAL__)
+ struct Update {
+ static void update(__global__ DataType &d, uint32_t min, uint32_t max, int axis) {
+ NANOVDB_ASSERT(min <= max && max < 8);
+ d.mBBoxMin[axis] = (d.mBBoxMin[axis] & ~MASK) + int(min);
+ d.mBBoxDif[axis] = uint8_t(max - min);
+ }
+ };
+#else
auto update = [&](uint32_t min, uint32_t max, int axis) {
NANOVDB_ASSERT(min <= max && max < 8);
DataType::mBBoxMin[axis] = (DataType::mBBoxMin[axis] & ~MASK) + int(min);
DataType::mBBoxDif[axis] = uint8_t(max - min);
};
+#endif
uint64_t word64 = DataType::mValueMask.template getWord<uint64_t>(0);
uint32_t Xmin = word64 ? 0u : 8u;
uint32_t Xmax = Xmin;
@@ -4534,6 +5420,17 @@
}
}
NANOVDB_ASSERT(word64);
+#if defined(__KERNEL_METAL__)
+    Update::update(*this, Xmin, Xmax, 0);
+    Update::update(*this, FindLowestOn(word64) >> 3, FindHighestOn(word64) >> 3, 1);
+    __local__ const uint32_t *p = reinterpret_cast<__local__ const uint32_t*>(&word64), word32 = p[0] | p[1];
+    __local__ const uint16_t *q = reinterpret_cast<__local__ const uint16_t*>(&word32), word16 = q[0] | q[1];
+    __local__ const uint8_t *b = reinterpret_cast<__local__ const uint8_t* >(&word16), byte = b[0] | b[1];
+    NANOVDB_ASSERT(byte);
+    Update::update(*this, FindLowestOn(static_cast<uint32_t>(byte)), FindHighestOn(static_cast<uint32_t>(byte)), 2);
+ DataType::mFlags |= uint8_t(2);// set 2nd bit on, which indicates that this nodes has a bbox
+ return true;
+#else
update(Xmin, Xmax, 0);
update(FindLowestOn(word64) >> 3, FindHighestOn(word64) >> 3, 1);
const uint32_t *p = reinterpret_cast<const uint32_t*>(&word64), word32 = p[0] | p[1];
@@ -4541,8 +5438,9 @@
const uint8_t *b = reinterpret_cast<const uint8_t* >(&word16), byte = b[0] | b[1];
NANOVDB_ASSERT(byte);
update(FindLowestOn(static_cast<uint32_t>(byte)), FindHighestOn(static_cast<uint32_t>(byte)), 2);
- DataType::mFlags |= uint8_t(2);// set 2nd bit on, which indicates that this nodes has a bbox
+ DataType::mFlags |= uint8_t(2);// set 2nd bit on, which indicates that this nodes has a bbox
return true;
+#endif
} // LeafNode::updateBBox
// --------------------------> Template specializations and traits <------------------------------------
@@ -4651,12 +5549,12 @@
using FloatType = typename RootT::FloatType;
using CoordValueType = typename RootT::CoordType::ValueType;
- mutable const RootT* mRoot; // 8 bytes (mutable to allow for access methods to be const)
+ mutable __global__ const RootT* mRoot; // 8 bytes (mutable to allow for access methods to be const)
public:
using ValueType = typename RootT::ValueType;
using CoordType = typename RootT::CoordType;
- static const int CacheLevels = 0;
+ static __constant__ const int CacheLevels = 0;
struct NodeInfo {
uint32_t mLevel; // 4B
@@ -4670,60 +5568,77 @@
};
/// @brief Constructor from a root node
- __hostdev__ ReadAccessor(const RootT& root) : mRoot{&root} {}
+ __hostdev__ ReadAccessor(__global__ const RootT& root) __local__ : mRoot{&root} {}
/// @brief Constructor from a grid
- __hostdev__ ReadAccessor(const GridT& grid) : ReadAccessor(grid.tree().root()) {}
+ __hostdev__ ReadAccessor(__global__ const GridT& grid) __local__ : ReadAccessor(grid.tree().root()) {}
/// @brief Constructor from a tree
- __hostdev__ ReadAccessor(const TreeT& tree) : ReadAccessor(tree.root()) {}
+ __hostdev__ ReadAccessor(__global__ const TreeT& tree) __local__ : ReadAccessor(tree.root()) {}
/// @brief Reset this access to its initial state, i.e. with an empty cache
/// @node Noop since this template specialization has no cache
__hostdev__ void clear() {}
- __hostdev__ const RootT& root() const { return *mRoot; }
+ __hostdev__ __global__ const RootT& root() const __global__ { return *mRoot; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __global__ const RootT& root() const __local__ { return *mRoot; }
+#endif
/// @brief Defaults constructors
- ReadAccessor(const ReadAccessor&) = default;
- ~ReadAccessor() = default;
- ReadAccessor& operator=(const ReadAccessor&) = default;
+ ReadAccessor(__local__ const ReadAccessor&) __local__ = default;
+ ~ReadAccessor() __local__ = default;
+ __local__ ReadAccessor& operator=(__local__ const ReadAccessor&) __local__ = default;
- __hostdev__ ValueType getValue(const CoordType& ijk) const
+ __hostdev__ ValueType getValue(__global__ const CoordType& ijk) const __local__
{
return mRoot->getValueAndCache(ijk, *this);
}
- __hostdev__ ValueType operator()(const CoordType& ijk) const
+
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(__local__ const CoordType& ijk) const __local__
{
+ return mRoot->getValueAndCache(ijk, *this);
+ }
+#endif
+
+ __hostdev__ ValueType operator()(__global__ const CoordType& ijk) const __local__
+ {
return this->getValue(ijk);
}
- __hostdev__ ValueType operator()(int i, int j, int k) const
+ __hostdev__ ValueType operator()(int i, int j, int k) const __local__
{
return this->getValue(CoordType(i,j,k));
}
- __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const
+ __hostdev__ NodeInfo getNodeInfo(__global__ const CoordType& ijk) const __local__
{
return mRoot->getNodeInfoAndCache(ijk, *this);
}
- __hostdev__ bool isActive(const CoordType& ijk) const
+ __hostdev__ bool isActive(__global__ const CoordType& ijk) const __local__
{
return mRoot->isActiveAndCache(ijk, *this);
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isActive(__local__ const CoordType& ijk) const __local__
+ {
+ return mRoot->isActiveAndCache(ijk, *this);
+ }
+#endif
- __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const
+ __hostdev__ bool probeValue(__global__ const CoordType& ijk, __global__ ValueType& v) const __local__
{
return mRoot->probeValueAndCache(ijk, v, *this);
}
- __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const
+ __hostdev__ __global__ const LeafT* probeLeaf(__global__ const CoordType& ijk) const __local__
{
return mRoot->probeLeafAndCache(ijk, *this);
}
template<typename RayT>
- __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const
+ __hostdev__ uint32_t getDim(__global__ const CoordType& ijk, __global__ const RayT& ray) const __local__
{
return mRoot->getDimAndCache(ijk, ray, *this);
}
@@ -4739,7 +5654,11 @@
/// @brief No-op
template<typename NodeT>
- __hostdev__ void insert(const CoordType&, const NodeT*) const {}
+ __hostdev__ void insert(__global__ const CoordType&, __global__ const NodeT*) const __local__ {}
+#if defined(__KERNEL_METAL__)
+ template<typename NodeT>
+ __hostdev__ void insert(__local__ const CoordType&, __global__ const NodeT*) const __local__ {}
+#endif
}; // ReadAccessor<ValueT, -1, -1, -1> class
/// @brief Node caching at a single tree level
@@ -4761,19 +5680,19 @@
// All member data are mutable to allow for access methods to be const
mutable CoordT mKey; // 3*4 = 12 bytes
- mutable const RootT* mRoot; // 8 bytes
- mutable const NodeT* mNode; // 8 bytes
+ mutable __global__ const RootT* mRoot; // 8 bytes
+ mutable __global__ const NodeT* mNode; // 8 bytes
public:
using ValueType = ValueT;
using CoordType = CoordT;
- static const int CacheLevels = 1;
+ static __constant__ const int CacheLevels = 1;
using NodeInfo = typename ReadAccessor<ValueT, -1, -1, -1>::NodeInfo;
/// @brief Constructor from a root node
- __hostdev__ ReadAccessor(const RootT& root)
+ __hostdev__ ReadAccessor(__global__ const RootT& root) __local__
: mKey(CoordType::max())
, mRoot(&root)
, mNode(nullptr)
@@ -4781,10 +5700,10 @@
}
/// @brief Constructor from a grid
- __hostdev__ ReadAccessor(const GridT& grid) : ReadAccessor(grid.tree().root()) {}
+ __hostdev__ ReadAccessor(__global__ const GridT& grid) __local__ : ReadAccessor(grid.tree().root()) {}
/// @brief Constructor from a tree
- __hostdev__ ReadAccessor(const TreeT& tree) : ReadAccessor(tree.root()) {}
+ __hostdev__ ReadAccessor(__global__ const TreeT& tree) __local__ : ReadAccessor(tree.root()) {}
/// @brief Reset this access to its initial state, i.e. with an empty cache
__hostdev__ void clear()
@@ -4793,21 +5712,38 @@
mNode = nullptr;
}
- __hostdev__ const RootT& root() const { return *mRoot; }
+ __hostdev__ __global__ const RootT& root() const __global__ { return *mRoot; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __global__ const RootT& root() const __local__ { return *mRoot; }
+#endif
/// @brief Defaults constructors
- ReadAccessor(const ReadAccessor&) = default;
- ~ReadAccessor() = default;
- ReadAccessor& operator=(const ReadAccessor&) = default;
+ ReadAccessor(__global__ const ReadAccessor&) __global__ = default;
+ ~ReadAccessor() __global__ = default;
+ __global__ ReadAccessor& operator=(__global__ const ReadAccessor&) __global__ = default;
- __hostdev__ bool isCached(const CoordType& ijk) const
+ __hostdev__ bool isCached(__global__ const CoordType& ijk) const __global__
{
return (ijk[0] & int32_t(~NodeT::MASK)) == mKey[0] &&
(ijk[1] & int32_t(~NodeT::MASK)) == mKey[1] &&
(ijk[2] & int32_t(~NodeT::MASK)) == mKey[2];
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isCached(__local__ const CoordType& ijk) const __global__
+ {
+ return (ijk[0] & int32_t(~NodeT::MASK)) == mKey[0] &&
+ (ijk[1] & int32_t(~NodeT::MASK)) == mKey[1] &&
+ (ijk[2] & int32_t(~NodeT::MASK)) == mKey[2];
+ }
+ __hostdev__ bool isCached(__local__ const CoordType& ijk) const __local__
+ {
+ return (ijk[0] & int32_t(~NodeT::MASK)) == mKey[0] &&
+ (ijk[1] & int32_t(~NodeT::MASK)) == mKey[1] &&
+ (ijk[2] & int32_t(~NodeT::MASK)) == mKey[2];
+ }
+#endif
- __hostdev__ ValueType getValue(const CoordType& ijk) const
+ __hostdev__ ValueType getValue(__global__ const CoordType& ijk) const __global__
{
if (this->isCached(ijk)) {
return mNode->getValueAndCache(ijk, *this);
@@ -4814,16 +5750,26 @@
}
return mRoot->getValueAndCache(ijk, *this);
}
- __hostdev__ ValueType operator()(const CoordType& ijk) const
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(__local__ const CoordType& ijk) const __global__
{
+ if (this->isCached(ijk)) {
+ return mNode->getValueAndCache(ijk, *this);
+ }
+ return mRoot->getValueAndCache(ijk, *this);
+ }
+#endif
+
+ __hostdev__ ValueType operator()(__global__ const CoordType& ijk) const __global__
+ {
return this->getValue(ijk);
}
- __hostdev__ ValueType operator()(int i, int j, int k) const
+ __hostdev__ ValueType operator()(int i, int j, int k) const __global__
{
return this->getValue(CoordType(i,j,k));
}
- __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const
+ __hostdev__ NodeInfo getNodeInfo(__global__ const CoordType& ijk) const __global__
{
if (this->isCached(ijk)) {
return mNode->getNodeInfoAndCache(ijk, *this);
@@ -4831,7 +5777,7 @@
return mRoot->getNodeInfoAndCache(ijk, *this);
}
- __hostdev__ bool isActive(const CoordType& ijk) const
+ __hostdev__ bool isActive(__global__ const CoordType& ijk) const __global__
{
if (this->isCached(ijk)) {
return mNode->isActiveAndCache(ijk, *this);
@@ -4838,8 +5784,17 @@
}
return mRoot->isActiveAndCache(ijk, *this);
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isActive(__global__ const CoordType& ijk) const __local__
+ {
+ if (this->isCached(ijk)) {
+ return mNode->isActiveAndCache(ijk, *this);
+ }
+ return mRoot->isActiveAndCache(ijk, *this);
+ }
+#endif
- __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const
+ __hostdev__ bool probeValue(__global__ const CoordType& ijk, __global__ ValueType& v) const __global__
{
if (this->isCached(ijk)) {
return mNode->probeValueAndCache(ijk, v, *this);
@@ -4847,7 +5802,7 @@
return mRoot->probeValueAndCache(ijk, v, *this);
}
- __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const
+ __hostdev__ __global__ const LeafT* probeLeaf(__global__ const CoordType& ijk) const __global__
{
if (this->isCached(ijk)) {
return mNode->probeLeafAndCache(ijk, *this);
@@ -4856,7 +5811,7 @@
}
template<typename RayT>
- __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const
+ __hostdev__ uint32_t getDim(__global__ const CoordType& ijk, __global__ const RayT& ray) const __global__
{
if (this->isCached(ijk)) {
return mNode->getDimAndCache(ijk, ray, *this);
@@ -4874,15 +5829,26 @@
friend class LeafNode;
/// @brief Inserts a leaf node and key pair into this ReadAccessor
- __hostdev__ void insert(const CoordType& ijk, const NodeT* node) const
+ __hostdev__ void insert(__global__ const CoordType& ijk, __global__ const NodeT* node) const __local__
{
mKey = ijk & ~NodeT::MASK;
mNode = node;
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ void insert(__local__ const CoordType& ijk, __global__ const NodeT* node) const __local__
+ {
+ mKey = ijk & ~NodeT::MASK;
+ mNode = node;
+ }
+#endif
// no-op
template<typename OtherNodeT>
- __hostdev__ void insert(const CoordType&, const OtherNodeT*) const {}
+ __hostdev__ void insert(__global__ const CoordType&, __global__ const OtherNodeT*) const __local__ {}
+#if defined(__KERNEL_METAL__)
+ template<typename OtherNodeT>
+ __hostdev__ void insert(__local__ const CoordType&, __global__ const OtherNodeT*) const __local__ {}
+#endif
}; // ReadAccessor<ValueT, LEVEL0>
@@ -4909,20 +5875,20 @@
#else // 68 bytes total
mutable CoordT mKeys[2]; // 2*3*4 = 24 bytes
#endif
- mutable const RootT* mRoot;
- mutable const Node1T* mNode1;
- mutable const Node2T* mNode2;
+ mutable __global__ const RootT* mRoot;
+ mutable __global__ const Node1T* mNode1;
+ mutable __global__ const Node2T* mNode2;
public:
using ValueType = ValueT;
using CoordType = CoordT;
- static const int CacheLevels = 2;
+ static __constant__ const int CacheLevels = 2;
using NodeInfo = typename ReadAccessor<ValueT,-1,-1,-1>::NodeInfo;
/// @brief Constructor from a root node
- __hostdev__ ReadAccessor(const RootT& root)
+ __hostdev__ ReadAccessor(__global__ const RootT& root) __local__
#ifdef USE_SINGLE_ACCESSOR_KEY
: mKey(CoordType::max())
#else
@@ -4935,10 +5901,10 @@
}
/// @brief Constructor from a grid
- __hostdev__ ReadAccessor(const GridT& grid) : ReadAccessor(grid.tree().root()) {}
+ __hostdev__ ReadAccessor(__global__ const GridT& grid) __local__ : ReadAccessor(grid.tree().root()) {}
/// @brief Constructor from a tree
- __hostdev__ ReadAccessor(const TreeT& tree) : ReadAccessor(tree.root()) {}
+ __hostdev__ ReadAccessor(__global__ const TreeT& tree) __local__ : ReadAccessor(tree.root()) {}
/// @brief Reset this access to its initial state, i.e. with an empty cache
__hostdev__ void clear()
@@ -4952,15 +5918,18 @@
mNode2 = nullptr;
}
- __hostdev__ const RootT& root() const { return *mRoot; }
+ __hostdev__ __global__ const RootT& root() const __global__ { return *mRoot; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __global__ const RootT& root() const __local__ { return *mRoot; }
+#endif
/// @brief Defaults constructors
- ReadAccessor(const ReadAccessor&) = default;
+ ReadAccessor(__global__ const ReadAccessor&) __global__ = default;
~ReadAccessor() = default;
- ReadAccessor& operator=(const ReadAccessor&) = default;
+ __global__ ReadAccessor& operator=(__global__ const ReadAccessor&) __global__ = default;
#ifdef USE_SINGLE_ACCESSOR_KEY
- __hostdev__ bool isCached1(CoordValueType dirty) const
+ __hostdev__ bool isCached1(CoordValueType dirty) const __global__
{
if (!mNode1)
return false;
@@ -4970,7 +5939,7 @@
}
return true;
}
- __hostdev__ bool isCached2(CoordValueType dirty) const
+ __hostdev__ bool isCached2(CoordValueType dirty) const __global__
{
if (!mNode2)
return false;
@@ -4980,18 +5949,18 @@
}
return true;
}
- __hostdev__ CoordValueType computeDirty(const CoordType& ijk) const
+ __hostdev__ CoordValueType computeDirty(__global__ const CoordType& ijk) const __global__
{
return (ijk[0] ^ mKey[0]) | (ijk[1] ^ mKey[1]) | (ijk[2] ^ mKey[2]);
}
#else
- __hostdev__ bool isCached1(const CoordType& ijk) const
+ __hostdev__ bool isCached1(__global__ const CoordType& ijk) const __global__
{
return (ijk[0] & int32_t(~Node1T::MASK)) == mKeys[0][0] &&
(ijk[1] & int32_t(~Node1T::MASK)) == mKeys[0][1] &&
(ijk[2] & int32_t(~Node1T::MASK)) == mKeys[0][2];
}
- __hostdev__ bool isCached2(const CoordType& ijk) const
+ __hostdev__ bool isCached2(__global__ const CoordType& ijk) const __global__
{
return (ijk[0] & int32_t(~Node2T::MASK)) == mKeys[1][0] &&
(ijk[1] & int32_t(~Node2T::MASK)) == mKeys[1][1] &&
@@ -4999,12 +5968,12 @@
}
#endif
- __hostdev__ ValueType getValue(const CoordType& ijk) const
+ __hostdev__ ValueType getValue(__global__ const CoordType& ijk) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached1(dirty)) {
return mNode1->getValueAndCache(ijk, *this);
@@ -5013,21 +5982,37 @@
}
return mRoot->getValueAndCache(ijk, *this);
}
- __hostdev__ ValueType operator()(const CoordType& ijk) const
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(__local__ const CoordType& ijk) const __global__
{
+#ifdef USE_SINGLE_ACCESSOR_KEY
+ const CoordValueType dirty = this->computeDirty(ijk);
+#else
+ __global__ auto&& dirty = ijk;
+#endif
+ if (this->isCached1(dirty)) {
+ return mNode1->getValueAndCache(ijk, *this);
+ } else if (this->isCached2(dirty)) {
+ return mNode2->getValueAndCache(ijk, *this);
+ }
+ return mRoot->getValueAndCache(ijk, *this);
+ }
+#endif
+ __hostdev__ ValueType operator()(__global__ const CoordType& ijk) const __global__
+ {
return this->getValue(ijk);
}
- __hostdev__ ValueType operator()(int i, int j, int k) const
+ __hostdev__ ValueType operator()(int i, int j, int k) const __global__
{
return this->getValue(CoordType(i,j,k));
}
- __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const
+ __hostdev__ NodeInfo getNodeInfo(__global__ const CoordType& ijk) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached1(dirty)) {
return mNode1->getNodeInfoAndCache(ijk, *this);
@@ -5037,12 +6022,12 @@
return mRoot->getNodeInfoAndCache(ijk, *this);
}
- __hostdev__ bool isActive(const CoordType& ijk) const
+ __hostdev__ bool isActive(__global__ const CoordType& ijk) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached1(dirty)) {
return mNode1->isActiveAndCache(ijk, *this);
@@ -5052,12 +6037,12 @@
return mRoot->isActiveAndCache(ijk, *this);
}
- __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const
+ __hostdev__ bool probeValue(__global__ const CoordType& ijk, __global__ ValueType& v) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached1(dirty)) {
return mNode1->probeValueAndCache(ijk, v, *this);
@@ -5067,12 +6052,12 @@
return mRoot->probeValueAndCache(ijk, v, *this);
}
- __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const
+ __hostdev__ __global__ const LeafT* probeLeaf(__global__ const CoordType& ijk) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached1(dirty)) {
return mNode1->probeLeafAndCache(ijk, *this);
@@ -5083,12 +6068,12 @@
}
template<typename RayT>
- __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const
+ __hostdev__ uint32_t getDim(__global__ const CoordType& ijk, __global__ const RayT& ray) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached1(dirty)) {
return mNode1->getDimAndCache(ijk, ray, *this);
@@ -5108,7 +6093,7 @@
friend class LeafNode;
/// @brief Inserts a leaf node and key pair into this ReadAccessor
- __hostdev__ void insert(const CoordType& ijk, const Node1T* node) const
+ __hostdev__ void insert(__global__ const CoordType& ijk, __global__ const Node1T* node) const __local__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
mKey = ijk;
@@ -5117,7 +6102,7 @@
#endif
mNode1 = node;
}
- __hostdev__ void insert(const CoordType& ijk, const Node2T* node) const
+ __hostdev__ void insert(__local__ const CoordType& ijk, __global__ const Node2T* node) const __local__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
mKey = ijk;
@@ -5127,7 +6112,11 @@
mNode2 = node;
}
template <typename OtherNodeT>
- __hostdev__ void insert(const CoordType&, const OtherNodeT*) const {}
+ __hostdev__ void insert(__global__ const CoordType&, __global__ const OtherNodeT*) const __local__ {}
+#if defined(__KERNEL_METAL__)
+ template <typename OtherNodeT>
+ __hostdev__ void insert(__local__ const CoordType&, __global__ const OtherNodeT*) const __local__ {}
+#endif
}; // ReadAccessor<BuildT, LEVEL0, LEVEL1>
@@ -5145,7 +6134,7 @@
using ValueT = typename RootT::ValueType;
using FloatType = typename RootT::FloatType;
- using CoordValueType = typename RootT::CoordT::ValueType;
+ using CoordValueType = typename RootT::CoordType::ValueType;
// All member data are mutable to allow for access methods to be const
#ifdef USE_SINGLE_ACCESSOR_KEY // 44 bytes total
@@ -5153,19 +6142,19 @@
#else // 68 bytes total
mutable CoordT mKeys[3]; // 3*3*4 = 36 bytes
#endif
- mutable const RootT* mRoot;
- mutable const void* mNode[3]; // 4*8 = 32 bytes
+ mutable __global__ const RootT* mRoot;
+ mutable __global__ const void* mNode[3]; // 4*8 = 32 bytes
public:
using ValueType = ValueT;
using CoordType = CoordT;
- static const int CacheLevels = 3;
+ static __constant__ const int CacheLevels = 3;
using NodeInfo = typename ReadAccessor<ValueT, -1, -1, -1>::NodeInfo;
/// @brief Constructor from a root node
- __hostdev__ ReadAccessor(const RootT& root)
+ __hostdev__ ReadAccessor(__global__ const RootT& root) __local__
#ifdef USE_SINGLE_ACCESSOR_KEY
: mKey(CoordType::max())
#else
@@ -5177,35 +6166,38 @@
}
/// @brief Constructor from a grid
- __hostdev__ ReadAccessor(const GridT& grid) : ReadAccessor(grid.tree().root()) {}
+ __hostdev__ ReadAccessor(__global__ const GridT& grid) __local__ : ReadAccessor(grid.tree().root()) {}
/// @brief Constructor from a tree
- __hostdev__ ReadAccessor(const TreeT& tree) : ReadAccessor(tree.root()) {}
+ __hostdev__ ReadAccessor(__global__ const TreeT& tree) __local__ : ReadAccessor(tree.root()) {}
- __hostdev__ const RootT& root() const { return *mRoot; }
+ __hostdev__ __global__ const RootT& root() const __global__ { return *mRoot; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __global__ const RootT& root() const __local__ { return *mRoot; }
+#endif
/// @brief Defaults constructors
- ReadAccessor(const ReadAccessor&) = default;
- ~ReadAccessor() = default;
- ReadAccessor& operator=(const ReadAccessor&) = default;
+ ReadAccessor(__local__ const ReadAccessor&) __local__ = default;
+ ~ReadAccessor() __global__ = default;
+ __global__ ReadAccessor& operator=(__global__ const ReadAccessor&) __global__ = default;
/// @brief Return a const point to the cached node of the specified type
///
/// @warning The return value could be NULL.
template<typename NodeT>
- __hostdev__ const NodeT* getNode() const
+ __hostdev__ __global__ const NodeT* getNode() const __global__
{
using T = typename NodeTrait<TreeT, NodeT::LEVEL>::type;
static_assert(is_same<T, NodeT>::value, "ReadAccessor::getNode: Invalid node type");
- return reinterpret_cast<const T*>(mNode[NodeT::LEVEL]);
+ return reinterpret_cast<__global__ const T*>(mNode[NodeT::LEVEL]);
}
template <int LEVEL>
- __hostdev__ const typename NodeTrait<TreeT, LEVEL>::type* getNode() const
+ __hostdev__ __global__ const typename NodeTrait<TreeT, LEVEL>::type* getNode() const
{
using T = typename NodeTrait<TreeT, LEVEL>::type;
static_assert(LEVEL>=0 && LEVEL<=2, "ReadAccessor::getNode: Invalid node type");
- return reinterpret_cast<const T*>(mNode[LEVEL]);
+ return reinterpret_cast<__global__ const T*>(mNode[LEVEL]);
}
@@ -5222,7 +6214,7 @@
#ifdef USE_SINGLE_ACCESSOR_KEY
template<typename NodeT>
- __hostdev__ bool isCached(CoordValueType dirty) const
+ __hostdev__ bool isCached(CoordValueType dirty) const __global__
{
if (!mNode[NodeT::LEVEL])
return false;
@@ -5233,128 +6225,229 @@
return true;
}
- __hostdev__ CoordValueType computeDirty(const CoordType& ijk) const
+ __hostdev__ CoordValueType computeDirty(const CoordType& ijk) const __global__
{
return (ijk[0] ^ mKey[0]) | (ijk[1] ^ mKey[1]) | (ijk[2] ^ mKey[2]);
}
#else
template<typename NodeT>
- __hostdev__ bool isCached(const CoordType& ijk) const
+ __hostdev__ bool isCached(__global__ const CoordType& ijk) const __global__
{
return (ijk[0] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][0] && (ijk[1] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][1] && (ijk[2] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][2];
}
+#if defined(__KERNEL_METAL__)
+ template<typename NodeT>
+ __hostdev__ bool isCached(__local__ const CoordType& ijk) const __global__
+ {
+ return (ijk[0] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][0] && (ijk[1] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][1] && (ijk[2] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][2];
+ }
+ template<typename NodeT>
+ __hostdev__ bool isCached(__local__ const CoordType& ijk) const __local__
+ {
+ return (ijk[0] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][0] && (ijk[1] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][1] && (ijk[2] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][2];
+ }
+#endif // __KERNEL_METAL__
#endif
- __hostdev__ ValueType getValue(const CoordType& ijk) const
+ __hostdev__ ValueType getValue(__global__ const CoordType& ijk) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached<LeafT>(dirty)) {
- return ((LeafT*)mNode[0])->getValue(ijk);
+ return ((__global__ LeafT*)mNode[0])->getValue(ijk);
} else if (this->isCached<NodeT1>(dirty)) {
- return ((NodeT1*)mNode[1])->getValueAndCache(ijk, *this);
+ return ((__global__ NodeT1*)mNode[1])->getValueAndCache(ijk, *this);
} else if (this->isCached<NodeT2>(dirty)) {
- return ((NodeT2*)mNode[2])->getValueAndCache(ijk, *this);
+ return ((__global__ NodeT2*)mNode[2])->getValueAndCache(ijk, *this);
}
return mRoot->getValueAndCache(ijk, *this);
}
- __hostdev__ ValueType operator()(const CoordType& ijk) const
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(__local__ const CoordType& ijk) const __global__
{
+#ifdef USE_SINGLE_ACCESSOR_KEY
+ const CoordValueType dirty = this->computeDirty(ijk);
+#else
+ __local__ auto&& dirty = ijk;
+#endif
+ if (this->isCached<LeafT>(dirty)) {
+ return ((__global__ LeafT*)mNode[0])->getValue(ijk);
+ } else if (this->isCached<NodeT1>(dirty)) {
+ return ((__global__ NodeT1*)mNode[1])->getValueAndCache(ijk, *this);
+ } else if (this->isCached<NodeT2>(dirty)) {
+ return ((__global__ NodeT2*)mNode[2])->getValueAndCache(ijk, *this);
+ }
+ return mRoot->getValueAndCache(ijk, *this);
+ }
+ __hostdev__ ValueType getValue(__local__ const CoordType& ijk) const __local__
+ {
+#ifdef USE_SINGLE_ACCESSOR_KEY
+ const CoordValueType dirty = this->computeDirty(ijk);
+#else
+ __local__ auto&& dirty = ijk;
+#endif
+ if (this->isCached<LeafT>(dirty)) {
+ return ((__global__ LeafT*)mNode[0])->getValue(ijk);
+ } else if (this->isCached<NodeT1>(dirty)) {
+ return ((__global__ NodeT1*)mNode[1])->getValueAndCache(ijk, *this);
+ } else if (this->isCached<NodeT2>(dirty)) {
+ return ((__global__ NodeT2*)mNode[2])->getValueAndCache(ijk, *this);
+ }
+ return mRoot->getValueAndCache(ijk, *this);
+ }
+#endif // __KERNEL_METAL__
+
+ __hostdev__ ValueType operator()(__global__ const CoordType& ijk) const __global__
+ {
return this->getValue(ijk);
}
- __hostdev__ ValueType operator()(int i, int j, int k) const
+ __hostdev__ ValueType operator()(int i, int j, int k) const __global__
{
return this->getValue(CoordType(i,j,k));
}
- __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const
+ __hostdev__ NodeInfo getNodeInfo(__global__ const CoordType& ijk) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached<LeafT>(dirty)) {
- return ((LeafT*)mNode[0])->getNodeInfoAndCache(ijk, *this);
+ return ((__global__ LeafT*)mNode[0])->getNodeInfoAndCache(ijk, *this);
} else if (this->isCached<NodeT1>(dirty)) {
- return ((NodeT1*)mNode[1])->getNodeInfoAndCache(ijk, *this);
+ return ((__global__ NodeT1*)mNode[1])->getNodeInfoAndCache(ijk, *this);
} else if (this->isCached<NodeT2>(dirty)) {
- return ((NodeT2*)mNode[2])->getNodeInfoAndCache(ijk, *this);
+ return ((__global__ NodeT2*)mNode[2])->getNodeInfoAndCache(ijk, *this);
}
return mRoot->getNodeInfoAndCache(ijk, *this);
}
- __hostdev__ bool isActive(const CoordType& ijk) const
+ __hostdev__ bool isActive(__global__ const CoordType& ijk) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached<LeafT>(dirty)) {
- return ((LeafT*)mNode[0])->isActive(ijk);
+ return ((__global__ LeafT*)mNode[0])->isActive(ijk);
} else if (this->isCached<NodeT1>(dirty)) {
- return ((NodeT1*)mNode[1])->isActiveAndCache(ijk, *this);
+ return ((__global__ NodeT1*)mNode[1])->isActiveAndCache(ijk, *this);
} else if (this->isCached<NodeT2>(dirty)) {
- return ((NodeT2*)mNode[2])->isActiveAndCache(ijk, *this);
+ return ((__global__ NodeT2*)mNode[2])->isActiveAndCache(ijk, *this);
}
return mRoot->isActiveAndCache(ijk, *this);
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isActive(__local__ const CoordType& ijk) const __local__
+ {
+#ifdef USE_SINGLE_ACCESSOR_KEY
+ const CoordValueType dirty = this->computeDirty(ijk);
+#else
+ __local__ auto&& dirty = ijk;
+#endif
+ if (this->isCached<LeafT>(dirty)) {
+ return ((__global__ LeafT*)mNode[0])->isActive(ijk);
+ } else if (this->isCached<NodeT1>(dirty)) {
+ return ((__global__ NodeT1*)mNode[1])->isActiveAndCache(ijk, *this);
+ } else if (this->isCached<NodeT2>(dirty)) {
+ return ((__global__ NodeT2*)mNode[2])->isActiveAndCache(ijk, *this);
+ }
+ return mRoot->isActiveAndCache(ijk, *this);
+ }
+#endif
- __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const
+ __hostdev__ bool probeValue(__global__ const CoordType& ijk, __global__ ValueType& v) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached<LeafT>(dirty)) {
- return ((LeafT*)mNode[0])->probeValue(ijk, v);
+ return ((__global__ LeafT*)mNode[0])->probeValue(ijk, v);
} else if (this->isCached<NodeT1>(dirty)) {
- return ((NodeT1*)mNode[1])->probeValueAndCache(ijk, v, *this);
+ return ((__global__ NodeT1*)mNode[1])->probeValueAndCache(ijk, v, *this);
} else if (this->isCached<NodeT2>(dirty)) {
- return ((NodeT2*)mNode[2])->probeValueAndCache(ijk, v, *this);
+ return ((__global__ NodeT2*)mNode[2])->probeValueAndCache(ijk, v, *this);
}
return mRoot->probeValueAndCache(ijk, v, *this);
}
- __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const
+ __hostdev__ __global__ const LeafT* probeLeaf(__global__ const CoordType& ijk) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached<LeafT>(dirty)) {
- return ((LeafT*)mNode[0]);
+ return ((__global__ LeafT*)mNode[0]);
} else if (this->isCached<NodeT1>(dirty)) {
- return ((NodeT1*)mNode[1])->probeLeafAndCache(ijk, *this);
+ return ((__global__ NodeT1*)mNode[1])->probeLeafAndCache(ijk, *this);
} else if (this->isCached<NodeT2>(dirty)) {
- return ((NodeT2*)mNode[2])->probeLeafAndCache(ijk, *this);
+ return ((__global__ NodeT2*)mNode[2])->probeLeafAndCache(ijk, *this);
}
return mRoot->probeLeafAndCache(ijk, *this);
}
template<typename RayT>
- __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const
+ __hostdev__ uint32_t getDim(__global__ const CoordType& ijk, __global__ const RayT& ray) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached<LeafT>(dirty)) {
- return ((LeafT*)mNode[0])->getDimAndCache(ijk, ray, *this);
+ return ((__global__ LeafT*)mNode[0])->getDimAndCache(ijk, ray, *this);
} else if (this->isCached<NodeT1>(dirty)) {
- return ((NodeT1*)mNode[1])->getDimAndCache(ijk, ray, *this);
+ return ((__global__ NodeT1*)mNode[1])->getDimAndCache(ijk, ray, *this);
} else if (this->isCached<NodeT2>(dirty)) {
- return ((NodeT2*)mNode[2])->getDimAndCache(ijk, ray, *this);
+ return ((__global__ NodeT2*)mNode[2])->getDimAndCache(ijk, ray, *this);
}
return mRoot->getDimAndCache(ijk, ray, *this);
}
+#if defined(__KERNEL_METAL__)
+ template<typename RayT>
+ __hostdev__ uint32_t getDim(__global__ const CoordType& ijk, __local__ const RayT& ray) const __global__
+ {
+#ifdef USE_SINGLE_ACCESSOR_KEY
+ const CoordValueType dirty = this->computeDirty(ijk);
+#else
+ __global__ auto&& dirty = ijk;
+#endif
+ if (this->isCached<LeafT>(dirty)) {
+ return ((__global__ LeafT*)mNode[0])->getDimAndCache(ijk, ray, *this);
+ } else if (this->isCached<NodeT1>(dirty)) {
+ return ((__global__ NodeT1*)mNode[1])->getDimAndCache(ijk, ray, *this);
+ } else if (this->isCached<NodeT2>(dirty)) {
+ return ((__global__ NodeT2*)mNode[2])->getDimAndCache(ijk, ray, *this);
+ }
+ return mRoot->getDimAndCache(ijk, ray, *this);
+ }
+ template<typename RayT>
+ __hostdev__ uint32_t getDim(__local__ const CoordType& ijk, __local__ const RayT& ray) const __local__
+ {
+#ifdef USE_SINGLE_ACCESSOR_KEY
+ const CoordValueType dirty = this->computeDirty(ijk);
+#else
+ __local__ auto&& dirty = ijk;
+#endif
+ if (this->isCached<LeafT>(dirty)) {
+ return ((__global__ LeafT*)mNode[0])->getDimAndCache(ijk, ray, *this);
+ } else if (this->isCached<NodeT1>(dirty)) {
+ return ((__global__ NodeT1*)mNode[1])->getDimAndCache(ijk, ray, *this);
+ } else if (this->isCached<NodeT2>(dirty)) {
+ return ((__global__ NodeT2*)mNode[2])->getDimAndCache(ijk, ray, *this);
+ }
+ return mRoot->getDimAndCache(ijk, ray, *this);
+ }
+#endif // __KERNEL_METAL__
private:
/// @brief Allow nodes to insert themselves into the cache.
@@ -5367,7 +6460,7 @@
/// @brief Inserts a leaf node and key pair into this ReadAccessor
template<typename NodeT>
- __hostdev__ void insert(const CoordType& ijk, const NodeT* node) const
+ __hostdev__ void insert(__global__ const CoordType& ijk, __global__ const NodeT* node) const __local__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
mKey = ijk;
@@ -5376,6 +6469,28 @@
#endif
mNode[NodeT::LEVEL] = node;
}
+#if defined(__KERNEL_METAL__)
+ template<typename NodeT>
+ __hostdev__ void insert(__local__ const CoordType& ijk, __global__ const NodeT* node) const __local__
+ {
+#ifdef USE_SINGLE_ACCESSOR_KEY
+ mKey = ijk;
+#else
+ mKeys[NodeT::LEVEL] = ijk & ~NodeT::MASK;
+#endif
+ mNode[NodeT::LEVEL] = node;
+ }
+ template<typename NodeT>
+ __hostdev__ void insert(__local__ const CoordType& ijk, __global__ const NodeT* node) const __global__
+ {
+#ifdef USE_SINGLE_ACCESSOR_KEY
+ mKey = ijk;
+#else
+ mKeys[NodeT::LEVEL] = ijk & ~NodeT::MASK;
+#endif
+ mNode[NodeT::LEVEL] = node;
+ }
+#endif // __KERNEL_METAL__
}; // ReadAccessor<BuildT, 0, 1, 2>
//////////////////////////////////////////////////
@@ -5393,19 +6508,19 @@
/// createAccessor<0,1,2>(grid): Caching of all nodes at all tree levels
template <int LEVEL0 = -1, int LEVEL1 = -1, int LEVEL2 = -1, typename ValueT = float>
-ReadAccessor<ValueT, LEVEL0, LEVEL1, LEVEL2> createAccessor(const NanoGrid<ValueT> &grid)
+ReadAccessor<ValueT, LEVEL0, LEVEL1, LEVEL2> createAccessor(__global__ const NanoGrid<ValueT> &grid)
{
return ReadAccessor<ValueT, LEVEL0, LEVEL1, LEVEL2>(grid);
}
template <int LEVEL0 = -1, int LEVEL1 = -1, int LEVEL2 = -1, typename ValueT = float>
-ReadAccessor<ValueT, LEVEL0, LEVEL1, LEVEL2> createAccessor(const NanoTree<ValueT> &tree)
+ReadAccessor<ValueT, LEVEL0, LEVEL1, LEVEL2> createAccessor(__global__ const NanoTree<ValueT> &tree)
{
return ReadAccessor<ValueT, LEVEL0, LEVEL1, LEVEL2>(tree);
}
template <int LEVEL0 = -1, int LEVEL1 = -1, int LEVEL2 = -1, typename ValueT = float>
-ReadAccessor<ValueT, LEVEL0, LEVEL1, LEVEL2> createAccessor(const NanoRoot<ValueT> &root)
+ReadAccessor<ValueT, LEVEL0, LEVEL1, LEVEL2> createAccessor(__global__ const NanoRoot<ValueT> &root)
{
return ReadAccessor<ValueT, LEVEL0, LEVEL1, LEVEL2>(root);
}
@@ -5424,52 +6539,59 @@
// memory-layout of the data structure and the reasons why certain methods are safe
// to call and others are not!
using GridT = NanoGrid<int>;
- __hostdev__ const GridT& grid() const { return *reinterpret_cast<const GridT*>(this); }
+ __hostdev__ __global__ const GridT& grid() const __global__ { return *reinterpret_cast<__global__ const GridT*>(this); }
public:
- __hostdev__ bool isValid() const { return this->grid().isValid(); }
- __hostdev__ uint64_t gridSize() const { return this->grid().gridSize(); }
- __hostdev__ uint32_t gridIndex() const { return this->grid().gridIndex(); }
- __hostdev__ uint32_t gridCount() const { return this->grid().gridCount(); }
- __hostdev__ const char* shortGridName() const { return this->grid().shortGridName(); }
- __hostdev__ GridType gridType() const { return this->grid().gridType(); }
- __hostdev__ GridClass gridClass() const { return this->grid().gridClass(); }
- __hostdev__ bool isLevelSet() const { return this->grid().isLevelSet(); }
- __hostdev__ bool isFogVolume() const { return this->grid().isFogVolume(); }
- __hostdev__ bool isPointIndex() const { return this->grid().isPointIndex(); }
- __hostdev__ bool isPointData() const { return this->grid().isPointData(); }
- __hostdev__ bool isMask() const { return this->grid().isMask(); }
- __hostdev__ bool isStaggered() const { return this->grid().isStaggered(); }
- __hostdev__ bool isUnknown() const { return this->grid().isUnknown(); }
- __hostdev__ const Map& map() const { return this->grid().map(); }
- __hostdev__ const BBox<Vec3R>& worldBBox() const { return this->grid().worldBBox(); }
- __hostdev__ const BBox<Coord>& indexBBox() const { return this->grid().indexBBox(); }
- __hostdev__ Vec3R voxelSize() const { return this->grid().voxelSize(); }
- __hostdev__ int blindDataCount() const { return this->grid().blindDataCount(); }
- __hostdev__ const GridBlindMetaData& blindMetaData(uint32_t n) const { return this->grid().blindMetaData(n); }
- __hostdev__ uint64_t activeVoxelCount() const { return this->grid().activeVoxelCount(); }
- __hostdev__ const uint32_t& activeTileCount(uint32_t level) const { return this->grid().tree().activeTileCount(level); }
- __hostdev__ uint32_t nodeCount(uint32_t level) const { return this->grid().tree().nodeCount(level); }
- __hostdev__ uint64_t checksum() const { return this->grid().checksum(); }
- __hostdev__ bool isEmpty() const { return this->grid().isEmpty(); }
- __hostdev__ Version version() const { return this->grid().version(); }
+ __hostdev__ bool isValid() const __global__ { return this->grid().isValid(); }
+ __hostdev__ uint64_t gridSize() const __global__ { return this->grid().gridSize(); }
+ __hostdev__ uint32_t gridIndex() const __global__ { return this->grid().gridIndex(); }
+ __hostdev__ uint32_t gridCount() const __global__ { return this->grid().gridCount(); }
+ __hostdev__ __global__ const char* shortGridName() const __global__ { return this->grid().shortGridName(); }
+ __hostdev__ GridType gridType() const __global__ { return this->grid().gridType(); }
+ __hostdev__ GridClass gridClass() const __global__ { return this->grid().gridClass(); }
+ __hostdev__ bool isLevelSet() const __global__ { return this->grid().isLevelSet(); }
+ __hostdev__ bool isFogVolume() const __global__ { return this->grid().isFogVolume(); }
+ __hostdev__ bool isPointIndex() const __global__ { return this->grid().isPointIndex(); }
+ __hostdev__ bool isPointData() const __global__ { return this->grid().isPointData(); }
+ __hostdev__ bool isMask() const __global__ { return this->grid().isMask(); }
+ __hostdev__ bool isStaggered() const __global__ { return this->grid().isStaggered(); }
+ __hostdev__ bool isUnknown() const __global__ { return this->grid().isUnknown(); }
+ __hostdev__ __global__ const Map& map() const __global__ { return this->grid().map(); }
+ __hostdev__ __global__ const BBox<Vec3R>& worldBBox() const __global__ { return this->grid().worldBBox(); }
+ __hostdev__ __global__ const BBox<Coord>& indexBBox() const __global__ { return this->grid().indexBBox(); }
+ __hostdev__ Vec3R voxelSize() const __global__ { return this->grid().voxelSize(); }
+ __hostdev__ int blindDataCount() const __global__ { return this->grid().blindDataCount(); }
+ __hostdev__ __global__ const GridBlindMetaData& blindMetaData(uint32_t n) const __global__ { return this->grid().blindMetaData(n); }
+ __hostdev__ uint64_t activeVoxelCount() const __global__ { return this->grid().activeVoxelCount(); }
+ __hostdev__ __global__ const uint32_t& activeTileCount(uint32_t level) const __global__ { return this->grid().tree().activeTileCount(level); }
+ __hostdev__ uint32_t nodeCount(uint32_t level) const __global__ { return this->grid().tree().nodeCount(level); }
+ __hostdev__ uint64_t checksum() const __global__ { return this->grid().checksum(); }
+ __hostdev__ bool isEmpty() const __global__ { return this->grid().isEmpty(); }
+ __hostdev__ Version version() const __global__ { return this->grid().version(); }
}; // GridMetaData
/// @brief Class to access points at a specific voxel location
template<typename AttT>
-class PointAccessor : public DefaultReadAccessor<uint32_t>
+class PointAccessor
+#if !defined(__KERNEL_METAL__)
+ : public DefaultReadAccessor<uint32_t>
+#endif
{
+#if defined(__KERNEL_METAL__)
+ DefaultReadAccessor<uint32_t> AccT;
+#else
using AccT = DefaultReadAccessor<uint32_t>;
- const UInt32Grid* mGrid;
- const AttT* mData;
+#endif
+ const __global__ UInt32Grid* mGrid;
+ const __global__ AttT* mData;
public:
using LeafNodeType = typename NanoRoot<uint32_t>::LeafNodeType;
- PointAccessor(const UInt32Grid& grid)
+ PointAccessor(__global__ const UInt32Grid& grid) __local__
: AccT(grid.tree().root())
, mGrid(&grid)
- , mData(reinterpret_cast<const AttT*>(grid.blindData(0)))
+ , mData(reinterpret_cast<__global__ const AttT*>(grid.blindData(0)))
{
NANOVDB_ASSERT(grid.gridType() == GridType::UInt32);
NANOVDB_ASSERT((grid.gridClass() == GridClass::PointIndex && is_same<uint32_t, AttT>::value) ||
@@ -5478,7 +6600,7 @@
}
/// @brief Return the total number of point in the grid and set the
/// iterators to the complete range of points.
- __hostdev__ uint64_t gridPoints(const AttT*& begin, const AttT*& end) const
+ __hostdev__ uint64_t gridPoints(__global__ const AttT*& begin, __global__ const AttT*& end) const __global__
{
const uint64_t count = mGrid->blindMetaData(0u).mElementCount;
begin = mData;
@@ -5488,9 +6610,9 @@
/// @brief Return the number of points in the leaf node containing the coordinate @a ijk.
/// If this return value is larger than zero then the iterators @a begin and @a end
/// will point to all the attributes contained within that leaf node.
- __hostdev__ uint64_t leafPoints(const Coord& ijk, const AttT*& begin, const AttT*& end) const
+ __hostdev__ uint64_t leafPoints(__global__ const Coord& ijk, __global__ const AttT*& begin, __global__ const AttT*& end) const __global__
{
- auto* leaf = this->probeLeaf(ijk);
+ __global__ auto* leaf = this->probeLeaf(ijk);
if (leaf == nullptr) {
return 0;
}
@@ -5500,14 +6622,14 @@
}
/// @brief get iterators over offsets to points at a specific voxel location
- __hostdev__ uint64_t voxelPoints(const Coord& ijk, const AttT*& begin, const AttT*& end) const
+ __hostdev__ uint64_t voxelPoints(__global__ const Coord& ijk, __global__ const AttT*& begin, __global__ const AttT*& end) const __global__
{
- auto* leaf = this->probeLeaf(ijk);
+ __global__ auto* leaf = this->probeLeaf(ijk);
if (leaf == nullptr)
return 0;
const uint32_t offset = LeafNodeType::CoordToOffset(ijk);
if (leaf->isActive(offset)) {
- auto* p = mData + leaf->minimum();
+ __global__ auto* p = mData + leaf->minimum();
begin = p + (offset == 0 ? 0 : leaf->getValue(offset - 1));
end = p + leaf->getValue(offset);
return end - begin;
@@ -5520,11 +6642,20 @@
///
/// @note The ChannelT template parameter can be either const and non-const.
template<typename ChannelT>
-class ChannelAccessor : public DefaultReadAccessor<ValueIndex>
+class ChannelAccessor
+#if !defined (__KERNEL_METAL__)
+ : public DefaultReadAccessor<ValueIndex>
+#endif
{
+#if defined (__KERNEL_METAL__)
+ DefaultReadAccessor<ValueIndex> BaseT;
+#define BASE(v) BaseT.v
+#else
using BaseT = DefaultReadAccessor<ValueIndex>;
- const IndexGrid &mGrid;
- ChannelT *mChannel;
+#define BASE(v) BaseT::v
+#endif
+ __global__ const IndexGrid &mGrid;
+ __global__ ChannelT *mChannel;
public:
using ValueType = ChannelT;
@@ -5533,7 +6664,7 @@
/// @brief Ctor from an IndexGrid and an integer ID of an internal channel
/// that is assumed to exist as blind data in the IndexGrid.
- __hostdev__ ChannelAccessor(const IndexGrid& grid, uint32_t channelID = 0u)
+ __hostdev__ ChannelAccessor(__global__ const IndexGrid& grid, uint32_t channelID = 0u)
: BaseT(grid.tree().root())
, mGrid(grid)
, mChannel(nullptr)
@@ -5544,7 +6675,7 @@
}
/// @brief Ctor from an IndexGrid and an external channel
- __hostdev__ ChannelAccessor(const IndexGrid& grid, ChannelT *channelPtr)
+ __hostdev__ ChannelAccessor(__global__ const IndexGrid& grid, __global__ ChannelT *channelPtr)
: BaseT(grid.tree().root())
, mGrid(grid)
, mChannel(channelPtr)
@@ -5555,19 +6686,19 @@
}
/// @brief Return a const reference to the IndexGrid
- __hostdev__ const IndexGrid &grid() const {return mGrid;}
+ __hostdev__ __global__ const IndexGrid &grid() const {return mGrid;}
/// @brief Return a const reference to the tree of the IndexGrid
- __hostdev__ const IndexTree &tree() const {return mGrid.tree();}
+ __hostdev__ __global__ const IndexTree &tree() const {return mGrid.tree();}
/// @brief Return a vector of the axial voxel sizes
- __hostdev__ const Vec3R& voxelSize() const { return mGrid.voxelSize(); }
+ __hostdev__ __global__ const Vec3R& voxelSize() const { return mGrid.voxelSize(); }
/// @brief Return total number of values indexed by the IndexGrid
- __hostdev__ const uint64_t& valueCount() const { return mGrid.valueCount(); }
+ __hostdev__ uint64_t valueCount() const { return mGrid.valueCount(); }
/// @brief Change to an external channel
- __hostdev__ void setChannel(ChannelT *channelPtr)
+ __hostdev__ void setChannel(__global__ ChannelT *channelPtr)
{
mChannel = channelPtr;
NANOVDB_ASSERT(mChannel);
@@ -5577,23 +6708,24 @@
/// in the IndexGrid.
__hostdev__ void setChannel(uint32_t channelID)
{
- this->setChannel(reinterpret_cast<ChannelT*>(const_cast<void*>(mGrid.blindData(channelID))));
+ this->setChannel(reinterpret_cast<__global__ ChannelT*>(const_cast<__global__ void*>(mGrid.blindData(channelID))));
}
/// @brief Return the linear offset into a channel that maps to the specified coordinate
- __hostdev__ uint64_t getIndex(const Coord& ijk) const {return BaseT::getValue(ijk);}
- __hostdev__ uint64_t idx(int i, int j, int k) const {return BaseT::getValue(Coord(i,j,k));}
+ __hostdev__ uint64_t getIndex(__global__ const Coord& ijk) const {return BASE(getValue)(ijk);}
+ __hostdev__ uint64_t idx(int i, int j, int k) const {return BASE(getValue)(Coord(i,j,k));}
/// @brief Return the value from a cached channel that maps to the specified coordinate
- __hostdev__ ChannelT& getValue(const Coord& ijk) const {return mChannel[BaseT::getValue(ijk)];}
- __hostdev__ ChannelT& operator()(const Coord& ijk) const {return this->getValue(ijk);}
- __hostdev__ ChannelT& operator()(int i, int j, int k) const {return this->getValue(Coord(i,j,k));}
+ __hostdev__ __global__ ChannelT& getValue(__global__ const Coord& ijk) const {return mChannel[BASE(getValue)(ijk)];}
+ __hostdev__ __global__ ChannelT& operator()(__global__ const Coord& ijk) const {return this->getValue(ijk);}
+ __hostdev__ __global__ ChannelT& operator()(int i, int j, int k) const {return this->getValue(Coord(i,j,k));}
/// @brief return the state and updates the value of the specified voxel
- __hostdev__ bool probeValue(const CoordType& ijk, typename remove_const<ChannelT>::type &v) const
+ using CoordType = DefaultReadAccessor<ValueIndex>::CoordType;
+ __hostdev__ bool probeValue(__global__ const CoordType& ijk, __global__ typename remove_const<ChannelT>::type &v) const
{
uint64_t idx;
- const bool isActive = BaseT::probeValue(ijk, idx);
+ const bool isActive = BASE(probeValue)(ijk, idx);
v = mChannel[idx];
return isActive;
}
@@ -5601,7 +6733,7 @@
///
/// @note The template parameter can be either const or non-const
template <typename T>
- __hostdev__ T& getValue(const Coord& ijk, T* channelPtr) const {return channelPtr[BaseT::getValue(ijk)];}
+ __hostdev__ __global__ T& getValue(__global__ const Coord& ijk, __global__ T* channelPtr) const {return channelPtr[BASE(getValue)(ijk)];}
}; // ChannelAccessor
@@ -5643,6 +6775,7 @@
/// @throw std::invalid_argument if buffer does not point to a valid NanoVDB grid.
///
/// @warning This is pretty ugly code that involves lots of pointer and bit manipulations - not for the faint of heart :)
+#if !defined(__KERNEL_METAL__)
template <typename StreamT>// StreamT class must support: "void write(char*, size_t)"
void writeUncompressedGrid(StreamT &os, const void *buffer)
{
@@ -5768,7 +6901,7 @@
}
return readUncompressedGrids<GridHandleT, StreamT, VecT>(is, buffer);
}// readUncompressedGrids
-
+#endif // #if !defined(__KERNEL_METAL__)
} // namespace io
#endif// if !defined(__CUDA_ARCH__) && !defined(__HIP__)
Index: nanovdb/nanovdb/util/SampleFromVoxels.h
===================================================================
--- nanovdb/nanovdb/util/SampleFromVoxels.h (revision 63221)
+++ nanovdb/nanovdb/util/SampleFromVoxels.h (working copy)
@@ -1,983 +1,1120 @@
-// Copyright Contributors to the OpenVDB Project
-// SPDX-License-Identifier: MPL-2.0
-
-//////////////////////////////////////////////////////////////////////////
-///
-/// @file SampleFromVoxels.h
-///
-/// @brief NearestNeighborSampler, TrilinearSampler, TriquadraticSampler and TricubicSampler
-///
-/// @note These interpolators employ internal caching for better performance when used repeatedly
-/// in the same voxel location, so try to reuse an instance of these classes more than once.
-///
-/// @warning While all the interpolators defined below work with both scalars and vectors
-/// values (e.g. float and Vec3<float>) TrilinarSampler::zeroCrossing and
-/// Trilinear::gradient will only compile with floating point value types.
-///
-/// @author Ken Museth
-///
-///////////////////////////////////////////////////////////////////////////
-
-#ifndef NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED
-#define NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED
-
-// Only define __hostdev__ when compiling as NVIDIA CUDA
-#if defined(__CUDACC__) || defined(__HIP__)
-#define __hostdev__ __host__ __device__
-#else
-#include <cmath> // for floor
-#define __hostdev__
-#endif
-
-namespace nanovdb {
-
-// Forward declaration of sampler with specific polynomial orders
-template<typename TreeT, int Order, bool UseCache = true>
-class SampleFromVoxels;
-
-/// @brief Factory free-function for a sampler of specific polynomial orders
-///
-/// @details This allows for the compact syntax:
-/// @code
-/// auto acc = grid.getAccessor();
-/// auto smp = nanovdb::createSampler<1>( acc );
-/// @endcode
-template<int Order, typename TreeOrAccT, bool UseCache = true>
-__hostdev__ SampleFromVoxels<TreeOrAccT, Order, UseCache> createSampler(const TreeOrAccT& acc)
-{
- return SampleFromVoxels<TreeOrAccT, Order, UseCache>(acc);
-}
-
-/// @brief Utility function that returns the Coord of the round-down of @a xyz
-/// and redefined @xyz as the fractional part, ie xyz-in = return-value + xyz-out
-template<typename CoordT, typename RealT, template<typename> class Vec3T>
-__hostdev__ inline CoordT Floor(Vec3T<RealT>& xyz);
-
-/// @brief Template specialization of Floor for Vec3<float>
-template<typename CoordT, template<typename> class Vec3T>
-__hostdev__ inline CoordT Floor(Vec3T<float>& xyz)
-{
- const float ijk[3] = {floorf(xyz[0]), floorf(xyz[1]), floorf(xyz[2])};
- xyz[0] -= ijk[0];
- xyz[1] -= ijk[1];
- xyz[2] -= ijk[2];
- return CoordT(int32_t(ijk[0]), int32_t(ijk[1]), int32_t(ijk[2]));
-}
-
-/// @brief Template specialization of Floor for Vec3<float>
-template<typename CoordT, template<typename> class Vec3T>
-__hostdev__ inline CoordT Floor(Vec3T<double>& xyz)
-{
- const double ijk[3] = {floor(xyz[0]), floor(xyz[1]), floor(xyz[2])};
- xyz[0] -= ijk[0];
- xyz[1] -= ijk[1];
- xyz[2] -= ijk[2];
- return CoordT(int32_t(ijk[0]), int32_t(ijk[1]), int32_t(ijk[2]));
-}
-
-// ------------------------------> NearestNeighborSampler <--------------------------------------
-
-/// @brief Nearest neighbor, i.e. zero order, interpolator with caching
-template<typename TreeOrAccT>
-class SampleFromVoxels<TreeOrAccT, 0, true>
-{
-public:
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
-
- static const int ORDER = 0;
- /// @brief Construction from a Tree or ReadAccessor
- __hostdev__ SampleFromVoxels(const TreeOrAccT& acc)
- : mAcc(acc)
- , mPos(CoordT::max())
- {
- }
-
- __hostdev__ const TreeOrAccT& accessor() const { return mAcc; }
-
- /// @note xyz is in index space space
- template<typename Vec3T>
- inline __hostdev__ ValueT operator()(const Vec3T& xyz) const;
-
- inline __hostdev__ ValueT operator()(const CoordT& ijk) const;
-
-private:
- const TreeOrAccT& mAcc;
- mutable CoordT mPos;
- mutable ValueT mVal; // private cache
-}; // SampleFromVoxels<TreeOrAccT, 0, true>
-
-/// @brief Nearest neighbor, i.e. zero order, interpolator without caching
-template<typename TreeOrAccT>
-class SampleFromVoxels<TreeOrAccT, 0, false>
-{
-public:
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
- static const int ORDER = 0;
-
- /// @brief Construction from a Tree or ReadAccessor
- __hostdev__ SampleFromVoxels(const TreeOrAccT& acc)
- : mAcc(acc)
- {
- }
-
- __hostdev__ const TreeOrAccT& accessor() const { return mAcc; }
-
- /// @note xyz is in index space space
- template<typename Vec3T>
- inline __hostdev__ ValueT operator()(const Vec3T& xyz) const;
-
- inline __hostdev__ ValueT operator()(const CoordT& ijk) const { return mAcc.getValue(ijk);}
-
-private:
- const TreeOrAccT& mAcc;
-}; // SampleFromVoxels<TreeOrAccT, 0, false>
-
-template<typename TreeOrAccT>
-template<typename Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const Vec3T& xyz) const
-{
- const CoordT ijk = Round<CoordT>(xyz);
- if (ijk != mPos) {
- mPos = ijk;
- mVal = mAcc.getValue(mPos);
- }
- return mVal;
-}
-
-template<typename TreeOrAccT>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const CoordT& ijk) const
-{
- if (ijk != mPos) {
- mPos = ijk;
- mVal = mAcc.getValue(mPos);
- }
- return mVal;
-}
-
-template<typename TreeOrAccT>
-template<typename Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, false>::operator()(const Vec3T& xyz) const
-{
- return mAcc.getValue(Round<CoordT>(xyz));
-}
-
-// ------------------------------> TrilinearSampler <--------------------------------------
-
-/// @brief Tri-linear sampler, i.e. first order, interpolator
-template<typename TreeOrAccT>
-class TrilinearSampler
-{
-protected:
- const TreeOrAccT& mAcc;
-
-public:
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
- static const int ORDER = 1;
-
- /// @brief Protected constructor from a Tree or ReadAccessor
- __hostdev__ TrilinearSampler(const TreeOrAccT& acc) : mAcc(acc) {}
-
- __hostdev__ const TreeOrAccT& accessor() const { return mAcc; }
-
- /// @brief Extract the stencil of 8 values
- inline __hostdev__ void stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const;
-
- template<typename RealT, template<typename...> class Vec3T>
- static inline __hostdev__ ValueT sample(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2]);
-
- template<typename RealT, template<typename...> class Vec3T>
- static inline __hostdev__ Vec3T<ValueT> gradient(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2]);
-
- static inline __hostdev__ bool zeroCrossing(const ValueT (&v)[2][2][2]);
-}; // TrilinearSamplerBase
-
-template<typename TreeOrAccT>
-__hostdev__ void TrilinearSampler<TreeOrAccT>::stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const
-{
- v[0][0][0] = mAcc.getValue(ijk); // i, j, k
-
- ijk[2] += 1;
- v[0][0][1] = mAcc.getValue(ijk); // i, j, k + 1
-
- ijk[1] += 1;
- v[0][1][1] = mAcc.getValue(ijk); // i, j+1, k + 1
-
- ijk[2] -= 1;
- v[0][1][0] = mAcc.getValue(ijk); // i, j+1, k
-
- ijk[0] += 1;
- ijk[1] -= 1;
- v[1][0][0] = mAcc.getValue(ijk); // i+1, j, k
-
- ijk[2] += 1;
- v[1][0][1] = mAcc.getValue(ijk); // i+1, j, k + 1
-
- ijk[1] += 1;
- v[1][1][1] = mAcc.getValue(ijk); // i+1, j+1, k + 1
-
- ijk[2] -= 1;
- v[1][1][0] = mAcc.getValue(ijk); // i+1, j+1, k
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType TrilinearSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
-{
-#if 0
- auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a
- //auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b, fma(-w, a, a));};// = (1-w)*a + w*b
-#else
- auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); };
-#endif
- return lerp(lerp(lerp(v[0][0][0], v[0][0][1], uvw[2]), lerp(v[0][1][0], v[0][1][1], uvw[2]), uvw[1]),
- lerp(lerp(v[1][0][0], v[1][0][1], uvw[2]), lerp(v[1][1][0], v[1][1][1], uvw[2]), uvw[1]),
- uvw[0]);
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ Vec3T<typename TreeOrAccT::ValueType> TrilinearSampler<TreeOrAccT>::gradient(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
-{
- static_assert(is_floating_point<ValueT>::value, "TrilinearSampler::gradient requires a floating-point type");
-#if 0
- auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a
- //auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b, fma(-w, a, a));};// = (1-w)*a + w*b
-#else
- auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); };
-#endif
-
- ValueT D[4] = {v[0][0][1] - v[0][0][0], v[0][1][1] - v[0][1][0], v[1][0][1] - v[1][0][0], v[1][1][1] - v[1][1][0]};
-
- // Z component
- Vec3T<ValueT> grad(0, 0, lerp(lerp(D[0], D[1], uvw[1]), lerp(D[2], D[3], uvw[1]), uvw[0]));
-
- const ValueT w = ValueT(uvw[2]);
- D[0] = v[0][0][0] + D[0] * w;
- D[1] = v[0][1][0] + D[1] * w;
- D[2] = v[1][0][0] + D[2] * w;
- D[3] = v[1][1][0] + D[3] * w;
-
- // X component
- grad[0] = lerp(D[2], D[3], uvw[1]) - lerp(D[0], D[1], uvw[1]);
-
- // Y component
- grad[1] = lerp(D[1] - D[0], D[3] - D[2], uvw[0]);
-
- return grad;
-}
-
-template<typename TreeOrAccT>
-__hostdev__ bool TrilinearSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[2][2][2])
-{
- static_assert(is_floating_point<ValueT>::value, "TrilinearSampler::zeroCrossing requires a floating-point type");
- const bool less = v[0][0][0] < ValueT(0);
- return (less ^ (v[0][0][1] < ValueT(0))) ||
- (less ^ (v[0][1][1] < ValueT(0))) ||
- (less ^ (v[0][1][0] < ValueT(0))) ||
- (less ^ (v[1][0][0] < ValueT(0))) ||
- (less ^ (v[1][0][1] < ValueT(0))) ||
- (less ^ (v[1][1][1] < ValueT(0))) ||
- (less ^ (v[1][1][0] < ValueT(0)));
-}
-
-/// @brief Template specialization that does not use caching of stencil points
-template<typename TreeOrAccT>
-class SampleFromVoxels<TreeOrAccT, 1, false> : public TrilinearSampler<TreeOrAccT>
-{
- using BaseT = TrilinearSampler<TreeOrAccT>;
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
-
-public:
-
- /// @brief Construction from a Tree or ReadAccessor
- __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc) {}
-
- /// @note xyz is in index space space
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
-
- /// @note ijk is in index space space
- __hostdev__ ValueT operator()(const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);}
-
- /// @brief Return the gradient in index space.
- ///
- /// @warning Will only compile with floating point value types
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ Vec3T<ValueT> gradient(Vec3T<RealT> xyz) const;
-
- /// @brief Return true if the tr-linear stencil has a zero crossing at the specified index position.
- ///
- /// @warning Will only compile with floating point value types
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
-
-}; // SampleFromVoxels<TreeOrAccT, 1, false>
-
-/// @brief Template specialization with caching of stencil values
-template<typename TreeOrAccT>
-class SampleFromVoxels<TreeOrAccT, 1, true> : public TrilinearSampler<TreeOrAccT>
-{
- using BaseT = TrilinearSampler<TreeOrAccT>;
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
-
- mutable CoordT mPos;
- mutable ValueT mVal[2][2][2];
-
- template<typename RealT, template<typename...> class Vec3T>
- __hostdev__ void cache(Vec3T<RealT>& xyz) const;
-public:
-
- /// @brief Construction from a Tree or ReadAccessor
- __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc), mPos(CoordT::max()){}
-
- /// @note xyz is in index space space
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
-
- // @note ijk is in index space space
- __hostdev__ ValueT operator()(const CoordT &ijk) const;
-
- /// @brief Return the gradient in index space.
- ///
- /// @warning Will only compile with floating point value types
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ Vec3T<ValueT> gradient(Vec3T<RealT> xyz) const;
-
- /// @brief Return true if the tr-linear stencil has a zero crossing at the specified index position.
- ///
- /// @warning Will only compile with floating point value types
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
-
- /// @brief Return true if the cached tri-linear stencil has a zero crossing.
- ///
- /// @warning Will only compile with floating point value types
- __hostdev__ bool zeroCrossing() const { return BaseT::zeroCrossing(mVal); }
-
-}; // SampleFromVoxels<TreeOrAccT, 1, true>
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(Vec3T<RealT> xyz) const
-{
- this->cache(xyz);
- return BaseT::sample(xyz, mVal);
-}
-
-template<typename TreeOrAccT>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(const CoordT &ijk) const
-{
- return ijk == mPos ? mVal[0][0][0] : BaseT::mAcc.getValue(ijk);
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, true>::gradient(Vec3T<RealT> xyz) const
-{
- this->cache(xyz);
- return BaseT::gradient(xyz, mVal);
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ bool SampleFromVoxels<TreeOrAccT, 1, true>::zeroCrossing(Vec3T<RealT> xyz) const
-{
- this->cache(xyz);
- return BaseT::zeroCrossing(mVal);
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ void SampleFromVoxels<TreeOrAccT, 1, true>::cache(Vec3T<RealT>& xyz) const
-{
- CoordT ijk = Floor<CoordT>(xyz);
- if (ijk != mPos) {
- mPos = ijk;
- BaseT::stencil(ijk, mVal);
- }
-}
-
-#if 0
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
-{
- ValueT val[2][2][2];
- CoordT ijk = Floor<CoordT>(xyz);
- BaseT::stencil(ijk, val);
- return BaseT::sample(xyz, val);
-}
-
-#else
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
-{
- auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); };
-
- CoordT coord = Floor<CoordT>(xyz);
-
- ValueT vx, vx1, vy, vy1, vz, vz1;
-
- vz = BaseT::mAcc.getValue(coord);
- coord[2] += 1;
- vz1 = BaseT::mAcc.getValue(coord);
- vy = lerp(vz, vz1, xyz[2]);
-
- coord[1] += 1;
-
- vz1 = BaseT::mAcc.getValue(coord);
- coord[2] -= 1;
- vz = BaseT::mAcc.getValue(coord);
- vy1 = lerp(vz, vz1, xyz[2]);
-
- vx = lerp(vy, vy1, xyz[1]);
-
- coord[0] += 1;
-
- vz = BaseT::mAcc.getValue(coord);
- coord[2] += 1;
- vz1 = BaseT::mAcc.getValue(coord);
- vy1 = lerp(vz, vz1, xyz[2]);
-
- coord[1] -= 1;
-
- vz1 = BaseT::mAcc.getValue(coord);
- coord[2] -= 1;
- vz = BaseT::mAcc.getValue(coord);
- vy = lerp(vz, vz1, xyz[2]);
-
- vx1 = lerp(vy, vy1, xyz[1]);
-
- return lerp(vx, vx1, xyz[0]);
-}
-#endif
-
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ inline Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, false>::gradient(Vec3T<RealT> xyz) const
-{
- ValueT val[2][2][2];
- CoordT ijk = Floor<CoordT>(xyz);
- BaseT::stencil(ijk, val);
- return BaseT::gradient(xyz, val);
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ bool SampleFromVoxels<TreeOrAccT, 1, false>::zeroCrossing(Vec3T<RealT> xyz) const
-{
- ValueT val[2][2][2];
- CoordT ijk = Floor<CoordT>(xyz);
- BaseT::stencil(ijk, val);
- return BaseT::zeroCrossing(val);
-}
-
-// ------------------------------> TriquadraticSampler <--------------------------------------
-
-/// @brief Tri-quadratic sampler, i.e. second order, interpolator
-template<typename TreeOrAccT>
-class TriquadraticSampler
-{
-protected:
- const TreeOrAccT& mAcc;
-
-public:
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
- static const int ORDER = 1;
-
- /// @brief Protected constructor from a Tree or ReadAccessor
- __hostdev__ TriquadraticSampler(const TreeOrAccT& acc) : mAcc(acc) {}
-
- __hostdev__ const TreeOrAccT& accessor() const { return mAcc; }
-
- /// @brief Extract the stencil of 27 values
- inline __hostdev__ void stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const;
-
- template<typename RealT, template<typename...> class Vec3T>
- static inline __hostdev__ ValueT sample(const Vec3T<RealT> &uvw, const ValueT (&v)[3][3][3]);
-
- static inline __hostdev__ bool zeroCrossing(const ValueT (&v)[3][3][3]);
-}; // TriquadraticSamplerBase
-
-template<typename TreeOrAccT>
-__hostdev__ void TriquadraticSampler<TreeOrAccT>::stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const
-{
- CoordT p(ijk[0] - 1, 0, 0);
- for (int dx = 0; dx < 3; ++dx, ++p[0]) {
- p[1] = ijk[1] - 1;
- for (int dy = 0; dy < 3; ++dy, ++p[1]) {
- p[2] = ijk[2] - 1;
- for (int dz = 0; dz < 3; ++dz, ++p[2]) {
- v[dx][dy][dz] = mAcc.getValue(p);// extract the stencil of 27 values
- }
- }
- }
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType TriquadraticSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[3][3][3])
-{
- auto kernel = [](const ValueT* value, double weight)->ValueT {
- return weight * (weight * (0.5f * (value[0] + value[2]) - value[1]) +
- 0.5f * (value[2] - value[0])) + value[1];
- };
-
- ValueT vx[3];
- for (int dx = 0; dx < 3; ++dx) {
- ValueT vy[3];
- for (int dy = 0; dy < 3; ++dy) {
- vy[dy] = kernel(&v[dx][dy][0], uvw[2]);
- }//loop over y
- vx[dx] = kernel(vy, uvw[1]);
- }//loop over x
- return kernel(vx, uvw[0]);
-}
-
-template<typename TreeOrAccT>
-__hostdev__ bool TriquadraticSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[3][3][3])
-{
- static_assert(is_floating_point<ValueT>::value, "TrilinearSampler::zeroCrossing requires a floating-point type");
- const bool less = v[0][0][0] < ValueT(0);
- for (int dx = 0; dx < 3; ++dx) {
- for (int dy = 0; dy < 3; ++dy) {
- for (int dz = 0; dz < 3; ++dz) {
- if (less ^ (v[dx][dy][dz] < ValueT(0))) return true;
- }
- }
- }
- return false;
-}
-
-/// @brief Template specialization that does not use caching of stencil points
-template<typename TreeOrAccT>
-class SampleFromVoxels<TreeOrAccT, 2, false> : public TriquadraticSampler<TreeOrAccT>
-{
- using BaseT = TriquadraticSampler<TreeOrAccT>;
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
-public:
-
- /// @brief Construction from a Tree or ReadAccessor
- __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc) {}
-
- /// @note xyz is in index space space
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
-
- __hostdev__ ValueT operator()(const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);}
-
- /// @brief Return true if the tr-linear stencil has a zero crossing at the specified index position.
- ///
- /// @warning Will only compile with floating point value types
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
-
-}; // SampleFromVoxels<TreeOrAccT, 2, false>
-
-/// @brief Template specialization with caching of stencil values
-template<typename TreeOrAccT>
-class SampleFromVoxels<TreeOrAccT, 2, true> : public TriquadraticSampler<TreeOrAccT>
-{
- using BaseT = TriquadraticSampler<TreeOrAccT>;
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
-
- mutable CoordT mPos;
- mutable ValueT mVal[3][3][3];
-
- template<typename RealT, template<typename...> class Vec3T>
- __hostdev__ void cache(Vec3T<RealT>& xyz) const;
-public:
-
- /// @brief Construction from a Tree or ReadAccessor
- __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc), mPos(CoordT::max()){}
-
- /// @note xyz is in index space space
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
-
- inline __hostdev__ ValueT operator()(const CoordT &ijk) const;
-
- /// @brief Return true if the tr-linear stencil has a zero crossing at the specified index position.
- ///
- /// @warning Will only compile with floating point value types
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
-
- /// @brief Return true if the cached tri-linear stencil has a zero crossing.
- ///
- /// @warning Will only compile with floating point value types
- __hostdev__ bool zeroCrossing() const { return BaseT::zeroCrossing(mVal); }
-
-}; // SampleFromVoxels<TreeOrAccT, 2, true>
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(Vec3T<RealT> xyz) const
-{
- this->cache(xyz);
- return BaseT::sample(xyz, mVal);
-}
-
-template<typename TreeOrAccT>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(const CoordT &ijk) const
-{
- return ijk == mPos ? mVal[1][1][1] : BaseT::mAcc.getValue(ijk);
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ bool SampleFromVoxels<TreeOrAccT, 2, true>::zeroCrossing(Vec3T<RealT> xyz) const
-{
- this->cache(xyz);
- return BaseT::zeroCrossing(mVal);
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ void SampleFromVoxels<TreeOrAccT, 2, true>::cache(Vec3T<RealT>& xyz) const
-{
- CoordT ijk = Floor<CoordT>(xyz);
- if (ijk != mPos) {
- mPos = ijk;
- BaseT::stencil(ijk, mVal);
- }
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, false>::operator()(Vec3T<RealT> xyz) const
-{
- ValueT val[3][3][3];
- CoordT ijk = Floor<CoordT>(xyz);
- BaseT::stencil(ijk, val);
- return BaseT::sample(xyz, val);
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ bool SampleFromVoxels<TreeOrAccT, 2, false>::zeroCrossing(Vec3T<RealT> xyz) const
-{
- ValueT val[3][3][3];
- CoordT ijk = Floor<CoordT>(xyz);
- BaseT::stencil(ijk, val);
- return BaseT::zeroCrossing(val);
-}
-
-// ------------------------------> TricubicSampler <--------------------------------------
-
-/// @brief Tri-cubic sampler, i.e. third order, interpolator.
-///
-/// @details See the following paper for implementation details:
-/// Lekien, F. and Marsden, J.: Tricubic interpolation in three dimensions.
-/// In: International Journal for Numerical Methods
-/// in Engineering (2005), No. 63, p. 455-471
-
-template<typename TreeOrAccT>
-class TricubicSampler
-{
-protected:
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
-
- const TreeOrAccT& mAcc;
-
-public:
- /// @brief Construction from a Tree or ReadAccessor
- __hostdev__ TricubicSampler(const TreeOrAccT& acc)
- : mAcc(acc)
- {
- }
-
- __hostdev__ const TreeOrAccT& accessor() const { return mAcc; }
-
- /// @brief Extract the stencil of 8 values
- inline __hostdev__ void stencil(const CoordT& ijk, ValueT (&c)[64]) const;
-
- template<typename RealT, template<typename...> class Vec3T>
- static inline __hostdev__ ValueT sample(const Vec3T<RealT> &uvw, const ValueT (&c)[64]);
-}; // TricubicSampler
-
-template<typename TreeOrAccT>
-__hostdev__ void TricubicSampler<TreeOrAccT>::stencil(const CoordT& ijk, ValueT (&C)[64]) const
-{
- auto fetch = [&](int i, int j, int k) -> ValueT& { return C[((i + 1) << 4) + ((j + 1) << 2) + k + 1]; };
-
- // fetch 64 point stencil values
- for (int i = -1; i < 3; ++i) {
- for (int j = -1; j < 3; ++j) {
- fetch(i, j, -1) = mAcc.getValue(ijk + CoordT(i, j, -1));
- fetch(i, j, 0) = mAcc.getValue(ijk + CoordT(i, j, 0));
- fetch(i, j, 1) = mAcc.getValue(ijk + CoordT(i, j, 1));
- fetch(i, j, 2) = mAcc.getValue(ijk + CoordT(i, j, 2));
- }
- }
- const ValueT half(0.5), quarter(0.25), eighth(0.125);
- const ValueT X[64] = {// values of f(x,y,z) at the 8 corners (each from 1 stencil value).
- fetch(0, 0, 0),
- fetch(1, 0, 0),
- fetch(0, 1, 0),
- fetch(1, 1, 0),
- fetch(0, 0, 1),
- fetch(1, 0, 1),
- fetch(0, 1, 1),
- fetch(1, 1, 1),
- // values of df/dx at the 8 corners (each from 2 stencil values).
- half * (fetch(1, 0, 0) - fetch(-1, 0, 0)),
- half * (fetch(2, 0, 0) - fetch(0, 0, 0)),
- half * (fetch(1, 1, 0) - fetch(-1, 1, 0)),
- half * (fetch(2, 1, 0) - fetch(0, 1, 0)),
- half * (fetch(1, 0, 1) - fetch(-1, 0, 1)),
- half * (fetch(2, 0, 1) - fetch(0, 0, 1)),
- half * (fetch(1, 1, 1) - fetch(-1, 1, 1)),
- half * (fetch(2, 1, 1) - fetch(0, 1, 1)),
- // values of df/dy at the 8 corners (each from 2 stencil values).
- half * (fetch(0, 1, 0) - fetch(0, -1, 0)),
- half * (fetch(1, 1, 0) - fetch(1, -1, 0)),
- half * (fetch(0, 2, 0) - fetch(0, 0, 0)),
- half * (fetch(1, 2, 0) - fetch(1, 0, 0)),
- half * (fetch(0, 1, 1) - fetch(0, -1, 1)),
- half * (fetch(1, 1, 1) - fetch(1, -1, 1)),
- half * (fetch(0, 2, 1) - fetch(0, 0, 1)),
- half * (fetch(1, 2, 1) - fetch(1, 0, 1)),
- // values of df/dz at the 8 corners (each from 2 stencil values).
- half * (fetch(0, 0, 1) - fetch(0, 0, -1)),
- half * (fetch(1, 0, 1) - fetch(1, 0, -1)),
- half * (fetch(0, 1, 1) - fetch(0, 1, -1)),
- half * (fetch(1, 1, 1) - fetch(1, 1, -1)),
- half * (fetch(0, 0, 2) - fetch(0, 0, 0)),
- half * (fetch(1, 0, 2) - fetch(1, 0, 0)),
- half * (fetch(0, 1, 2) - fetch(0, 1, 0)),
- half * (fetch(1, 1, 2) - fetch(1, 1, 0)),
- // values of d2f/dxdy at the 8 corners (each from 4 stencil values).
- quarter * (fetch(1, 1, 0) - fetch(-1, 1, 0) - fetch(1, -1, 0) + fetch(-1, -1, 0)),
- quarter * (fetch(2, 1, 0) - fetch(0, 1, 0) - fetch(2, -1, 0) + fetch(0, -1, 0)),
- quarter * (fetch(1, 2, 0) - fetch(-1, 2, 0) - fetch(1, 0, 0) + fetch(-1, 0, 0)),
- quarter * (fetch(2, 2, 0) - fetch(0, 2, 0) - fetch(2, 0, 0) + fetch(0, 0, 0)),
- quarter * (fetch(1, 1, 1) - fetch(-1, 1, 1) - fetch(1, -1, 1) + fetch(-1, -1, 1)),
- quarter * (fetch(2, 1, 1) - fetch(0, 1, 1) - fetch(2, -1, 1) + fetch(0, -1, 1)),
- quarter * (fetch(1, 2, 1) - fetch(-1, 2, 1) - fetch(1, 0, 1) + fetch(-1, 0, 1)),
- quarter * (fetch(2, 2, 1) - fetch(0, 2, 1) - fetch(2, 0, 1) + fetch(0, 0, 1)),
- // values of d2f/dxdz at the 8 corners (each from 4 stencil values).
- quarter * (fetch(1, 0, 1) - fetch(-1, 0, 1) - fetch(1, 0, -1) + fetch(-1, 0, -1)),
- quarter * (fetch(2, 0, 1) - fetch(0, 0, 1) - fetch(2, 0, -1) + fetch(0, 0, -1)),
- quarter * (fetch(1, 1, 1) - fetch(-1, 1, 1) - fetch(1, 1, -1) + fetch(-1, 1, -1)),
- quarter * (fetch(2, 1, 1) - fetch(0, 1, 1) - fetch(2, 1, -1) + fetch(0, 1, -1)),
- quarter * (fetch(1, 0, 2) - fetch(-1, 0, 2) - fetch(1, 0, 0) + fetch(-1, 0, 0)),
- quarter * (fetch(2, 0, 2) - fetch(0, 0, 2) - fetch(2, 0, 0) + fetch(0, 0, 0)),
- quarter * (fetch(1, 1, 2) - fetch(-1, 1, 2) - fetch(1, 1, 0) + fetch(-1, 1, 0)),
- quarter * (fetch(2, 1, 2) - fetch(0, 1, 2) - fetch(2, 1, 0) + fetch(0, 1, 0)),
- // values of d2f/dydz at the 8 corners (each from 4 stencil values).
- quarter * (fetch(0, 1, 1) - fetch(0, -1, 1) - fetch(0, 1, -1) + fetch(0, -1, -1)),
- quarter * (fetch(1, 1, 1) - fetch(1, -1, 1) - fetch(1, 1, -1) + fetch(1, -1, -1)),
- quarter * (fetch(0, 2, 1) - fetch(0, 0, 1) - fetch(0, 2, -1) + fetch(0, 0, -1)),
- quarter * (fetch(1, 2, 1) - fetch(1, 0, 1) - fetch(1, 2, -1) + fetch(1, 0, -1)),
- quarter * (fetch(0, 1, 2) - fetch(0, -1, 2) - fetch(0, 1, 0) + fetch(0, -1, 0)),
- quarter * (fetch(1, 1, 2) - fetch(1, -1, 2) - fetch(1, 1, 0) + fetch(1, -1, 0)),
- quarter * (fetch(0, 2, 2) - fetch(0, 0, 2) - fetch(0, 2, 0) + fetch(0, 0, 0)),
- quarter * (fetch(1, 2, 2) - fetch(1, 0, 2) - fetch(1, 2, 0) + fetch(1, 0, 0)),
- // values of d3f/dxdydz at the 8 corners (each from 8 stencil values).
- eighth * (fetch(1, 1, 1) - fetch(-1, 1, 1) - fetch(1, -1, 1) + fetch(-1, -1, 1) - fetch(1, 1, -1) + fetch(-1, 1, -1) + fetch(1, -1, -1) - fetch(-1, -1, -1)),
- eighth * (fetch(2, 1, 1) - fetch(0, 1, 1) - fetch(2, -1, 1) + fetch(0, -1, 1) - fetch(2, 1, -1) + fetch(0, 1, -1) + fetch(2, -1, -1) - fetch(0, -1, -1)),
- eighth * (fetch(1, 2, 1) - fetch(-1, 2, 1) - fetch(1, 0, 1) + fetch(-1, 0, 1) - fetch(1, 2, -1) + fetch(-1, 2, -1) + fetch(1, 0, -1) - fetch(-1, 0, -1)),
- eighth * (fetch(2, 2, 1) - fetch(0, 2, 1) - fetch(2, 0, 1) + fetch(0, 0, 1) - fetch(2, 2, -1) + fetch(0, 2, -1) + fetch(2, 0, -1) - fetch(0, 0, -1)),
- eighth * (fetch(1, 1, 2) - fetch(-1, 1, 2) - fetch(1, -1, 2) + fetch(-1, -1, 2) - fetch(1, 1, 0) + fetch(-1, 1, 0) + fetch(1, -1, 0) - fetch(-1, -1, 0)),
- eighth * (fetch(2, 1, 2) - fetch(0, 1, 2) - fetch(2, -1, 2) + fetch(0, -1, 2) - fetch(2, 1, 0) + fetch(0, 1, 0) + fetch(2, -1, 0) - fetch(0, -1, 0)),
- eighth * (fetch(1, 2, 2) - fetch(-1, 2, 2) - fetch(1, 0, 2) + fetch(-1, 0, 2) - fetch(1, 2, 0) + fetch(-1, 2, 0) + fetch(1, 0, 0) - fetch(-1, 0, 0)),
- eighth * (fetch(2, 2, 2) - fetch(0, 2, 2) - fetch(2, 0, 2) + fetch(0, 0, 2) - fetch(2, 2, 0) + fetch(0, 2, 0) + fetch(2, 0, 0) - fetch(0, 0, 0))};
-
- // 4Kb of static table (int8_t has a range of -127 -> 127 which suffices)
- static const int8_t A[64][64] = {
- {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {-3, 3, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {2, -2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {9, -9, -9, 9, 0, 0, 0, 0, 6, 3, -6, -3, 0, 0, 0, 0, 6, -6, 3, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {-6, 6, 6, -6, 0, 0, 0, 0, -3, -3, 3, 3, 0, 0, 0, 0, -4, 4, -2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -2, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {-6, 6, 6, -6, 0, 0, 0, 0, -4, -2, 4, 2, 0, 0, 0, 0, -3, 3, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {4, -4, -4, 4, 0, 0, 0, 0, 2, 2, -2, -2, 0, 0, 0, 0, 2, -2, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -9, -9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3, -6, -3, 0, 0, 0, 0, 6, -6, 3, -3, 0, 0, 0, 0, 4, 2, 2, 1, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -3, 3, 3, 0, 0, 0, 0, -4, 4, -2, 2, 0, 0, 0, 0, -2, -2, -1, -1, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -2, 4, 2, 0, 0, 0, 0, -3, 3, -3, 3, 0, 0, 0, 0, -2, -1, -2, -1, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4, -4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, -2, -2, 0, 0, 0, 0, 2, -2, 2, -2, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0},
- {-3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {9, -9, 0, 0, -9, 9, 0, 0, 6, 3, 0, 0, -6, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, -6, 0, 0, 3, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {-6, 6, 0, 0, 6, -6, 0, 0, -3, -3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 4, 0, 0, -2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -2, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -9, 0, 0, -9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3, 0, 0, -6, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, -6, 0, 0, 3, -3, 0, 0, 4, 2, 0, 0, 2, 1, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 0, 0, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 4, 0, 0, -2, 2, 0, 0, -2, -2, 0, 0, -1, -1, 0, 0},
- {9, 0, -9, 0, -9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0, -6, 0, -3, 0, 6, 0, -6, 0, 3, 0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 9, 0, -9, 0, -9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0, -6, 0, -3, 0, 6, 0, -6, 0, 3, 0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 1, 0},
- {-27, 27, 27, -27, 27, -27, -27, 27, -18, -9, 18, 9, 18, 9, -18, -9, -18, 18, -9, 9, 18, -18, 9, -9, -18, 18, 18, -18, -9, 9, 9, -9, -12, -6, -6, -3, 12, 6, 6, 3, -12, -6, 12, 6, -6, -3, 6, 3, -12, 12, -6, 6, -6, 6, -3, 3, -8, -4, -4, -2, -4, -2, -2, -1},
- {18, -18, -18, 18, -18, 18, 18, -18, 9, 9, -9, -9, -9, -9, 9, 9, 12, -12, 6, -6, -12, 12, -6, 6, 12, -12, -12, 12, 6, -6, -6, 6, 6, 6, 3, 3, -6, -6, -3, -3, 6, 6, -6, -6, 3, 3, -3, -3, 8, -8, 4, -4, 4, -4, 2, -2, 4, 4, 2, 2, 2, 2, 1, 1},
- {-6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, -3, 0, 3, 0, 3, 0, -4, 0, 4, 0, -2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -2, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, -6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, -3, 0, 3, 0, 3, 0, -4, 0, 4, 0, -2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -2, 0, -1, 0, -1, 0},
- {18, -18, -18, 18, -18, 18, 18, -18, 12, 6, -12, -6, -12, -6, 12, 6, 9, -9, 9, -9, -9, 9, -9, 9, 12, -12, -12, 12, 6, -6, -6, 6, 6, 3, 6, 3, -6, -3, -6, -3, 8, 4, -8, -4, 4, 2, -4, -2, 6, -6, 6, -6, 3, -3, 3, -3, 4, 2, 4, 2, 2, 1, 2, 1},
- {-12, 12, 12, -12, 12, -12, -12, 12, -6, -6, 6, 6, 6, 6, -6, -6, -6, 6, -6, 6, 6, -6, 6, -6, -8, 8, 8, -8, -4, 4, 4, -4, -3, -3, -3, -3, 3, 3, 3, 3, -4, -4, 4, 4, -2, -2, 2, 2, -4, 4, -4, 4, -2, 2, -2, 2, -2, -2, -2, -2, -1, -1, -1, -1},
- {2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {-6, 6, 0, 0, 6, -6, 0, 0, -4, -2, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {4, -4, 0, 0, -4, 4, 0, 0, 2, 2, 0, 0, -2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 0, 0, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -2, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, -3, 3, 0, 0, -2, -1, 0, 0, -2, -1, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4, 0, 0, -4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, -2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 2, -2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0},
- {-6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 0, -2, 0, 4, 0, 2, 0, -3, 0, 3, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, -6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 0, -2, 0, 4, 0, 2, 0, -3, 0, 3, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, -2, 0, -1, 0},
- {18, -18, -18, 18, -18, 18, 18, -18, 12, 6, -12, -6, -12, -6, 12, 6, 12, -12, 6, -6, -12, 12, -6, 6, 9, -9, -9, 9, 9, -9, -9, 9, 8, 4, 4, 2, -8, -4, -4, -2, 6, 3, -6, -3, 6, 3, -6, -3, 6, -6, 3, -3, 6, -6, 3, -3, 4, 2, 2, 1, 4, 2, 2, 1},
- {-12, 12, 12, -12, 12, -12, -12, 12, -6, -6, 6, 6, 6, 6, -6, -6, -8, 8, -4, 4, 8, -8, 4, -4, -6, 6, 6, -6, -6, 6, 6, -6, -4, -4, -2, -2, 4, 4, 2, 2, -3, -3, 3, 3, -3, -3, 3, 3, -4, 4, -2, 2, -4, 4, -2, 2, -2, -2, -1, -1, -2, -2, -1, -1},
- {4, 0, -4, 0, -4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, -2, 0, -2, 0, 2, 0, -2, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 4, 0, -4, 0, -4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, -2, 0, -2, 0, 2, 0, -2, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0},
- {-12, 12, 12, -12, 12, -12, -12, 12, -8, -4, 8, 4, 8, 4, -8, -4, -6, 6, -6, 6, 6, -6, 6, -6, -6, 6, 6, -6, -6, 6, 6, -6, -4, -2, -4, -2, 4, 2, 4, 2, -4, -2, 4, 2, -4, -2, 4, 2, -3, 3, -3, 3, -3, 3, -3, 3, -2, -1, -2, -1, -2, -1, -2, -1},
- {8, -8, -8, 8, -8, 8, 8, -8, 4, 4, -4, -4, -4, -4, 4, 4, 4, -4, 4, -4, -4, 4, -4, 4, 4, -4, -4, 4, 4, -4, -4, 4, 2, 2, 2, 2, -2, -2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, -2, 2, -2, 2, -2, 2, -2, 1, 1, 1, 1, 1, 1, 1, 1}};
-
- for (int i = 0; i < 64; ++i) { // C = A * X
- C[i] = ValueT(0);
-#if 0
- for (int j = 0; j < 64; j += 4) {
- C[i] = fma(A[i][j], X[j], fma(A[i][j+1], X[j+1], fma(A[i][j+2], X[j+2], fma(A[i][j+3], X[j+3], C[i]))));
- }
-#else
- for (int j = 0; j < 64; j += 4) {
- C[i] += A[i][j] * X[j] + A[i][j + 1] * X[j + 1] + A[i][j + 2] * X[j + 2] + A[i][j + 3] * X[j + 3];
- }
-#endif
- }
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType TricubicSampler<TreeOrAccT>::sample(const Vec3T<RealT> &xyz, const ValueT (&C)[64])
-{
- ValueT zPow(1), sum(0);
- for (int k = 0, n = 0; k < 4; ++k) {
- ValueT yPow(1);
- for (int j = 0; j < 4; ++j, n += 4) {
-#if 0
- sum = fma( yPow, zPow * fma(xyz[0], fma(xyz[0], fma(xyz[0], C[n + 3], C[n + 2]), C[n + 1]), C[n]), sum);
-#else
- sum += yPow * zPow * (C[n] + xyz[0] * (C[n + 1] + xyz[0] * (C[n + 2] + xyz[0] * C[n + 3])));
-#endif
- yPow *= xyz[1];
- }
- zPow *= xyz[2];
- }
- return sum;
-}
-
-template<typename TreeOrAccT>
-class SampleFromVoxels<TreeOrAccT, 3, true> : public TricubicSampler<TreeOrAccT>
-{
- using BaseT = TricubicSampler<TreeOrAccT>;
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
-
- mutable CoordT mPos;
- mutable ValueT mC[64];
-
- template<typename RealT, template<typename...> class Vec3T>
- __hostdev__ void cache(Vec3T<RealT>& xyz) const;
-
-public:
- /// @brief Construction from a Tree or ReadAccessor
- __hostdev__ SampleFromVoxels(const TreeOrAccT& acc)
- : BaseT(acc)
- {
- }
-
- /// @note xyz is in index space space
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
-
- // @brief Return value at the coordinate @a ijk in index space space
- __hostdev__ ValueT operator()(const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);}
-
-}; // SampleFromVoxels<TreeOrAccT, 3, true>
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 3, true>::operator()(Vec3T<RealT> xyz) const
-{
- this->cache(xyz);
- return BaseT::sample(xyz, mC);
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ void SampleFromVoxels<TreeOrAccT, 3, true>::cache(Vec3T<RealT>& xyz) const
-{
- CoordT ijk = Floor<CoordT>(xyz);
- if (ijk != mPos) {
- mPos = ijk;
- BaseT::stencil(ijk, mC);
- }
-}
-
-template<typename TreeOrAccT>
-class SampleFromVoxels<TreeOrAccT, 3, false> : public TricubicSampler<TreeOrAccT>
-{
- using BaseT = TricubicSampler<TreeOrAccT>;
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
-
-public:
- /// @brief Construction from a Tree or ReadAccessor
- __hostdev__ SampleFromVoxels(const TreeOrAccT& acc)
- : BaseT(acc)
- {
- }
-
- /// @note xyz is in index space space
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
-
- __hostdev__ ValueT operator()(const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);}
-
-}; // SampleFromVoxels<TreeOrAccT, 3, true>
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 3, false>::operator()(Vec3T<RealT> xyz) const
-{
- ValueT C[64];
- CoordT ijk = Floor<CoordT>(xyz);
- BaseT::stencil(ijk, C);
- return BaseT::sample(xyz, C);
-}
-
-} // namespace nanovdb
-
-#endif // NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED
+// SampleFromVoxels.h
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+//////////////////////////////////////////////////////////////////////////
+///
+/// @file SampleFromVoxels.h
+///
+/// @brief NearestNeighborSampler, TrilinearSampler, TriquadraticSampler and TricubicSampler
+///
+/// @note These interpolators employ internal caching for better performance when used repeatedly
+/// in the same voxel location, so try to reuse an instance of these classes more than once.
+///
+/// @warning While all the interpolators defined below work with both scalars and vectors
+/// values (e.g. float and Vec3<float>) TrilinearSampler::zeroCrossing and
+/// Trilinear::gradient will only compile with floating point value types.
+///
+/// @author Ken Museth
+///
+///////////////////////////////////////////////////////////////////////////
+
+#ifndef NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED
+#define NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED
+
+// Only define __hostdev__ when compiling as NVIDIA CUDA
+#ifdef __CUDACC__
+#define __hostdev__ __host__ __device__
+#elif defined(__KERNEL_METAL__)
+#else
+#include <cmath> // for floor
+#define __hostdev__
+#endif
+
+namespace nanovdb {
+
+// Forward declaration of sampler with specific polynomial orders
+template<typename TreeT, int Order, bool UseCache = true>
+class SampleFromVoxels;
+
+/// @brief Factory free-function for a sampler of specific polynomial orders
+///
+/// @details This allows for the compact syntax:
+/// @code
+/// auto acc = grid.getAccessor();
+/// auto smp = nanovdb::createSampler<1>( acc );
+/// @endcode
+template<int Order, typename TreeOrAccT, bool UseCache = true>
+__hostdev__ SampleFromVoxels<TreeOrAccT, Order, UseCache> createSampler(__global__ const TreeOrAccT& acc)
+{
+ return SampleFromVoxels<TreeOrAccT, Order, UseCache>(acc);
+}
+
+/// @brief Utility function that returns the Coord of the round-down of @a xyz
+/// and redefines @a xyz as the fractional part, i.e. xyz-in = return-value + xyz-out
+template<typename CoordT, typename RealT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Floor(__global__ Vec3T<RealT>& xyz);
+
+/// @brief Template specialization of Floor for Vec3<float>
+template<typename CoordT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Floor(__global__ Vec3T<float>& xyz)
+{
+ const float ijk[3] = {floorf(xyz[0]), floorf(xyz[1]), floorf(xyz[2])};
+ xyz[0] -= ijk[0];
+ xyz[1] -= ijk[1];
+ xyz[2] -= ijk[2];
+ return CoordT(int32_t(ijk[0]), int32_t(ijk[1]), int32_t(ijk[2]));
+}
+
+/// @brief Template specialization of Floor for Vec3<float>
+template<typename CoordT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Floor(__global__ Vec3T<double>& xyz)
+{
+ const double ijk[3] = {floor(xyz[0]), floor(xyz[1]), floor(xyz[2])};
+ xyz[0] -= ijk[0];
+ xyz[1] -= ijk[1];
+ xyz[2] -= ijk[2];
+ return CoordT(int32_t(ijk[0]), int32_t(ijk[1]), int32_t(ijk[2]));
+}
+
+#if defined(__KERNEL_METAL__)
+/// @brief Template specialization of Floor for Vec3<float>
+template<typename CoordT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Floor(__local__ Vec3T<float>& xyz)
+{
+ const float ijk[3] = {floorf(xyz[0]), floorf(xyz[1]), floorf(xyz[2])};
+ xyz[0] -= ijk[0];
+ xyz[1] -= ijk[1];
+ xyz[2] -= ijk[2];
+ return CoordT(int32_t(ijk[0]), int32_t(ijk[1]), int32_t(ijk[2]));
+}
+
+/// @brief Template specialization of Floor for Vec3<float>
+template<typename CoordT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Floor(__local__ Vec3T<double>& xyz)
+{
+ const double ijk[3] = {floor(xyz[0]), floor(xyz[1]), floor(xyz[2])};
+ xyz[0] -= ijk[0];
+ xyz[1] -= ijk[1];
+ xyz[2] -= ijk[2];
+ return CoordT(int32_t(ijk[0]), int32_t(ijk[1]), int32_t(ijk[2]));
+}
+#endif
+
+// ------------------------------> NearestNeighborSampler <--------------------------------------
+
+/// @brief Nearest neighbor, i.e. zero order, interpolator with caching
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 0, true>
+{
+public:
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+
+ static __constant__ const int ORDER = 0;
+ /// @brief Construction from a Tree or ReadAccessor
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc)
+ : mAcc(acc)
+ , mPos(CoordT::max())
+ {
+ }
+
+ __hostdev__ __global__ const TreeOrAccT& accessor() const { return mAcc; }
+
+    /// @note xyz is in index space
+ template<typename Vec3T>
+ inline __hostdev__ ValueT operator()(__global__ const Vec3T& xyz) const __local__;
+#if defined(__KERNEL_METAL__)
+ template<typename Vec3T>
+ inline __hostdev__ ValueT operator()(__local__ const Vec3T& xyz) const __local__;
+#endif
+
+ inline __hostdev__ ValueT operator()(__global__ const CoordT& ijk) const __local__;
+
+ inline __hostdev__ ValueT operator()() const;
+
+private:
+ __global__ const TreeOrAccT& mAcc;
+ mutable CoordT mPos;
+ mutable ValueT mVal; // private cache
+}; // SampleFromVoxels<TreeOrAccT, 0, true>
+
+/// @brief Nearest neighbor, i.e. zero order, interpolator without caching
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 0, false>
+{
+public:
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+ static __constant__ const int ORDER = 0;
+
+ /// @brief Construction from a Tree or ReadAccessor
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc)
+ : mAcc(acc)
+ {
+ }
+
+ __hostdev__ __global__ const TreeOrAccT& accessor() const __local__ { return mAcc; }
+
+    /// @note xyz is in index space
+ template<typename Vec3T>
+ inline __hostdev__ ValueT operator()(__global__ const Vec3T& xyz) const __local__;
+#if defined(__KERNEL_METAL__)
+ template<typename Vec3T>
+ inline __hostdev__ ValueT operator()(__local__ const Vec3T& xyz) const __local__;
+#endif
+
+ inline __hostdev__ ValueT operator()(__global__ const CoordT& ijk) const __local__ { return mAcc.getValue(ijk);}
+
+private:
+ __local__ const TreeOrAccT& mAcc;
+}; // SampleFromVoxels<TreeOrAccT, 0, false>
+
+template<typename TreeOrAccT>
+template<typename Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(__global__ const Vec3T& xyz) const __local__
+{
+ const CoordT ijk = Round<CoordT>(xyz);
+ if (ijk != mPos) {
+ mPos = ijk;
+ mVal = mAcc.getValue(mPos);
+ }
+ return mVal;
+}
+#if defined(__KERNEL_METAL__)
+template<typename TreeOrAccT>
+template<typename Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(__local__ const Vec3T& xyz) const __local__
+{
+ const CoordT ijk = Round<CoordT>(xyz);
+ if (ijk != mPos) {
+ mPos = ijk;
+ mVal = mAcc.getValue(mPos);
+ }
+ return mVal;
+}
+#endif
+
+template<typename TreeOrAccT>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(__global__ const CoordT& ijk) const __local__
+{
+ if (ijk != mPos) {
+ mPos = ijk;
+ mVal = mAcc.getValue(mPos);
+ }
+ return mVal;
+}
+
+template<typename TreeOrAccT>
+template<typename Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, false>::operator()(__global__ const Vec3T& xyz) const __local__
+{
+ return mAcc.getValue(Round<CoordT>(xyz));
+}
+
+#if defined(__KERNEL_METAL__)
+template<typename TreeOrAccT>
+template<typename Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, false>::operator()(__local__ const Vec3T& xyz) const __local__
+{
+ return mAcc.getValue(Round<CoordT>(xyz));
+}
+#endif
+
+// ------------------------------> TrilinearSampler <--------------------------------------
+
+/// @brief Tri-linear sampler, i.e. first order, interpolator
+template<typename TreeOrAccT>
+class TrilinearSampler
+{
+#if defined(__KERNEL_METAL__)
+public:
+#else
+protected:
+#endif
+ __local__ const TreeOrAccT& mAcc;
+
+public:
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+ static __constant__ const int ORDER = 1;
+
+ /// @brief Protected constructor from a Tree or ReadAccessor
+ __hostdev__ TrilinearSampler(__local__ const TreeOrAccT& acc) : mAcc(acc) {}
+
+ __hostdev__ __global__ const TreeOrAccT& accessor() const { return mAcc; }
+
+ /// @brief Extract the stencil of 8 values
+ inline __hostdev__ void stencil(__global__ CoordT& ijk, __global__ ValueT (&v)[2][2][2]) const;
+
+ template<typename RealT, template<typename...> class Vec3T>
+ static inline __hostdev__ ValueT sample(__global__ const Vec3T<RealT> &uvw, __global__ const ValueT (&v)[2][2][2]);
+
+ template<typename RealT, template<typename...> class Vec3T>
+ static inline __hostdev__ Vec3T<ValueT> gradient(__global__ const Vec3T<RealT> &uvw, __global__ const ValueT (&v)[2][2][2]);
+
+ static inline __hostdev__ bool zeroCrossing(__global__ const ValueT (&v)[2][2][2]);
+}; // TrilinearSamplerBase
+
+template<typename TreeOrAccT>
+void TrilinearSampler<TreeOrAccT>::stencil(__global__ CoordT& ijk, __global__ ValueT (&v)[2][2][2]) const
+{
+ v[0][0][0] = mAcc.getValue(ijk); // i, j, k
+
+ ijk[2] += 1;
+ v[0][0][1] = mAcc.getValue(ijk); // i, j, k + 1
+
+ ijk[1] += 1;
+ v[0][1][1] = mAcc.getValue(ijk); // i, j+1, k + 1
+
+ ijk[2] -= 1;
+ v[0][1][0] = mAcc.getValue(ijk); // i, j+1, k
+
+ ijk[0] += 1;
+ ijk[1] -= 1;
+ v[1][0][0] = mAcc.getValue(ijk); // i+1, j, k
+
+ ijk[2] += 1;
+ v[1][0][1] = mAcc.getValue(ijk); // i+1, j, k + 1
+
+ ijk[1] += 1;
+ v[1][1][1] = mAcc.getValue(ijk); // i+1, j+1, k + 1
+
+ ijk[2] -= 1;
+ v[1][1][0] = mAcc.getValue(ijk); // i+1, j+1, k
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+typename TreeOrAccT::ValueType TrilinearSampler<TreeOrAccT>::sample(__global__ const Vec3T<RealT> &uvw, __global__ const ValueT (&v)[2][2][2])
+{
+#if 0
+ auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a
+ //auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b, fma(-w, a, a));};// = (1-w)*a + w*b
+#else
+ struct Lerp {
+ static ValueT lerp(ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); }
+ };
+#endif
+ return Lerp::lerp(Lerp::lerp(Lerp::lerp(v[0][0][0], v[0][0][1], uvw[2]), Lerp::lerp(v[0][1][0], v[0][1][1], uvw[2]), uvw[1]),
+ Lerp::lerp(Lerp::lerp(v[1][0][0], v[1][0][1], uvw[2]), Lerp::lerp(v[1][1][0], v[1][1][1], uvw[2]), uvw[1]),
+ uvw[0]);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+Vec3T<typename TreeOrAccT::ValueType> TrilinearSampler<TreeOrAccT>::gradient(__global__ const Vec3T<RealT> &uvw, __global__ const ValueT (&v)[2][2][2])
+{
+ static_assert(is_floating_point<ValueT>::value, "TrilinearSampler::gradient requires a floating-point type");
+#if 0
+ auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a
+ //auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b, fma(-w, a, a));};// = (1-w)*a + w*b
+#else
+ struct Lerp {
+ static ValueT lerp(ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); }
+ };
+#endif
+
+ ValueT D[4] = {v[0][0][1] - v[0][0][0], v[0][1][1] - v[0][1][0], v[1][0][1] - v[1][0][0], v[1][1][1] - v[1][1][0]};
+
+ // Z component
+ Vec3T<ValueT> grad(0, 0, Lerp::lerp(Lerp::lerp(D[0], D[1], uvw[1]), lerp(D[2], D[3], uvw[1]), uvw[0]));
+
+ const ValueT w = ValueT(uvw[2]);
+ D[0] = v[0][0][0] + D[0] * w;
+ D[1] = v[0][1][0] + D[1] * w;
+ D[2] = v[1][0][0] + D[2] * w;
+ D[3] = v[1][1][0] + D[3] * w;
+
+ // X component
+ grad[0] = Lerp::lerp(D[2], D[3], uvw[1]) - Lerp::lerp(D[0], D[1], uvw[1]);
+
+ // Y component
+ grad[1] = Lerp::lerp(D[1] - D[0], D[3] - D[2], uvw[0]);
+
+ return grad;
+}
+
+template<typename TreeOrAccT>
+bool TrilinearSampler<TreeOrAccT>::zeroCrossing(__global__ const ValueT (&v)[2][2][2])
+{
+ static_assert(is_floating_point<ValueT>::value, "TrilinearSampler::zeroCrossing requires a floating-point type");
+ const bool less = v[0][0][0] < ValueT(0);
+ return (less ^ (v[0][0][1] < ValueT(0))) ||
+ (less ^ (v[0][1][1] < ValueT(0))) ||
+ (less ^ (v[0][1][0] < ValueT(0))) ||
+ (less ^ (v[1][0][0] < ValueT(0))) ||
+ (less ^ (v[1][0][1] < ValueT(0))) ||
+ (less ^ (v[1][1][1] < ValueT(0))) ||
+ (less ^ (v[1][1][0] < ValueT(0)));
+}
+
+/// @brief Template specialization that does not use caching of stencil points
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 1, false>
+#if !defined(__KERNEL_METAL__)
+ : public TrilinearSampler<TreeOrAccT>
+#endif
+{
+#if defined(__KERNEL_METAL__)
+
+ TrilinearSampler<TreeOrAccT> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) BaseT::v
+
+#endif
+ using BaseT = TrilinearSampler<TreeOrAccT>;
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+
+public:
+
+ /// @brief Construction from a Tree or ReadAccessor
+#if defined(__KERNEL_METAL__)
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc) : _base(acc) {}
+#else
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc) : BaseT(acc) {}
+#endif
+
+    /// @note xyz is in index space
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
+
+    /// @note ijk is in index space
+ __hostdev__ ValueT operator()(__global__ const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);}
+
+ /// @brief Return the gradient in index space.
+ ///
+ /// @warning Will only compile with floating point value types
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ Vec3T<ValueT> gradient(Vec3T<RealT> xyz) const;
+
+    /// @brief Return true if the tri-linear stencil has a zero crossing at the specified index position.
+ ///
+ /// @warning Will only compile with floating point value types
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
+
+}; // SampleFromVoxels<TreeOrAccT, 1, false>
+
+/// @brief Template specialization with caching of stencil values
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 1, true>
+#if !defined(__KERNEL_METAL__)
+ : public TrilinearSampler<TreeOrAccT>
+#endif
+{
+#if defined(__KERNEL_METAL__)
+ TrilinearSampler<TreeOrAccT> _base;
+#endif
+ using BaseT = TrilinearSampler<TreeOrAccT>;
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+
+ mutable CoordT mPos;
+ mutable ValueT mVal[2][2][2];
+
+ template<typename RealT, template<typename...> class Vec3T>
+ __hostdev__ void cache(__global__ Vec3T<RealT>& xyz) const;
+public:
+
+ /// @brief Construction from a Tree or ReadAccessor
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc) : BaseT(acc), mPos(CoordT::max()){}
+
+    /// @note xyz is in index space
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
+
+    // @note ijk is in index space
+ __hostdev__ ValueT operator()(__global__ const CoordT &ijk) const;
+
+ /// @brief Return the gradient in index space.
+ ///
+ /// @warning Will only compile with floating point value types
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ Vec3T<ValueT> gradient(Vec3T<RealT> xyz) const;
+
+    /// @brief Return true if the tri-linear stencil has a zero crossing at the specified index position.
+ ///
+ /// @warning Will only compile with floating point value types
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
+
+ /// @brief Return true if the cached tri-linear stencil has a zero crossing.
+ ///
+ /// @warning Will only compile with floating point value types
+ __hostdev__ bool zeroCrossing() const { return BaseT::zeroCrossing(mVal); }
+
+}; // SampleFromVoxels<TreeOrAccT, 1, true>
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(Vec3T<RealT> xyz) const
+{
+ this->cache(xyz);
+ return BaseT::sample(xyz, mVal);
+}
+
+template<typename TreeOrAccT>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(__global__ const CoordT &ijk) const
+{
+ return ijk == mPos ? mVal[0][0][0] : BaseT::mAcc.getValue(ijk);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, true>::gradient(Vec3T<RealT> xyz) const
+{
+ this->cache(xyz);
+ return BaseT::gradient(xyz, mVal);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+__hostdev__ bool SampleFromVoxels<TreeOrAccT, 1, true>::zeroCrossing(Vec3T<RealT> xyz) const
+{
+ this->cache(xyz);
+ return BaseT::zeroCrossing(mVal);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+void SampleFromVoxels<TreeOrAccT, 1, true>::cache(__global__ Vec3T<RealT>& xyz) const
+{
+ CoordT ijk = Floor<CoordT>(xyz);
+ if (ijk != mPos) {
+ mPos = ijk;
+ BaseT::stencil(ijk, mVal);
+ }
+}
+
+#if 0
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
+{
+ ValueT val[2][2][2];
+ CoordT ijk = Floor<CoordT>(xyz);
+ BaseT::stencil(ijk, val);
+ return BaseT::sample(xyz, val);
+}
+
+#else
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
+{
+ struct Lerp {
+ static ValueT lerp(ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); }
+ };
+
+ CoordT coord = Floor<CoordT>(xyz);
+
+ ValueT vx, vx1, vy, vy1, vz, vz1;
+
+ vz = BASE(mAcc).getValue(coord);
+ coord[2] += 1;
+ vz1 = BASE(mAcc).getValue(coord);
+ vy = Lerp::lerp(vz, vz1, xyz[2]);
+
+ coord[1] += 1;
+
+ vz1 = BASE(mAcc).getValue(coord);
+ coord[2] -= 1;
+ vz = BASE(mAcc).getValue(coord);
+ vy1 = Lerp::lerp(vz, vz1, xyz[2]);
+
+ vx = Lerp::lerp(vy, vy1, xyz[1]);
+
+ coord[0] += 1;
+
+ vz = BASE(mAcc).getValue(coord);
+ coord[2] += 1;
+ vz1 = BASE(mAcc).getValue(coord);
+ vy1 = Lerp::lerp(vz, vz1, xyz[2]);
+
+ coord[1] -= 1;
+
+ vz1 = BASE(mAcc).getValue(coord);
+ coord[2] -= 1;
+ vz = BASE(mAcc).getValue(coord);
+ vy = Lerp::lerp(vz, vz1, xyz[2]);
+
+ vx1 = Lerp::lerp(vy, vy1, xyz[1]);
+
+ return Lerp::lerp(vx, vx1, xyz[0]);
+}
+#endif
+
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+inline Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, false>::gradient(Vec3T<RealT> xyz) const
+{
+ ValueT val[2][2][2];
+ CoordT ijk = Floor<CoordT>(xyz);
+ BaseT::stencil(ijk, val);
+ return BaseT::gradient(xyz, val);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+bool SampleFromVoxels<TreeOrAccT, 1, false>::zeroCrossing(Vec3T<RealT> xyz) const
+{
+ ValueT val[2][2][2];
+ CoordT ijk = Floor<CoordT>(xyz);
+ BaseT::stencil(ijk, val);
+ return BaseT::zeroCrossing(val);
+}
+
+// ------------------------------> TriquadraticSampler <--------------------------------------
+
+/// @brief Tri-quadratic sampler, i.e. second order, interpolator
+template<typename TreeOrAccT>
+class TriquadraticSampler
+{
+protected:
+ __local__ const TreeOrAccT& mAcc;
+
+public:
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+ static __constant__ const int ORDER = 1;
+
+ /// @brief Protected constructor from a Tree or ReadAccessor
+ __hostdev__ TriquadraticSampler(__local__ const TreeOrAccT& acc) : mAcc(acc) {}
+
+ __hostdev__ __global__ const TreeOrAccT& accessor() const { return mAcc; }
+
+ /// @brief Extract the stencil of 27 values
+ inline __hostdev__ void stencil(__local__ const CoordT &ijk, __local__ ValueT (&v)[3][3][3]) const;
+
+ template<typename RealT, template<typename...> class Vec3T>
+ static inline __hostdev__ ValueT sample(__local__ const Vec3T<RealT> &uvw, __local__ const ValueT (&v)[3][3][3]);
+
+ static inline __hostdev__ bool zeroCrossing(__global__ const ValueT (&v)[3][3][3]);
+}; // TriquadraticSampler
+
+template<typename TreeOrAccT>
+void TriquadraticSampler<TreeOrAccT>::stencil(__local__ const CoordT &ijk, __local__ ValueT (&v)[3][3][3]) const
+{
+ CoordT p(ijk[0] - 1, 0, 0);
+ for (int dx = 0; dx < 3; ++dx, ++p[0]) {
+ p[1] = ijk[1] - 1;
+ for (int dy = 0; dy < 3; ++dy, ++p[1]) {
+ p[2] = ijk[2] - 1;
+ for (int dz = 0; dz < 3; ++dz, ++p[2]) {
+ v[dx][dy][dz] = mAcc.getValue(p);// extract the stencil of 27 values
+ }
+ }
+ }
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+typename TreeOrAccT::ValueType TriquadraticSampler<TreeOrAccT>::sample(__local__ const Vec3T<RealT> &uvw, __local__ const ValueT (&v)[3][3][3])
+{
+ struct Kernel {
+ static ValueT _kernel(__local__ const ValueT* value, double weight) {
+ return weight * (weight * (0.5f * (value[0] + value[2]) - value[1]) + 0.5f * (value[2] - value[0])) + value[1];
+ }
+ };
+
+ ValueT vx[3];
+ for (int dx = 0; dx < 3; ++dx) {
+ ValueT vy[3];
+ for (int dy = 0; dy < 3; ++dy) {
+ vy[dy] = Kernel::_kernel(&v[dx][dy][0], uvw[2]);
+ }//loop over y
+ vx[dx] = Kernel::_kernel(vy, uvw[1]);
+ }//loop over x
+ return Kernel::_kernel(vx, uvw[0]);
+}
+
+template<typename TreeOrAccT>
+bool TriquadraticSampler<TreeOrAccT>::zeroCrossing(__global__ const ValueT (&v)[3][3][3])
+{
+ static_assert(is_floating_point<ValueT>::value, "TrilinearSampler::zeroCrossing requires a floating-point type");
+ const bool less = v[0][0][0] < ValueT(0);
+ for (int dx = 0; dx < 3; ++dx) {
+ for (int dy = 0; dy < 3; ++dy) {
+ for (int dz = 0; dz < 3; ++dz) {
+ if (less ^ (v[dx][dy][dz] < ValueT(0))) return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// @brief Template specialization that does not use caching of stencil points
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 2, false>
+#if !defined(__KERNEL_METAL__)
+ : public TriquadraticSampler<TreeOrAccT>
+#endif
+{
+#if defined(__KERNEL_METAL__)
+ TriquadraticSampler<TreeOrAccT> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) BaseT::v
+#endif
+ using BaseT = TriquadraticSampler<TreeOrAccT>;
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+public:
+
+ /// @brief Construction from a Tree or ReadAccessor
+#if defined(__KERNEL_METAL__)
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc) : _base(acc) {}
+#else
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc) : BaseT(acc) {}
+#endif
+
+    /// @note xyz is in index space
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
+
+ __hostdev__ ValueT operator()(__global__ const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);}
+
+    /// @brief Return true if the tri-linear stencil has a zero crossing at the specified index position.
+ ///
+ /// @warning Will only compile with floating point value types
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
+
+}; // SampleFromVoxels<TreeOrAccT, 2, false>
+
+/// @brief Template specialization with caching of stencil values
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 2, true>
+#if !defined(__KERNEL_METAL__)
+ : public TriquadraticSampler<TreeOrAccT>
+#endif
+{
+#if defined(__KERNEL_METAL__)
+ TriquadraticSampler<TreeOrAccT> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) BaseT::v
+#endif
+ using BaseT = TriquadraticSampler<TreeOrAccT>;
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+
+ mutable CoordT mPos;
+ mutable ValueT mVal[3][3][3];
+
+ template<typename RealT, template<typename...> class Vec3T>
+ __hostdev__ void cache(__global__ Vec3T<RealT>& xyz) const;
+public:
+
+ /// @brief Construction from a Tree or ReadAccessor
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc) : BaseT(acc), mPos(CoordT::max()){}
+
+    /// @note xyz is in index space
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
+
+ inline __hostdev__ ValueT operator()(__global__ const CoordT &ijk) const;
+
+    /// @brief Return true if the tri-linear stencil has a zero crossing at the specified index position.
+ ///
+ /// @warning Will only compile with floating point value types
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
+
+ /// @brief Return true if the cached tri-linear stencil has a zero crossing.
+ ///
+ /// @warning Will only compile with floating point value types
+ __hostdev__ bool zeroCrossing() const { return BaseT::zeroCrossing(mVal); }
+
+}; // SampleFromVoxels<TreeOrAccT, 2, true>
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(Vec3T<RealT> xyz) const
+{
+ this->cache(xyz);
+ return BaseT::sample(xyz, mVal);
+}
+
+template<typename TreeOrAccT>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(__global__ const CoordT &ijk) const
+{
+ return ijk == mPos ? mVal[1][1][1] : BaseT::mAcc.getValue(ijk);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+__hostdev__ bool SampleFromVoxels<TreeOrAccT, 2, true>::zeroCrossing(Vec3T<RealT> xyz) const
+{
+ this->cache(xyz);
+ return BaseT::zeroCrossing(mVal);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+void SampleFromVoxels<TreeOrAccT, 2, true>::cache(__global__ Vec3T<RealT>& xyz) const
+{
+ CoordT ijk = Floor<CoordT>(xyz);
+ if (ijk != mPos) {
+ mPos = ijk;
+ BaseT::stencil(ijk, mVal);
+ }
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, false>::operator()(Vec3T<RealT> xyz) const
+{
+ ValueT val[3][3][3];
+ CoordT ijk = Floor<CoordT>(xyz);
+ BASE(stencil)(ijk, val);
+ return BaseT::sample(xyz, val);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+bool SampleFromVoxels<TreeOrAccT, 2, false>::zeroCrossing(Vec3T<RealT> xyz) const
+{
+ ValueT val[3][3][3];
+ CoordT ijk = Floor<CoordT>(xyz);
+ BaseT::stencil(ijk, val);
+ return BaseT::zeroCrossing(val);
+}
+
+// ------------------------------> TricubicSampler <--------------------------------------
+
+/// @brief Tri-cubic sampler, i.e. third order, interpolator.
+///
+/// @details See the following paper for implementation details:
+/// Lekien, F. and Marsden, J.: Tricubic interpolation in three dimensions.
+/// In: International Journal for Numerical Methods
+/// in Engineering (2005), No. 63, p. 455-471
+
+template<typename TreeOrAccT>
+class TricubicSampler
+{
+protected:
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+
+ __global__ const TreeOrAccT& mAcc;
+
+public:
+ /// @brief Construction from a Tree or ReadAccessor
+ __hostdev__ TricubicSampler(__global__ const TreeOrAccT& acc)
+ : mAcc(acc)
+ {
+ }
+
+ __hostdev__ __global__ const TreeOrAccT& accessor() const { return mAcc; }
+
+ /// @brief Extract the stencil of 8 values
+ inline __hostdev__ void stencil(__global__ const CoordT& ijk, __global__ ValueT (&c)[64]) const;
+
+ template<typename RealT, template<typename...> class Vec3T>
+ static inline __hostdev__ ValueT sample(__global__ const Vec3T<RealT> &uvw, __global__ const ValueT (&c)[64]);
+}; // TricubicSampler
+
+// 4Kb of static table (int8_t has a range of -127 -> 127 which suffices)
+static __constant__ const int8_t TricubicSampler_A[64][64] = {
+ {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {-3, 3, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {2, -2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {9, -9, -9, 9, 0, 0, 0, 0, 6, 3, -6, -3, 0, 0, 0, 0, 6, -6, 3, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {-6, 6, 6, -6, 0, 0, 0, 0, -3, -3, 3, 3, 0, 0, 0, 0, -4, 4, -2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -2, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {-6, 6, 6, -6, 0, 0, 0, 0, -4, -2, 4, 2, 0, 0, 0, 0, -3, 3, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {4, -4, -4, 4, 0, 0, 0, 0, 2, 2, -2, -2, 0, 0, 0, 0, 2, -2, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -9, -9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3, -6, -3, 0, 0, 0, 0, 6, -6, 3, -3, 0, 0, 0, 0, 4, 2, 2, 1, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -3, 3, 3, 0, 0, 0, 0, -4, 4, -2, 2, 0, 0, 0, 0, -2, -2, -1, -1, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -2, 4, 2, 0, 0, 0, 0, -3, 3, -3, 3, 0, 0, 0, 0, -2, -1, -2, -1, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4, -4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, -2, -2, 0, 0, 0, 0, 2, -2, 2, -2, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0},
+ {-3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {9, -9, 0, 0, -9, 9, 0, 0, 6, 3, 0, 0, -6, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, -6, 0, 0, 3, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {-6, 6, 0, 0, 6, -6, 0, 0, -3, -3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 4, 0, 0, -2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -2, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -9, 0, 0, -9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3, 0, 0, -6, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, -6, 0, 0, 3, -3, 0, 0, 4, 2, 0, 0, 2, 1, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 0, 0, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 4, 0, 0, -2, 2, 0, 0, -2, -2, 0, 0, -1, -1, 0, 0},
+ {9, 0, -9, 0, -9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0, -6, 0, -3, 0, 6, 0, -6, 0, 3, 0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 9, 0, -9, 0, -9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0, -6, 0, -3, 0, 6, 0, -6, 0, 3, 0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 1, 0},
+ {-27, 27, 27, -27, 27, -27, -27, 27, -18, -9, 18, 9, 18, 9, -18, -9, -18, 18, -9, 9, 18, -18, 9, -9, -18, 18, 18, -18, -9, 9, 9, -9, -12, -6, -6, -3, 12, 6, 6, 3, -12, -6, 12, 6, -6, -3, 6, 3, -12, 12, -6, 6, -6, 6, -3, 3, -8, -4, -4, -2, -4, -2, -2, -1},
+ {18, -18, -18, 18, -18, 18, 18, -18, 9, 9, -9, -9, -9, -9, 9, 9, 12, -12, 6, -6, -12, 12, -6, 6, 12, -12, -12, 12, 6, -6, -6, 6, 6, 6, 3, 3, -6, -6, -3, -3, 6, 6, -6, -6, 3, 3, -3, -3, 8, -8, 4, -4, 4, -4, 2, -2, 4, 4, 2, 2, 2, 2, 1, 1},
+ {-6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, -3, 0, 3, 0, 3, 0, -4, 0, 4, 0, -2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -2, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, -6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, -3, 0, 3, 0, 3, 0, -4, 0, 4, 0, -2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -2, 0, -1, 0, -1, 0},
+ {18, -18, -18, 18, -18, 18, 18, -18, 12, 6, -12, -6, -12, -6, 12, 6, 9, -9, 9, -9, -9, 9, -9, 9, 12, -12, -12, 12, 6, -6, -6, 6, 6, 3, 6, 3, -6, -3, -6, -3, 8, 4, -8, -4, 4, 2, -4, -2, 6, -6, 6, -6, 3, -3, 3, -3, 4, 2, 4, 2, 2, 1, 2, 1},
+ {-12, 12, 12, -12, 12, -12, -12, 12, -6, -6, 6, 6, 6, 6, -6, -6, -6, 6, -6, 6, 6, -6, 6, -6, -8, 8, 8, -8, -4, 4, 4, -4, -3, -3, -3, -3, 3, 3, 3, 3, -4, -4, 4, 4, -2, -2, 2, 2, -4, 4, -4, 4, -2, 2, -2, 2, -2, -2, -2, -2, -1, -1, -1, -1},
+ {2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {-6, 6, 0, 0, 6, -6, 0, 0, -4, -2, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {4, -4, 0, 0, -4, 4, 0, 0, 2, 2, 0, 0, -2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 0, 0, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -2, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, -3, 3, 0, 0, -2, -1, 0, 0, -2, -1, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4, 0, 0, -4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, -2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 2, -2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0},
+ {-6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 0, -2, 0, 4, 0, 2, 0, -3, 0, 3, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, -6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 0, -2, 0, 4, 0, 2, 0, -3, 0, 3, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, -2, 0, -1, 0},
+ {18, -18, -18, 18, -18, 18, 18, -18, 12, 6, -12, -6, -12, -6, 12, 6, 12, -12, 6, -6, -12, 12, -6, 6, 9, -9, -9, 9, 9, -9, -9, 9, 8, 4, 4, 2, -8, -4, -4, -2, 6, 3, -6, -3, 6, 3, -6, -3, 6, -6, 3, -3, 6, -6, 3, -3, 4, 2, 2, 1, 4, 2, 2, 1},
+ {-12, 12, 12, -12, 12, -12, -12, 12, -6, -6, 6, 6, 6, 6, -6, -6, -8, 8, -4, 4, 8, -8, 4, -4, -6, 6, 6, -6, -6, 6, 6, -6, -4, -4, -2, -2, 4, 4, 2, 2, -3, -3, 3, 3, -3, -3, 3, 3, -4, 4, -2, 2, -4, 4, -2, 2, -2, -2, -1, -1, -2, -2, -1, -1},
+ {4, 0, -4, 0, -4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, -2, 0, -2, 0, 2, 0, -2, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 4, 0, -4, 0, -4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, -2, 0, -2, 0, 2, 0, -2, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0},
+ {-12, 12, 12, -12, 12, -12, -12, 12, -8, -4, 8, 4, 8, 4, -8, -4, -6, 6, -6, 6, 6, -6, 6, -6, -6, 6, 6, -6, -6, 6, 6, -6, -4, -2, -4, -2, 4, 2, 4, 2, -4, -2, 4, 2, -4, -2, 4, 2, -3, 3, -3, 3, -3, 3, -3, 3, -2, -1, -2, -1, -2, -1, -2, -1},
+ {8, -8, -8, 8, -8, 8, 8, -8, 4, 4, -4, -4, -4, -4, 4, 4, 4, -4, 4, -4, -4, 4, -4, 4, 4, -4, -4, 4, 4, -4, -4, 4, 2, 2, 2, 2, -2, -2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, -2, 2, -2, 2, -2, 2, -2, 1, 1, 1, 1, 1, 1, 1, 1}};
+
+template<typename TreeOrAccT>
+void TricubicSampler<TreeOrAccT>::stencil(__global__ const CoordT& ijk, __global__ ValueT (&C)[64]) const
+{
+ struct Fetch {
+ Fetch(__global__ ValueT (&_C)[64]):C(_C) {}
+ __global__ ValueT& fetch(int i, int j, int k) { return C[((i + 1) << 4) + ((j + 1) << 2) + k + 1]; }
+
+ __global__ ValueT (&C)[64];
+ };
+ Fetch f(C);
+
+ // fetch 64 point stencil values
+ for (int i = -1; i < 3; ++i) {
+ for (int j = -1; j < 3; ++j) {
+            f.fetch(i, j, -1) = mAcc.getValue(ijk + CoordT(i, j, -1));
+            f.fetch(i, j, 0) = mAcc.getValue(ijk + CoordT(i, j, 0));
+            f.fetch(i, j, 1) = mAcc.getValue(ijk + CoordT(i, j, 1));
+            f.fetch(i, j, 2) = mAcc.getValue(ijk + CoordT(i, j, 2));
+ }
+ }
+ const ValueT _half(0.5), quarter(0.25), eighth(0.125);
+ const ValueT X[64] = {// values of f(x,y,z) at the 8 corners (each from 1 stencil value).
+ f.fetch(0, 0, 0),
+ f.fetch(1, 0, 0),
+ f.fetch(0, 1, 0),
+ f.fetch(1, 1, 0),
+ f.fetch(0, 0, 1),
+ f.fetch(1, 0, 1),
+ f.fetch(0, 1, 1),
+ f.fetch(1, 1, 1),
+ // values of df/dx at the 8 corners (each from 2 stencil values).
+ _half * (f.fetch(1, 0, 0) - f.fetch(-1, 0, 0)),
+ _half * (f.fetch(2, 0, 0) - f.fetch(0, 0, 0)),
+ _half * (f.fetch(1, 1, 0) - f.fetch(-1, 1, 0)),
+ _half * (f.fetch(2, 1, 0) - f.fetch(0, 1, 0)),
+ _half * (f.fetch(1, 0, 1) - f.fetch(-1, 0, 1)),
+ _half * (f.fetch(2, 0, 1) - f.fetch(0, 0, 1)),
+ _half * (f.fetch(1, 1, 1) - f.fetch(-1, 1, 1)),
+ _half * (f.fetch(2, 1, 1) - f.fetch(0, 1, 1)),
+ // values of df/dy at the 8 corners (each from 2 stencil values).
+ _half * (f.fetch(0, 1, 0) - f.fetch(0, -1, 0)),
+ _half * (f.fetch(1, 1, 0) - f.fetch(1, -1, 0)),
+ _half * (f.fetch(0, 2, 0) - f.fetch(0, 0, 0)),
+ _half * (f.fetch(1, 2, 0) - f.fetch(1, 0, 0)),
+ _half * (f.fetch(0, 1, 1) - f.fetch(0, -1, 1)),
+ _half * (f.fetch(1, 1, 1) - f.fetch(1, -1, 1)),
+ _half * (f.fetch(0, 2, 1) - f.fetch(0, 0, 1)),
+ _half * (f.fetch(1, 2, 1) - f.fetch(1, 0, 1)),
+ // values of df/dz at the 8 corners (each from 2 stencil values).
+ _half * (f.fetch(0, 0, 1) - f.fetch(0, 0, -1)),
+ _half * (f.fetch(1, 0, 1) - f.fetch(1, 0, -1)),
+ _half * (f.fetch(0, 1, 1) - f.fetch(0, 1, -1)),
+ _half * (f.fetch(1, 1, 1) - f.fetch(1, 1, -1)),
+ _half * (f.fetch(0, 0, 2) - f.fetch(0, 0, 0)),
+ _half * (f.fetch(1, 0, 2) - f.fetch(1, 0, 0)),
+ _half * (f.fetch(0, 1, 2) - f.fetch(0, 1, 0)),
+ _half * (f.fetch(1, 1, 2) - f.fetch(1, 1, 0)),
+ // values of d2f/dxdy at the 8 corners (each from 4 stencil values).
+ quarter * (f.fetch(1, 1, 0) - f.fetch(-1, 1, 0) - f.fetch(1, -1, 0) + f.fetch(-1, -1, 0)),
+ quarter * (f.fetch(2, 1, 0) - f.fetch(0, 1, 0) - f.fetch(2, -1, 0) + f.fetch(0, -1, 0)),
+ quarter * (f.fetch(1, 2, 0) - f.fetch(-1, 2, 0) - f.fetch(1, 0, 0) + f.fetch(-1, 0, 0)),
+ quarter * (f.fetch(2, 2, 0) - f.fetch(0, 2, 0) - f.fetch(2, 0, 0) + f.fetch(0, 0, 0)),
+ quarter * (f.fetch(1, 1, 1) - f.fetch(-1, 1, 1) - f.fetch(1, -1, 1) + f.fetch(-1, -1, 1)),
+ quarter * (f.fetch(2, 1, 1) - f.fetch(0, 1, 1) - f.fetch(2, -1, 1) + f.fetch(0, -1, 1)),
+ quarter * (f.fetch(1, 2, 1) - f.fetch(-1, 2, 1) - f.fetch(1, 0, 1) + f.fetch(-1, 0, 1)),
+ quarter * (f.fetch(2, 2, 1) - f.fetch(0, 2, 1) - f.fetch(2, 0, 1) + f.fetch(0, 0, 1)),
+ // values of d2f/dxdz at the 8 corners (each from 4 stencil values).
+ quarter * (f.fetch(1, 0, 1) - f.fetch(-1, 0, 1) - f.fetch(1, 0, -1) + f.fetch(-1, 0, -1)),
+ quarter * (f.fetch(2, 0, 1) - f.fetch(0, 0, 1) - f.fetch(2, 0, -1) + f.fetch(0, 0, -1)),
+ quarter * (f.fetch(1, 1, 1) - f.fetch(-1, 1, 1) - f.fetch(1, 1, -1) + f.fetch(-1, 1, -1)),
+ quarter * (f.fetch(2, 1, 1) - f.fetch(0, 1, 1) - f.fetch(2, 1, -1) + f.fetch(0, 1, -1)),
+ quarter * (f.fetch(1, 0, 2) - f.fetch(-1, 0, 2) - f.fetch(1, 0, 0) + f.fetch(-1, 0, 0)),
+ quarter * (f.fetch(2, 0, 2) - f.fetch(0, 0, 2) - f.fetch(2, 0, 0) + f.fetch(0, 0, 0)),
+ quarter * (f.fetch(1, 1, 2) - f.fetch(-1, 1, 2) - f.fetch(1, 1, 0) + f.fetch(-1, 1, 0)),
+ quarter * (f.fetch(2, 1, 2) - f.fetch(0, 1, 2) - f.fetch(2, 1, 0) + f.fetch(0, 1, 0)),
+ // values of d2f/dydz at the 8 corners (each from 4 stencil values).
+ quarter * (f.fetch(0, 1, 1) - f.fetch(0, -1, 1) - f.fetch(0, 1, -1) + f.fetch(0, -1, -1)),
+ quarter * (f.fetch(1, 1, 1) - f.fetch(1, -1, 1) - f.fetch(1, 1, -1) + f.fetch(1, -1, -1)),
+ quarter * (f.fetch(0, 2, 1) - f.fetch(0, 0, 1) - f.fetch(0, 2, -1) + f.fetch(0, 0, -1)),
+ quarter * (f.fetch(1, 2, 1) - f.fetch(1, 0, 1) - f.fetch(1, 2, -1) + f.fetch(1, 0, -1)),
+ quarter * (f.fetch(0, 1, 2) - f.fetch(0, -1, 2) - f.fetch(0, 1, 0) + f.fetch(0, -1, 0)),
+ quarter * (f.fetch(1, 1, 2) - f.fetch(1, -1, 2) - f.fetch(1, 1, 0) + f.fetch(1, -1, 0)),
+ quarter * (f.fetch(0, 2, 2) - f.fetch(0, 0, 2) - f.fetch(0, 2, 0) + f.fetch(0, 0, 0)),
+ quarter * (f.fetch(1, 2, 2) - f.fetch(1, 0, 2) - f.fetch(1, 2, 0) + f.fetch(1, 0, 0)),
+ // values of d3f/dxdydz at the 8 corners (each from 8 stencil values).
+ eighth * (f.fetch(1, 1, 1) - f.fetch(-1, 1, 1) - f.fetch(1, -1, 1) + f.fetch(-1, -1, 1) - f.fetch(1, 1, -1) + f.fetch(-1, 1, -1) + f.fetch(1, -1, -1) - f.fetch(-1, -1, -1)),
+ eighth * (f.fetch(2, 1, 1) - f.fetch(0, 1, 1) - f.fetch(2, -1, 1) + f.fetch(0, -1, 1) - f.fetch(2, 1, -1) + f.fetch(0, 1, -1) + f.fetch(2, -1, -1) - f.fetch(0, -1, -1)),
+ eighth * (f.fetch(1, 2, 1) - f.fetch(-1, 2, 1) - f.fetch(1, 0, 1) + f.fetch(-1, 0, 1) - f.fetch(1, 2, -1) + f.fetch(-1, 2, -1) + f.fetch(1, 0, -1) - f.fetch(-1, 0, -1)),
+ eighth * (f.fetch(2, 2, 1) - f.fetch(0, 2, 1) - f.fetch(2, 0, 1) + f.fetch(0, 0, 1) - f.fetch(2, 2, -1) + f.fetch(0, 2, -1) + f.fetch(2, 0, -1) - f.fetch(0, 0, -1)),
+ eighth * (f.fetch(1, 1, 2) - f.fetch(-1, 1, 2) - f.fetch(1, -1, 2) + f.fetch(-1, -1, 2) - f.fetch(1, 1, 0) + f.fetch(-1, 1, 0) + f.fetch(1, -1, 0) - f.fetch(-1, -1, 0)),
+ eighth * (f.fetch(2, 1, 2) - f.fetch(0, 1, 2) - f.fetch(2, -1, 2) + f.fetch(0, -1, 2) - f.fetch(2, 1, 0) + f.fetch(0, 1, 0) + f.fetch(2, -1, 0) - f.fetch(0, -1, 0)),
+ eighth * (f.fetch(1, 2, 2) - f.fetch(-1, 2, 2) - f.fetch(1, 0, 2) + f.fetch(-1, 0, 2) - f.fetch(1, 2, 0) + f.fetch(-1, 2, 0) + f.fetch(1, 0, 0) - f.fetch(-1, 0, 0)),
+ eighth * (f.fetch(2, 2, 2) - f.fetch(0, 2, 2) - f.fetch(2, 0, 2) + f.fetch(0, 0, 2) - f.fetch(2, 2, 0) + f.fetch(0, 2, 0) + f.fetch(2, 0, 0) - f.fetch(0, 0, 0))};
+
+ for (int i = 0; i < 64; ++i) { // C = A * X
+ C[i] = ValueT(0);
+#if 0
+ for (int j = 0; j < 64; j += 4) {
+ C[i] = fma(A[i][j], X[j], fma(A[i][j+1], X[j+1], fma(A[i][j+2], X[j+2], fma(A[i][j+3], X[j+3], C[i]))));
+ }
+#else
+ for (int j = 0; j < 64; j += 4) {
+ C[i] += TricubicSampler_A[i][j] * X[j] + TricubicSampler_A[i][j + 1] * X[j + 1] +
+ TricubicSampler_A[i][j + 2] * X[j + 2] + TricubicSampler_A[i][j + 3] * X[j + 3];
+ }
+#endif
+ }
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+__hostdev__ typename TreeOrAccT::ValueType TricubicSampler<TreeOrAccT>::sample(__global__ const Vec3T<RealT> &xyz, __global__ const ValueT (&C)[64])
+{
+ ValueT zPow(1), sum(0);
+ for (int k = 0, n = 0; k < 4; ++k) {
+ ValueT yPow(1);
+ for (int j = 0; j < 4; ++j, n += 4) {
+#if 0
+ sum = fma( yPow, zPow * fma(xyz[0], fma(xyz[0], fma(xyz[0], C[n + 3], C[n + 2]), C[n + 1]), C[n]), sum);
+#else
+ sum += yPow * zPow * (C[n] + xyz[0] * (C[n + 1] + xyz[0] * (C[n + 2] + xyz[0] * C[n + 3])));
+#endif
+ yPow *= xyz[1];
+ }
+ zPow *= xyz[2];
+ }
+ return sum;
+}
+
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 3, true>
+#if !defined(__KERNEL_METAL__)
+ : public TricubicSampler<TreeOrAccT>
+#endif
+{
+#if defined(__KERNEL_METAL__)
+ TricubicSampler<TreeOrAccT> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) BaseT::v
+#endif
+ using BaseT = TricubicSampler<TreeOrAccT>;
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+
+ mutable CoordT mPos;
+ mutable ValueT mC[64];
+
+ template<typename RealT, template<typename...> class Vec3T>
+ __hostdev__ void cache(__global__ Vec3T<RealT>& xyz) const;
+
+public:
+ /// @brief Construction from a Tree or ReadAccessor
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc)
+ : BaseT(acc)
+ {
+ }
+
+ /// @note xyz is in index space space
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
+
+ // @brief Return value at the coordinate @a ijk in index space space
+ __hostdev__ ValueT operator()(__global__ const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);}
+
+}; // SampleFromVoxels<TreeOrAccT, 3, true>
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 3, true>::operator()(Vec3T<RealT> xyz) const
+{
+ this->cache(xyz);
+ return BaseT::sample(xyz, mC);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+void SampleFromVoxels<TreeOrAccT, 3, true>::cache(__global__ Vec3T<RealT>& xyz) const
+{
+ CoordT ijk = Floor<CoordT>(xyz);
+ if (ijk != mPos) {
+ mPos = ijk;
+ BaseT::stencil(ijk, mC);
+ }
+}
+
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 3, false>
+#if !defined(__KERNEL_METAL__)
+ : public TricubicSampler<TreeOrAccT>
+#endif
+{
+#if defined(__KERNEL_METAL__)
+ TricubicSampler<TreeOrAccT> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) BaseT::v
+#endif
+ using BaseT = TricubicSampler<TreeOrAccT>;
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+
+public:
+ /// @brief Construction from a Tree or ReadAccessor
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc)
+ : BaseT(acc)
+ {
+ }
+
+ /// @note xyz is in index space space
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
+
+ __hostdev__ ValueT operator()(__global__ const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);}
+
+}; // SampleFromVoxels<TreeOrAccT, 3, true>
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 3, false>::operator()(Vec3T<RealT> xyz) const
+{
+ ValueT C[64];
+ CoordT ijk = Floor<CoordT>(xyz);
+ BaseT::stencil(ijk, C);
+ return BaseT::sample(xyz, C);
+}
+
+} // namespace nanovdb
+
+#endif // NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED