ClangFormat: apply to source, most of intern

Apply clang-format as proposed in T53211.

For details on usage and instructions for migrating branches
without conflicts, see:

https://wiki.blender.org/wiki/Tools/ClangFormat
This commit is contained in:
2019-04-17 06:17:24 +02:00
parent b3dabc200a
commit e12c08e8d1
4481 changed files with 1230080 additions and 1155401 deletions

View File

@@ -113,7 +113,8 @@ ATOMIC_INLINE size_t atomic_sub_and_fetch_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new);
ATOMIC_INLINE size_t atomic_fetch_and_update_max_z(size_t *p, size_t x); /* Uses CAS loop, see warning below. */
ATOMIC_INLINE size_t
atomic_fetch_and_update_max_z(size_t *p, size_t x); /* Uses CAS loop, see warning below. */
ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x);
ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x);
@@ -123,7 +124,6 @@ ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsig
ATOMIC_INLINE void *atomic_cas_ptr(void **v, void *old, void *_new);
ATOMIC_INLINE float atomic_cas_float(float *v, float old, float _new);
/* WARNING! Float 'atomics' are really faked ones, those are actually closer to some kind of spinlock-sync'ed operation,

View File

@@ -56,105 +56,106 @@ ATOMIC_STATIC_ASSERT(sizeof(size_t) == LG_SIZEOF_PTR, "sizeof(size_t) != LG_SIZE
ATOMIC_INLINE size_t atomic_add_and_fetch_z(size_t *p, size_t x)
{
#if (LG_SIZEOF_PTR == 8)
return (size_t)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x);
return (size_t)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_PTR == 4)
return (size_t)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x);
return (size_t)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x);
#endif
}
ATOMIC_INLINE size_t atomic_sub_and_fetch_z(size_t *p, size_t x)
{
#if (LG_SIZEOF_PTR == 8)
return (size_t)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
return (size_t)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t) - ((int64_t)x));
#elif (LG_SIZEOF_PTR == 4)
return (size_t)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
return (size_t)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t) - ((int32_t)x));
#endif
}
ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x)
{
#if (LG_SIZEOF_PTR == 8)
return (size_t)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x);
return (size_t)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_PTR == 4)
return (size_t)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x);
return (size_t)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x);
#endif
}
ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x)
{
#if (LG_SIZEOF_PTR == 8)
return (size_t)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
return (size_t)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t) - ((int64_t)x));
#elif (LG_SIZEOF_PTR == 4)
return (size_t)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
return (size_t)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t) - ((int32_t)x));
#endif
}
ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new)
{
#if (LG_SIZEOF_PTR == 8)
return (size_t)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new);
return (size_t)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new);
#elif (LG_SIZEOF_PTR == 4)
return (size_t)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new);
return (size_t)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new);
#endif
}
ATOMIC_INLINE size_t atomic_fetch_and_update_max_z(size_t *p, size_t x)
{
size_t prev_value;
while((prev_value = *p) < x) {
if(atomic_cas_z(p, prev_value, x) == prev_value) {
break;
}
}
return prev_value;
size_t prev_value;
while ((prev_value = *p) < x) {
if (atomic_cas_z(p, prev_value, x) == prev_value) {
break;
}
}
return prev_value;
}
/******************************************************************************/
/* unsigned operations. */
ATOMIC_STATIC_ASSERT(sizeof(unsigned int) == LG_SIZEOF_INT, "sizeof(unsigned int) != LG_SIZEOF_INT");
ATOMIC_STATIC_ASSERT(sizeof(unsigned int) == LG_SIZEOF_INT,
"sizeof(unsigned int) != LG_SIZEOF_INT");
ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x)
{
#if (LG_SIZEOF_INT == 8)
return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x);
return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_INT == 4)
return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x);
return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x);
#endif
}
ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x)
{
#if (LG_SIZEOF_INT == 8)
return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t) - ((int64_t)x));
#elif (LG_SIZEOF_INT == 4)
return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t) - ((int32_t)x));
#endif
}
ATOMIC_INLINE unsigned int atomic_fetch_and_add_u(unsigned int *p, unsigned int x)
{
#if (LG_SIZEOF_INT == 8)
return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x);
return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_INT == 4)
return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x);
return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x);
#endif
}
ATOMIC_INLINE unsigned int atomic_fetch_and_sub_u(unsigned int *p, unsigned int x)
{
#if (LG_SIZEOF_INT == 8)
return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t) - ((int64_t)x));
#elif (LG_SIZEOF_INT == 4)
return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t) - ((int32_t)x));
#endif
}
ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsigned int _new)
{
#if (LG_SIZEOF_INT == 8)
return (unsigned int)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new);
return (unsigned int)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new);
#elif (LG_SIZEOF_INT == 4)
return (unsigned int)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new);
return (unsigned int)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new);
#endif
}
@@ -162,12 +163,12 @@ ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsig
/* Char operations. */
ATOMIC_INLINE char atomic_fetch_and_or_char(char *p, char b)
{
return (char)atomic_fetch_and_or_uint8((uint8_t *)p, (uint8_t)b);
return (char)atomic_fetch_and_or_uint8((uint8_t *)p, (uint8_t)b);
}
ATOMIC_INLINE char atomic_fetch_and_and_char(char *p, char b)
{
return (char)atomic_fetch_and_and_uint8((uint8_t *)p, (uint8_t)b);
return (char)atomic_fetch_and_and_uint8((uint8_t *)p, (uint8_t)b);
}
/******************************************************************************/
@@ -176,9 +177,9 @@ ATOMIC_INLINE char atomic_fetch_and_and_char(char *p, char b)
ATOMIC_INLINE void *atomic_cas_ptr(void **v, void *old, void *_new)
{
#if (LG_SIZEOF_PTR == 8)
return (void *)atomic_cas_uint64((uint64_t *)v, *(uint64_t *)&old, *(uint64_t *)&_new);
return (void *)atomic_cas_uint64((uint64_t *)v, *(uint64_t *)&old, *(uint64_t *)&_new);
#elif (LG_SIZEOF_PTR == 4)
return (void *)atomic_cas_uint32((uint32_t *)v, *(uint32_t *)&old, *(uint32_t *)&_new);
return (void *)atomic_cas_uint32((uint32_t *)v, *(uint32_t *)&old, *(uint32_t *)&_new);
#endif
}
@@ -188,22 +189,22 @@ ATOMIC_STATIC_ASSERT(sizeof(float) == sizeof(uint32_t), "sizeof(float) != sizeof
ATOMIC_INLINE float atomic_cas_float(float *v, float old, float _new)
{
uint32_t ret = atomic_cas_uint32((uint32_t *)v, *(uint32_t *)&old, *(uint32_t *)&_new);
return *(float *)&ret;
uint32_t ret = atomic_cas_uint32((uint32_t *)v, *(uint32_t *)&old, *(uint32_t *)&_new);
return *(float *)&ret;
}
ATOMIC_INLINE float atomic_add_and_fetch_fl(float *p, const float x)
{
float oldval, newval;
uint32_t prevval;
float oldval, newval;
uint32_t prevval;
do { /* Note that since collisions are unlikely, loop will nearly always run once. */
oldval = *p;
newval = oldval + x;
prevval = atomic_cas_uint32((uint32_t *)p, *(uint32_t *)(&oldval), *(uint32_t *)(&newval));
} while (_ATOMIC_UNLIKELY(prevval != *(uint32_t *)(&oldval)));
do { /* Note that since collisions are unlikely, loop will nearly always run once. */
oldval = *p;
newval = oldval + x;
prevval = atomic_cas_uint32((uint32_t *)p, *(uint32_t *)(&oldval), *(uint32_t *)(&newval));
} while (_ATOMIC_UNLIKELY(prevval != *(uint32_t *)(&oldval)));
return newval;
return newval;
}
#endif /* __ATOMIC_OPS_EXT_H__ */

View File

@@ -40,7 +40,7 @@
#include <windows.h>
#include <intrin.h>
#if defined (__clang__)
#if defined(__clang__)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wincompatible-pointer-types"
#endif
@@ -50,53 +50,53 @@
/* Unsigned */
ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x)
{
return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x;
return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x;
}
ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x)
{
return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)) - x;
return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)) - x;
}
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
return InterlockedCompareExchange64((int64_t *)v, _new, old);
return InterlockedCompareExchange64((int64_t *)v, _new, old);
}
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
{
return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x);
return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x);
}
ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x)
{
return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x));
return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x));
}
/* Signed */
ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x)
{
return InterlockedExchangeAdd64(p, x) + x;
return InterlockedExchangeAdd64(p, x) + x;
}
ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x)
{
return InterlockedExchangeAdd64(p, -x) - x;
return InterlockedExchangeAdd64(p, -x) - x;
}
ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
{
return InterlockedCompareExchange64(v, _new, old);
return InterlockedCompareExchange64(v, _new, old);
}
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{
return InterlockedExchangeAdd64(p, x);
return InterlockedExchangeAdd64(p, x);
}
ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x)
{
return InterlockedExchangeAdd64(p, -x);
return InterlockedExchangeAdd64(p, -x);
}
#endif
@@ -105,63 +105,63 @@ ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x)
/* Unsigned */
ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
{
return InterlockedExchangeAdd(p, x) + x;
return InterlockedExchangeAdd(p, x) + x;
}
ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x)
{
return InterlockedExchangeAdd(p, -((int32_t)x)) - x;
return InterlockedExchangeAdd(p, -((int32_t)x)) - x;
}
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
return InterlockedCompareExchange((long *)v, _new, old);
return InterlockedCompareExchange((long *)v, _new, old);
}
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x)
{
return InterlockedExchangeAdd(p, x);
return InterlockedExchangeAdd(p, x);
}
ATOMIC_INLINE uint32_t atomic_fetch_and_or_uint32(uint32_t *p, uint32_t x)
{
return InterlockedOr((long *)p, x);
return InterlockedOr((long *)p, x);
}
ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x)
{
return InterlockedAnd((long *)p, x);
return InterlockedAnd((long *)p, x);
}
/* Signed */
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
{
return InterlockedExchangeAdd((long *)p, x) + x;
return InterlockedExchangeAdd((long *)p, x) + x;
}
ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x)
{
return InterlockedExchangeAdd((long *)p, -x) - x;
return InterlockedExchangeAdd((long *)p, -x) - x;
}
ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
{
return InterlockedCompareExchange((long *)v, _new, old);
return InterlockedCompareExchange((long *)v, _new, old);
}
ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x)
{
return InterlockedExchangeAdd((long *)p, x);
return InterlockedExchangeAdd((long *)p, x);
}
ATOMIC_INLINE int32_t atomic_fetch_and_or_int32(int32_t *p, int32_t x)
{
return InterlockedOr((long *)p, x);
return InterlockedOr((long *)p, x);
}
ATOMIC_INLINE int32_t atomic_fetch_and_and_int32(int32_t *p, int32_t x)
{
return InterlockedAnd((long *)p, x);
return InterlockedAnd((long *)p, x);
}
/******************************************************************************/
@@ -172,9 +172,9 @@ ATOMIC_INLINE int32_t atomic_fetch_and_and_int32(int32_t *p, int32_t x)
ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
{
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
return InterlockedAnd8((char *)p, (char)b);
return InterlockedAnd8((char *)p, (char)b);
#else
return _InterlockedAnd8((char *)p, (char)b);
return _InterlockedAnd8((char *)p, (char)b);
#endif
}
@@ -182,9 +182,9 @@ ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
{
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
return InterlockedOr8((char *)p, (char)b);
return InterlockedOr8((char *)p, (char)b);
#else
return _InterlockedOr8((char *)p, (char)b);
return _InterlockedOr8((char *)p, (char)b);
#endif
}
@@ -193,9 +193,9 @@ ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
ATOMIC_INLINE int8_t atomic_fetch_and_and_int8(int8_t *p, int8_t b)
{
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
return InterlockedAnd8((char *)p, (char)b);
return InterlockedAnd8((char *)p, (char)b);
#else
return _InterlockedAnd8((char *)p, (char)b);
return _InterlockedAnd8((char *)p, (char)b);
#endif
}
@@ -203,14 +203,13 @@ ATOMIC_INLINE int8_t atomic_fetch_and_and_int8(int8_t *p, int8_t b)
ATOMIC_INLINE int8_t atomic_fetch_and_or_int8(int8_t *p, int8_t b)
{
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
return InterlockedOr8((char *)p, (char)b);
return InterlockedOr8((char *)p, (char)b);
#else
return _InterlockedOr8((char *)p, (char)b);
return _InterlockedOr8((char *)p, (char)b);
#endif
}
#if defined (__clang__)
#if defined(__clang__)
# pragma GCC diagnostic pop
#endif

View File

@@ -56,140 +56,128 @@
/* Unsigned */
ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x)
{
return __sync_add_and_fetch(p, x);
return __sync_add_and_fetch(p, x);
}
ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x)
{
return __sync_sub_and_fetch(p, x);
return __sync_sub_and_fetch(p, x);
}
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
{
return __sync_fetch_and_add(p, x);
return __sync_fetch_and_add(p, x);
}
ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x)
{
return __sync_fetch_and_sub(p, x);
return __sync_fetch_and_sub(p, x);
}
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
return __sync_val_compare_and_swap(v, old, _new);
return __sync_val_compare_and_swap(v, old, _new);
}
/* Signed */
ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x)
{
return __sync_add_and_fetch(p, x);
return __sync_add_and_fetch(p, x);
}
ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x)
{
return __sync_sub_and_fetch(p, x);
return __sync_sub_and_fetch(p, x);
}
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{
return __sync_fetch_and_add(p, x);
return __sync_fetch_and_add(p, x);
}
ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x)
{
return __sync_fetch_and_sub(p, x);
return __sync_fetch_and_sub(p, x);
}
ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
{
return __sync_val_compare_and_swap(v, old, _new);
return __sync_val_compare_and_swap(v, old, _new);
}
# elif (defined(__amd64__) || defined(__x86_64__))
/* Unsigned */
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
{
asm volatile (
"lock; xaddq %0, %1;"
: "+r" (x), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return x;
asm volatile("lock; xaddq %0, %1;"
: "+r"(x), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
return x;
}
ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x)
{
x = (uint64_t)(-(int64_t)x);
asm volatile (
"lock; xaddq %0, %1;"
: "+r" (x), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return x;
x = (uint64_t)(-(int64_t)x);
asm volatile("lock; xaddq %0, %1;"
: "+r"(x), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
return x;
}
ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x)
{
return atomic_fetch_and_add_uint64(p, x) + x;
return atomic_fetch_and_add_uint64(p, x) + x;
}
ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x)
{
return atomic_fetch_and_sub_uint64(p, x) - x;
return atomic_fetch_and_sub_uint64(p, x) - x;
}
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
uint64_t ret;
asm volatile (
"lock; cmpxchgq %2,%1"
: "=a" (ret), "+m" (*v)
: "r" (_new), "0" (old)
: "memory");
return ret;
uint64_t ret;
asm volatile("lock; cmpxchgq %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
return ret;
}
/* Signed */
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{
asm volatile (
"lock; xaddq %0, %1;"
: "+r" (x), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return x;
asm volatile("lock; xaddq %0, %1;"
: "+r"(x), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
return x;
}
ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x)
{
x = -x;
asm volatile (
"lock; xaddq %0, %1;"
: "+r" (x), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return x;
x = -x;
asm volatile("lock; xaddq %0, %1;"
: "+r"(x), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
return x;
}
ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x)
{
return atomic_fetch_and_add_int64(p, x) + x;
return atomic_fetch_and_add_int64(p, x) + x;
}
ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x)
{
return atomic_fetch_and_sub_int64(p, x) - x;
return atomic_fetch_and_sub_int64(p, x) - x;
}
ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
{
int64_t ret;
asm volatile (
"lock; cmpxchgq %2,%1"
: "=a" (ret), "+m" (*v)
: "r" (_new), "0" (old)
: "memory");
return ret;
int64_t ret;
asm volatile("lock; cmpxchgq %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
return ret;
}
# else
# error "Missing implementation for 64-bit atomic operations"
@@ -202,102 +190,90 @@ ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
/* Unsigned */
ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
{
return __sync_add_and_fetch(p, x);
return __sync_add_and_fetch(p, x);
}
ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x)
{
return __sync_sub_and_fetch(p, x);
return __sync_sub_and_fetch(p, x);
}
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
return __sync_val_compare_and_swap(v, old, _new);
return __sync_val_compare_and_swap(v, old, _new);
}
/* Signed */
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
{
return __sync_add_and_fetch(p, x);
return __sync_add_and_fetch(p, x);
}
ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x)
{
return __sync_sub_and_fetch(p, x);
return __sync_sub_and_fetch(p, x);
}
ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
{
return __sync_val_compare_and_swap(v, old, _new);
return __sync_val_compare_and_swap(v, old, _new);
}
#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
/* Unsigned */
ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
{
uint32_t ret = x;
asm volatile (
"lock; xaddl %0, %1;"
: "+r" (ret), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return ret + x;
uint32_t ret = x;
asm volatile("lock; xaddl %0, %1;"
: "+r"(ret), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
return ret + x;
}
ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x)
{
uint32_t ret = (uint32_t)(-(int32_t)x);
asm volatile (
"lock; xaddl %0, %1;"
: "+r" (ret), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return ret - x;
uint32_t ret = (uint32_t)(-(int32_t)x);
asm volatile("lock; xaddl %0, %1;"
: "+r"(ret), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
return ret - x;
}
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
uint32_t ret;
asm volatile (
"lock; cmpxchgl %2,%1"
: "=a" (ret), "+m" (*v)
: "r" (_new), "0" (old)
: "memory");
return ret;
uint32_t ret;
asm volatile("lock; cmpxchgl %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
return ret;
}
/* Signed */
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
{
int32_t ret = x;
asm volatile (
"lock; xaddl %0, %1;"
: "+r" (ret), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return ret + x;
int32_t ret = x;
asm volatile("lock; xaddl %0, %1;"
: "+r"(ret), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
return ret + x;
}
ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x)
{
int32_t ret = -x;
asm volatile (
"lock; xaddl %0, %1;"
: "+r" (ret), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return ret - x;
int32_t ret = -x;
asm volatile("lock; xaddl %0, %1;"
: "+r"(ret), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
return ret - x;
}
ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
{
int32_t ret;
asm volatile (
"lock; cmpxchgl %2,%1"
: "=a" (ret), "+m" (*v)
: "r" (_new), "0" (old)
: "memory");
return ret;
int32_t ret;
asm volatile("lock; cmpxchgl %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
return ret;
}
#else
@@ -308,33 +284,33 @@ ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
/* Unsigned */
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x)
{
return __sync_fetch_and_add(p, x);
return __sync_fetch_and_add(p, x);
}
ATOMIC_INLINE uint32_t atomic_fetch_and_or_uint32(uint32_t *p, uint32_t x)
{
return __sync_fetch_and_or(p, x);
return __sync_fetch_and_or(p, x);
}
ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x)
{
return __sync_fetch_and_and(p, x);
return __sync_fetch_and_and(p, x);
}
/* Signed */
ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x)
{
return __sync_fetch_and_add(p, x);
return __sync_fetch_and_add(p, x);
}
ATOMIC_INLINE int32_t atomic_fetch_and_or_int32(int32_t *p, int32_t x)
{
return __sync_fetch_and_or(p, x);
return __sync_fetch_and_or(p, x);
}
ATOMIC_INLINE int32_t atomic_fetch_and_and_int32(int32_t *p, int32_t x)
{
return __sync_fetch_and_and(p, x);
return __sync_fetch_and_and(p, x);
}
#else
@@ -347,21 +323,21 @@ ATOMIC_INLINE int32_t atomic_fetch_and_and_int32(int32_t *p, int32_t x)
/* Unsigned */
ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
{
return __sync_fetch_and_and(p, b);
return __sync_fetch_and_and(p, b);
}
ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
{
return __sync_fetch_and_or(p, b);
return __sync_fetch_and_or(p, b);
}
/* Signed */
ATOMIC_INLINE int8_t atomic_fetch_and_and_int8(int8_t *p, int8_t b)
{
return __sync_fetch_and_and(p, b);
return __sync_fetch_and_and(p, b);
}
ATOMIC_INLINE int8_t atomic_fetch_and_or_int8(int8_t *p, int8_t b)
{
return __sync_fetch_and_or(p, b);
return __sync_fetch_and_or(p, b);
}
#else

View File

@@ -62,11 +62,11 @@
#endif
#ifdef __GNUC__
# define _ATOMIC_LIKELY(x) __builtin_expect(!!(x), 1)
# define _ATOMIC_UNLIKELY(x) __builtin_expect(!!(x), 0)
# define _ATOMIC_LIKELY(x) __builtin_expect(!!(x), 1)
# define _ATOMIC_UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
# define _ATOMIC_LIKELY(x) (x)
# define _ATOMIC_UNLIKELY(x) (x)
# define _ATOMIC_LIKELY(x) (x)
# define _ATOMIC_UNLIKELY(x) (x)
#endif
#if defined(__SIZEOF_POINTER__)
@@ -77,7 +77,7 @@
# elif (UINTPTR_MAX == 0xFFFFFFFFFFFFFFFF)
# define LG_SIZEOF_PTR 8
# endif
#elif defined(__WORDSIZE) /* Fallback for older glibc and cpp */
#elif defined(__WORDSIZE) /* Fallback for older glibc and cpp */
# if (__WORDSIZE == 32)
# define LG_SIZEOF_PTR 4
# elif (__WORDSIZE == 64)
@@ -100,9 +100,8 @@
/* Copied from BLI_utils... */
/* C++ can't use _Static_assert: it expects static_assert(), which is C++0x and later only.
 * Coverity also errors out on _Static_assert. */
#if (!defined(__cplusplus)) && \
(!defined(__COVERITY__)) && \
(defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 406)) /* gcc4.6+ only */
#if (!defined(__cplusplus)) && (!defined(__COVERITY__)) && \
(defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 406)) /* gcc4.6+ only */
# define ATOMIC_STATIC_ASSERT(a, msg) __extension__ _Static_assert(a, msg);
#else
/* Code adapted from http://www.pixelbeat.org/programming/gcc/static_assert.html */
@@ -110,17 +109,19 @@
* expand __LINE__ with one indirection before doing the actual concatenation. */
# define ATOMIC_ASSERT_CONCAT_(a, b) a##b
# define ATOMIC_ASSERT_CONCAT(a, b) ATOMIC_ASSERT_CONCAT_(a, b)
/* These can't be used after statements in c89. */
# if defined(__COUNTER__) /* MSVC */
/* These can't be used after statements in c89. */
# if defined(__COUNTER__) /* MSVC */
# define ATOMIC_STATIC_ASSERT(a, msg) \
; enum { ATOMIC_ASSERT_CONCAT(static_assert_, __COUNTER__) = 1 / (int)(!!(a)) };
# else /* older gcc, clang... */
/* This can't be used twice on the same line so ensure if using in headers
; \
enum { ATOMIC_ASSERT_CONCAT(static_assert_, __COUNTER__) = 1 / (int)(!!(a)) };
# else /* older gcc, clang... */
/* This can't be used twice on the same line so ensure if using in headers
* that the headers are not included twice (by wrapping in #ifndef...#endif)
* Note it doesn't cause an issue when used on same line of separate modules
* compiled with gcc -combine -fwhole-program. */
# define ATOMIC_STATIC_ASSERT(a, msg) \
; enum { ATOMIC_ASSERT_CONCAT(assert_line_, __LINE__) = 1 / (int)(!!(a)) };
; \
enum { ATOMIC_ASSERT_CONCAT(assert_line_, __LINE__) = 1 / (int)(!!(a)) };
# endif
#endif

View File

@@ -22,46 +22,46 @@
remove_strict_flags()
if(CMAKE_COMPILER_IS_GNUCC)
remove_cc_flag("-Wunused-macros")
remove_cc_flag("-Wunused-macros")
endif()
set(INC
.
)
set(INC
.
)
set(INC_SYS
${AUDASPACE_C_INCLUDE_DIRS}
${AUDASPACE_PY_INCLUDE_DIRS}
)
set(INC_SYS
${AUDASPACE_C_INCLUDE_DIRS}
${AUDASPACE_PY_INCLUDE_DIRS}
)
set(SRC
intern/AUD_Set.cpp
intern/AUD_Set.h
)
set(SRC
intern/AUD_Set.cpp
intern/AUD_Set.h
)
set(LIB
)
if(NOT WITH_SYSTEM_AUDASPACE)
list(APPEND LIB
audaspace
)
list(APPEND LIB
audaspace
)
endif()
if(WITH_PYTHON)
list(APPEND INC_SYS
${PYTHON_INCLUDE_DIRS}
)
list(APPEND SRC
intern/AUD_PyInit.cpp
intern/AUD_PyInit.h
)
if(NOT WITH_SYSTEM_AUDASPACE)
list(APPEND LIB
audaspace-py
)
endif()
list(APPEND INC_SYS
${PYTHON_INCLUDE_DIRS}
)
list(APPEND SRC
intern/AUD_PyInit.cpp
intern/AUD_PyInit.h
)
if(NOT WITH_SYSTEM_AUDASPACE)
list(APPEND LIB
audaspace-py
)
endif()
add_definitions(-DWITH_PYTHON)
add_definitions(-DWITH_PYTHON)
endif()
blender_add_lib(bf_intern_audaspace "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")

View File

@@ -34,46 +34,47 @@ extern void *BKE_sound_get_factory(void *sound);
static PyObject *AUD_getSoundFromPointer(PyObject *self, PyObject *args)
{
long int lptr;
long int lptr;
if (PyArg_Parse(args, "l:_sound_from_pointer", &lptr)) {
if (lptr) {
AUD_Sound* sound = BKE_sound_get_factory((void *) lptr);
if (PyArg_Parse(args, "l:_sound_from_pointer", &lptr)) {
if (lptr) {
AUD_Sound *sound = BKE_sound_get_factory((void *)lptr);
if (sound) {
Sound *obj = (Sound *)Sound_empty();
if (obj) {
obj->sound = AUD_Sound_copy(sound);
return (PyObject *) obj;
}
}
}
}
if (sound) {
Sound *obj = (Sound *)Sound_empty();
if (obj) {
obj->sound = AUD_Sound_copy(sound);
return (PyObject *)obj;
}
}
}
}
Py_RETURN_NONE;
Py_RETURN_NONE;
}
static PyMethodDef meth_sound_from_pointer[] = {
{"_sound_from_pointer", (PyCFunction)AUD_getSoundFromPointer, METH_O,
{"_sound_from_pointer",
(PyCFunction)AUD_getSoundFromPointer,
METH_O,
"_sound_from_pointer(pointer)\n\n"
"Returns the corresponding :class:`Factory` object.\n\n"
":arg pointer: The pointer to the bSound object as long.\n"
":type pointer: long\n"
":return: The corresponding :class:`Factory` object.\n"
":rtype: :class:`Factory`"}
};
":rtype: :class:`Factory`"}};
PyObject *AUD_initPython(void)
{
PyObject *module = PyInit_aud();
if (module == NULL) {
printf("Unable to initialise audio\n");
return NULL;
}
PyObject *module = PyInit_aud();
if (module == NULL) {
printf("Unable to initialise audio\n");
return NULL;
}
PyModule_AddObject(module, "_sound_from_pointer", (PyObject *)PyCFunction_New(meth_sound_from_pointer, NULL));
PyDict_SetItemString(PyImport_GetModuleDict(), "aud", module);
PyModule_AddObject(
module, "_sound_from_pointer", (PyObject *)PyCFunction_New(meth_sound_from_pointer, NULL));
PyDict_SetItemString(PyImport_GetModuleDict(), "aud", module);
return module;
return module;
}

View File

@@ -22,26 +22,25 @@
* \ingroup audaspaceintern
*/
#ifndef __AUD_PYINIT_H__
#define __AUD_PYINIT_H__
#ifdef WITH_PYTHON
#include "Python.h"
# include "Python.h"
#ifdef __cplusplus
# ifdef __cplusplus
extern "C" {
#endif
# endif
/**
* Initializes the Python module.
*/
extern PyObject *AUD_initPython(void);
#ifdef __cplusplus
# ifdef __cplusplus
}
#endif
# endif
#endif
#endif //__AUD_PYINIT_H__
#endif //__AUD_PYINIT_H__

View File

@@ -28,38 +28,38 @@
void *AUD_createSet()
{
return new std::set<void *>();
return new std::set<void *>();
}
void AUD_destroySet(void *set)
{
delete reinterpret_cast<std::set<void *>*>(set);
delete reinterpret_cast<std::set<void *> *>(set);
}
char AUD_removeSet(void *set, void *entry)
{
if (set)
return reinterpret_cast<std::set<void *>*>(set)->erase(entry);
return 0;
if (set)
return reinterpret_cast<std::set<void *> *>(set)->erase(entry);
return 0;
}
void AUD_addSet(void *set, void *entry)
{
if (entry)
reinterpret_cast<std::set<void *>*>(set)->insert(entry);
if (entry)
reinterpret_cast<std::set<void *> *>(set)->insert(entry);
}
void *AUD_getSet(void *set)
{
if (set) {
std::set<void *>* rset = reinterpret_cast<std::set<void *>*>(set);
if (!rset->empty()) {
std::set<void *>::iterator it = rset->begin();
void *result = *it;
rset->erase(it);
return result;
}
}
if (set) {
std::set<void *> *rset = reinterpret_cast<std::set<void *> *>(set);
if (!rset->empty()) {
std::set<void *>::iterator it = rset->begin();
void *result = *it;
rset->erase(it);
return result;
}
}
return (void*) 0;
return (void *)0;
}

View File

@@ -67,4 +67,4 @@ extern void *AUD_getSet(void *set);
}
#endif
#endif //__AUD_SET_H__
#endif //__AUD_SET_H__

View File

@@ -73,13 +73,14 @@ extern "C" {
#endif /* __cplusplus */
#ifdef __GNUC__
# define _CLOG_ATTR_NONNULL(args ...) __attribute__((nonnull(args)))
# define _CLOG_ATTR_NONNULL(args...) __attribute__((nonnull(args)))
#else
# define _CLOG_ATTR_NONNULL(...)
#endif
#ifdef __GNUC__
# define _CLOG_ATTR_PRINTF_FORMAT(format_param, dots_param) __attribute__((format(printf, format_param, dots_param)))
# define _CLOG_ATTR_PRINTF_FORMAT(format_param, dots_param) \
__attribute__((format(printf, format_param, dots_param)))
#else
# define _CLOG_ATTR_PRINTF_FORMAT(format_param, dots_param)
#endif
@@ -92,41 +93,44 @@ struct CLogContext;
/* Don't typedef enums. */
enum CLG_LogFlag {
CLG_FLAG_USE = (1 << 0),
CLG_FLAG_USE = (1 << 0),
};
enum CLG_Severity {
CLG_SEVERITY_INFO = 0,
CLG_SEVERITY_WARN,
CLG_SEVERITY_ERROR,
CLG_SEVERITY_FATAL,
CLG_SEVERITY_INFO = 0,
CLG_SEVERITY_WARN,
CLG_SEVERITY_ERROR,
CLG_SEVERITY_FATAL,
};
#define CLG_SEVERITY_LEN (CLG_SEVERITY_FATAL + 1)
/* Each logger ID has one of these. */
typedef struct CLG_LogType {
struct CLG_LogType *next;
char identifier[64];
/** FILE output. */
struct CLogContext *ctx;
/** Control behavior. */
int level;
enum CLG_LogFlag flag;
struct CLG_LogType *next;
char identifier[64];
/** FILE output. */
struct CLogContext *ctx;
/** Control behavior. */
int level;
enum CLG_LogFlag flag;
} CLG_LogType;
typedef struct CLG_LogRef {
const char *identifier;
CLG_LogType *type;
const char *identifier;
CLG_LogType *type;
} CLG_LogRef;
void CLG_log_str(
CLG_LogType *lg, enum CLG_Severity severity, const char *file_line, const char *fn,
const char *message)
_CLOG_ATTR_NONNULL(1, 3, 4, 5);
void CLG_logf(
CLG_LogType *lg, enum CLG_Severity severity, const char *file_line, const char *fn,
const char *format, ...)
_CLOG_ATTR_NONNULL(1, 3, 4, 5) _CLOG_ATTR_PRINTF_FORMAT(5, 6);
void CLG_log_str(CLG_LogType *lg,
enum CLG_Severity severity,
const char *file_line,
const char *fn,
const char *message) _CLOG_ATTR_NONNULL(1, 3, 4, 5);
void CLG_logf(CLG_LogType *lg,
enum CLG_Severity severity,
const char *file_line,
const char *fn,
const char *format,
...) _CLOG_ATTR_NONNULL(1, 3, 4, 5) _CLOG_ATTR_PRINTF_FORMAT(5, 6);
/* Main initializer and destructor (per session, not logger). */
void CLG_init(void);
@@ -147,51 +151,63 @@ void CLG_logref_init(CLG_LogRef *clg_ref);
/** Declare outside function, declare as extern in header. */
#define CLG_LOGREF_DECLARE_GLOBAL(var, id) \
static CLG_LogRef _static_ ## var = {id}; \
CLG_LogRef *var = &_static_ ## var
static CLG_LogRef _static_##var = {id}; \
CLG_LogRef *var = &_static_##var
/** Initialize struct once. */
#define CLOG_ENSURE(clg_ref) \
((clg_ref)->type ? (clg_ref)->type : (CLG_logref_init(clg_ref), (clg_ref)->type))
((clg_ref)->type ? (clg_ref)->type : (CLG_logref_init(clg_ref), (clg_ref)->type))
#define CLOG_AT_SEVERITY(clg_ref, severity, verbose_level, ...) { \
CLG_LogType *_lg_ty = CLOG_ENSURE(clg_ref); \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || (severity >= CLG_SEVERITY_WARN)) { \
CLG_logf(_lg_ty, severity, __FILE__ ":" STRINGIFY(__LINE__), __func__, __VA_ARGS__); \
} \
} ((void)0)
#define CLOG_AT_SEVERITY(clg_ref, severity, verbose_level, ...) \
{ \
CLG_LogType *_lg_ty = CLOG_ENSURE(clg_ref); \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || \
(severity >= CLG_SEVERITY_WARN)) { \
CLG_logf(_lg_ty, severity, __FILE__ ":" STRINGIFY(__LINE__), __func__, __VA_ARGS__); \
} \
} \
((void)0)
#define CLOG_STR_AT_SEVERITY(clg_ref, severity, verbose_level, str) { \
CLG_LogType *_lg_ty = CLOG_ENSURE(clg_ref); \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || (severity >= CLG_SEVERITY_WARN)) { \
CLG_log_str(_lg_ty, severity, __FILE__ ":" STRINGIFY(__LINE__), __func__, str); \
} \
} ((void)0)
#define CLOG_STR_AT_SEVERITY(clg_ref, severity, verbose_level, str) \
{ \
CLG_LogType *_lg_ty = CLOG_ENSURE(clg_ref); \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || \
(severity >= CLG_SEVERITY_WARN)) { \
CLG_log_str(_lg_ty, severity, __FILE__ ":" STRINGIFY(__LINE__), __func__, str); \
} \
} \
((void)0)
#define CLOG_STR_AT_SEVERITY_N(clg_ref, severity, verbose_level, str) { \
CLG_LogType *_lg_ty = CLOG_ENSURE(clg_ref); \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || (severity >= CLG_SEVERITY_WARN)) { \
const char *_str = str; \
CLG_log_str(_lg_ty, severity, __FILE__ ":" STRINGIFY(__LINE__), __func__, _str); \
MEM_freeN((void *)_str); \
} \
} ((void)0)
#define CLOG_STR_AT_SEVERITY_N(clg_ref, severity, verbose_level, str) \
{ \
CLG_LogType *_lg_ty = CLOG_ENSURE(clg_ref); \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || \
(severity >= CLG_SEVERITY_WARN)) { \
const char *_str = str; \
CLG_log_str(_lg_ty, severity, __FILE__ ":" STRINGIFY(__LINE__), __func__, _str); \
MEM_freeN((void *)_str); \
} \
} \
((void)0)
#define CLOG_INFO(clg_ref, level, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_INFO, level, __VA_ARGS__)
#define CLOG_WARN(clg_ref, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_WARN, 0, __VA_ARGS__)
#define CLOG_ERROR(clg_ref, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_ERROR, 0, __VA_ARGS__)
#define CLOG_FATAL(clg_ref, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_FATAL, 0, __VA_ARGS__)
#define CLOG_INFO(clg_ref, level, ...) \
CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_INFO, level, __VA_ARGS__)
#define CLOG_WARN(clg_ref, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_WARN, 0, __VA_ARGS__)
#define CLOG_ERROR(clg_ref, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_ERROR, 0, __VA_ARGS__)
#define CLOG_FATAL(clg_ref, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_FATAL, 0, __VA_ARGS__)
#define CLOG_STR_INFO(clg_ref, level, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_INFO, level, str)
#define CLOG_STR_WARN(clg_ref, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_WARN, 0, str)
#define CLOG_STR_ERROR(clg_ref, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_ERROR, 0, str)
#define CLOG_STR_FATAL(clg_ref, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_FATAL, 0, str)
#define CLOG_STR_INFO(clg_ref, level, str) \
CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_INFO, level, str)
#define CLOG_STR_WARN(clg_ref, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_WARN, 0, str)
#define CLOG_STR_ERROR(clg_ref, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_ERROR, 0, str)
#define CLOG_STR_FATAL(clg_ref, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_FATAL, 0, str)
/* Allocated string which is immediately freed. */
#define CLOG_STR_INFO_N(clg_ref, level, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_INFO, level, str)
#define CLOG_STR_WARN_N(clg_ref, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_WARN, 0, str)
#define CLOG_STR_ERROR_N(clg_ref, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_ERROR, 0, str)
#define CLOG_STR_FATAL_N(clg_ref, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_FATAL, 0, str)
#define CLOG_STR_INFO_N(clg_ref, level, str) \
CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_INFO, level, str)
#define CLOG_STR_WARN_N(clg_ref, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_WARN, 0, str)
#define CLOG_STR_ERROR_N(clg_ref, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_ERROR, 0, str)
#define CLOG_STR_FATAL_N(clg_ref, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_FATAL, 0, str)
#ifdef __cplusplus
}

View File

@@ -17,9 +17,9 @@
# ***** END GPL LICENSE BLOCK *****
set(INC
.
../atomic
../guardedalloc
.
../atomic
../guardedalloc
)
set(INC_SYS
@@ -27,9 +27,9 @@ set(INC_SYS
)
set(SRC
clog.c
clog.c
CLG_log.h
CLG_log.h
)
set(LIB

View File

@@ -46,7 +46,6 @@
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
/* Only other dependency (could use regular malloc too). */
#include "MEM_guardedalloc.h"
@@ -68,40 +67,40 @@
* \{ */
typedef struct CLG_IDFilter {
struct CLG_IDFilter *next;
/** Over alloc. */
char match[0];
struct CLG_IDFilter *next;
/** Over alloc. */
char match[0];
} CLG_IDFilter;
typedef struct CLogContext {
/** Single linked list of types. */
CLG_LogType *types;
/** Single linked list of types. */
CLG_LogType *types;
#ifdef WITH_CLOG_PTHREADS
pthread_mutex_t types_lock;
pthread_mutex_t types_lock;
#endif
/* exclude, include filters. */
CLG_IDFilter *filters[2];
bool use_color;
bool use_basename;
bool use_timestamp;
/* exclude, include filters. */
CLG_IDFilter *filters[2];
bool use_color;
bool use_basename;
bool use_timestamp;
/** Borrowed, not owned. */
int output;
FILE *output_file;
/** Borrowed, not owned. */
int output;
FILE *output_file;
/** For timer (use_timestamp). */
uint64_t timestamp_tick_start;
/** For timer (use_timestamp). */
uint64_t timestamp_tick_start;
/** For new types. */
struct {
int level;
} default_type;
/** For new types. */
struct {
int level;
} default_type;
struct {
void (*fatal_fn)(void *file_handle);
void (*backtrace_fn)(void *file_handle);
} callbacks;
struct {
void (*fatal_fn)(void *file_handle);
void (*backtrace_fn)(void *file_handle);
} callbacks;
} CLogContext;
/** \} */
@@ -115,92 +114,92 @@ typedef struct CLogContext {
#define CLOG_BUF_LEN_INIT 512
typedef struct CLogStringBuf {
char *data;
uint len;
uint len_alloc;
bool is_alloc;
char *data;
uint len;
uint len_alloc;
bool is_alloc;
} CLogStringBuf;
static void clg_str_init(CLogStringBuf *cstr, char *buf_stack, uint buf_stack_len)
{
cstr->data = buf_stack;
cstr->len_alloc = buf_stack_len;
cstr->len = 0;
cstr->is_alloc = false;
cstr->data = buf_stack;
cstr->len_alloc = buf_stack_len;
cstr->len = 0;
cstr->is_alloc = false;
}
static void clg_str_free(CLogStringBuf *cstr)
{
if (cstr->is_alloc) {
MEM_freeN(cstr->data);
}
if (cstr->is_alloc) {
MEM_freeN(cstr->data);
}
}
static void clg_str_reserve(CLogStringBuf *cstr, const uint len)
{
if (len > cstr->len_alloc) {
cstr->len_alloc *= 2;
if (len > cstr->len_alloc) {
cstr->len_alloc = len;
}
if (len > cstr->len_alloc) {
cstr->len_alloc *= 2;
if (len > cstr->len_alloc) {
cstr->len_alloc = len;
}
if (cstr->is_alloc) {
cstr->data = MEM_reallocN(cstr->data, cstr->len_alloc);
}
else {
/* Copy the static buffer. */
char *data = MEM_mallocN(cstr->len_alloc, __func__);
memcpy(data, cstr->data, cstr->len);
cstr->data = data;
cstr->is_alloc = true;
}
cstr->len_alloc = len;
}
if (cstr->is_alloc) {
cstr->data = MEM_reallocN(cstr->data, cstr->len_alloc);
}
else {
/* Copy the static buffer. */
char *data = MEM_mallocN(cstr->len_alloc, __func__);
memcpy(data, cstr->data, cstr->len);
cstr->data = data;
cstr->is_alloc = true;
}
cstr->len_alloc = len;
}
}
static void clg_str_append_with_len(CLogStringBuf *cstr, const char *str, const uint len)
{
uint len_next = cstr->len + len;
clg_str_reserve(cstr, len_next);
char *str_dst = cstr->data + cstr->len;
memcpy(str_dst, str, len);
uint len_next = cstr->len + len;
clg_str_reserve(cstr, len_next);
char *str_dst = cstr->data + cstr->len;
memcpy(str_dst, str, len);
#if 0 /* no need. */
str_dst[len] = '\0';
str_dst[len] = '\0';
#endif
cstr->len = len_next;
cstr->len = len_next;
}
static void clg_str_append(CLogStringBuf *cstr, const char *str)
{
clg_str_append_with_len(cstr, str, strlen(str));
clg_str_append_with_len(cstr, str, strlen(str));
}
static void clg_str_vappendf(CLogStringBuf *cstr, const char *fmt, va_list args)
{
/* Use limit because windows may use '-1' for a formatting error. */
const uint len_max = 65535;
uint len_avail = (cstr->len_alloc - cstr->len);
if (len_avail == 0) {
len_avail = CLOG_BUF_LEN_INIT;
clg_str_reserve(cstr, len_avail);
}
while (true) {
va_list args_cpy;
va_copy(args_cpy, args);
int retval = vsnprintf(cstr->data + cstr->len, len_avail, fmt, args_cpy);
va_end(args_cpy);
if (retval != -1) {
cstr->len += retval;
break;
}
else {
len_avail *= 2;
if (len_avail >= len_max) {
break;
}
clg_str_reserve(cstr, len_avail);
}
}
/* Use limit because windows may use '-1' for a formatting error. */
const uint len_max = 65535;
uint len_avail = (cstr->len_alloc - cstr->len);
if (len_avail == 0) {
len_avail = CLOG_BUF_LEN_INIT;
clg_str_reserve(cstr, len_avail);
}
while (true) {
va_list args_cpy;
va_copy(args_cpy, args);
int retval = vsnprintf(cstr->data + cstr->len, len_avail, fmt, args_cpy);
va_end(args_cpy);
if (retval != -1) {
cstr->len += retval;
break;
}
else {
len_avail *= 2;
if (len_avail >= len_max) {
break;
}
clg_str_reserve(cstr, len_avail);
}
}
}
/** \} */
@@ -210,12 +209,12 @@ static void clg_str_vappendf(CLogStringBuf *cstr, const char *fmt, va_list args)
* \{ */
enum eCLogColor {
COLOR_DEFAULT,
COLOR_RED,
COLOR_GREEN,
COLOR_YELLOW,
COLOR_DEFAULT,
COLOR_RED,
COLOR_GREEN,
COLOR_YELLOW,
COLOR_RESET,
COLOR_RESET,
};
#define COLOR_LEN (COLOR_RESET + 1)
@@ -223,61 +222,61 @@ static const char *clg_color_table[COLOR_LEN] = {NULL};
static void clg_color_table_init(bool use_color)
{
for (int i = 0; i < COLOR_LEN; i++) {
clg_color_table[i] = "";
}
if (use_color) {
for (int i = 0; i < COLOR_LEN; i++) {
clg_color_table[i] = "";
}
if (use_color) {
#ifdef _WIN32
/* TODO */
/* TODO */
#else
clg_color_table[COLOR_DEFAULT] = "\033[1;37m";
clg_color_table[COLOR_RED] = "\033[1;31m";
clg_color_table[COLOR_GREEN] = "\033[1;32m";
clg_color_table[COLOR_YELLOW] = "\033[1;33m";
clg_color_table[COLOR_RESET] = "\033[0m";
clg_color_table[COLOR_DEFAULT] = "\033[1;37m";
clg_color_table[COLOR_RED] = "\033[1;31m";
clg_color_table[COLOR_GREEN] = "\033[1;32m";
clg_color_table[COLOR_YELLOW] = "\033[1;33m";
clg_color_table[COLOR_RESET] = "\033[0m";
#endif
}
}
}
static const char *clg_severity_str[CLG_SEVERITY_LEN] = {
[CLG_SEVERITY_INFO] = "INFO",
[CLG_SEVERITY_WARN] = "WARN",
[CLG_SEVERITY_ERROR] = "ERROR",
[CLG_SEVERITY_FATAL] = "FATAL",
[CLG_SEVERITY_INFO] = "INFO",
[CLG_SEVERITY_WARN] = "WARN",
[CLG_SEVERITY_ERROR] = "ERROR",
[CLG_SEVERITY_FATAL] = "FATAL",
};
static const char *clg_severity_as_text(enum CLG_Severity severity)
{
bool ok = (unsigned int)severity < CLG_SEVERITY_LEN;
assert(ok);
if (ok) {
return clg_severity_str[severity];
}
else {
return "INVALID_SEVERITY";
}
bool ok = (unsigned int)severity < CLG_SEVERITY_LEN;
assert(ok);
if (ok) {
return clg_severity_str[severity];
}
else {
return "INVALID_SEVERITY";
}
}
static enum eCLogColor clg_severity_to_color(enum CLG_Severity severity)
{
assert((unsigned int)severity < CLG_SEVERITY_LEN);
enum eCLogColor color = COLOR_DEFAULT;
switch (severity) {
case CLG_SEVERITY_INFO:
color = COLOR_DEFAULT;
break;
case CLG_SEVERITY_WARN:
color = COLOR_YELLOW;
break;
case CLG_SEVERITY_ERROR:
case CLG_SEVERITY_FATAL:
color = COLOR_RED;
break;
default:
/* should never get here. */
assert(false);
}
return color;
assert((unsigned int)severity < CLG_SEVERITY_LEN);
enum eCLogColor color = COLOR_DEFAULT;
switch (severity) {
case CLG_SEVERITY_INFO:
color = COLOR_DEFAULT;
break;
case CLG_SEVERITY_WARN:
color = COLOR_YELLOW;
break;
case CLG_SEVERITY_ERROR:
case CLG_SEVERITY_FATAL:
color = COLOR_RED;
break;
default:
/* should never get here. */
assert(false);
}
return color;
}
/** \} */
@@ -295,27 +294,24 @@ static enum eCLogColor clg_severity_to_color(enum CLG_Severity severity)
*/
static bool clg_ctx_filter_check(CLogContext *ctx, const char *identifier)
{
const int identifier_len = strlen(identifier);
for (uint i = 0; i < 2; i++) {
const CLG_IDFilter *flt = ctx->filters[i];
while (flt != NULL) {
const int len = strlen(flt->match);
if (STREQ(flt->match, "*") ||
((len == identifier_len) && (STREQ(identifier, flt->match))))
{
return (bool)i;
}
if ((len >= 2) && (STREQLEN(".*", &flt->match[len - 2], 2))) {
if (((identifier_len == len - 2) && STREQLEN(identifier, flt->match, len - 2)) ||
((identifier_len >= len - 1) && STREQLEN(identifier, flt->match, len - 1)))
{
return (bool)i;
}
}
flt = flt->next;
}
}
return false;
const int identifier_len = strlen(identifier);
for (uint i = 0; i < 2; i++) {
const CLG_IDFilter *flt = ctx->filters[i];
while (flt != NULL) {
const int len = strlen(flt->match);
if (STREQ(flt->match, "*") || ((len == identifier_len) && (STREQ(identifier, flt->match)))) {
return (bool)i;
}
if ((len >= 2) && (STREQLEN(".*", &flt->match[len - 2], 2))) {
if (((identifier_len == len - 2) && STREQLEN(identifier, flt->match, len - 2)) ||
((identifier_len >= len - 1) && STREQLEN(identifier, flt->match, len - 1))) {
return (bool)i;
}
}
flt = flt->next;
}
}
return false;
}
/**
@@ -324,58 +320,58 @@ static bool clg_ctx_filter_check(CLogContext *ctx, const char *identifier)
*/
static CLG_LogType *clg_ctx_type_find_by_name(CLogContext *ctx, const char *identifier)
{
for (CLG_LogType *ty = ctx->types; ty; ty = ty->next) {
if (STREQ(identifier, ty->identifier)) {
return ty;
}
}
return NULL;
for (CLG_LogType *ty = ctx->types; ty; ty = ty->next) {
if (STREQ(identifier, ty->identifier)) {
return ty;
}
}
return NULL;
}
static CLG_LogType *clg_ctx_type_register(CLogContext *ctx, const char *identifier)
{
assert(clg_ctx_type_find_by_name(ctx, identifier) == NULL);
CLG_LogType *ty = MEM_callocN(sizeof(*ty), __func__);
ty->next = ctx->types;
ctx->types = ty;
strncpy(ty->identifier, identifier, sizeof(ty->identifier) - 1);
ty->ctx = ctx;
ty->level = ctx->default_type.level;
assert(clg_ctx_type_find_by_name(ctx, identifier) == NULL);
CLG_LogType *ty = MEM_callocN(sizeof(*ty), __func__);
ty->next = ctx->types;
ctx->types = ty;
strncpy(ty->identifier, identifier, sizeof(ty->identifier) - 1);
ty->ctx = ctx;
ty->level = ctx->default_type.level;
if (clg_ctx_filter_check(ctx, ty->identifier)) {
ty->flag |= CLG_FLAG_USE;
}
return ty;
if (clg_ctx_filter_check(ctx, ty->identifier)) {
ty->flag |= CLG_FLAG_USE;
}
return ty;
}
static void clg_ctx_fatal_action(CLogContext *ctx)
{
if (ctx->callbacks.fatal_fn != NULL) {
ctx->callbacks.fatal_fn(ctx->output_file);
}
fflush(ctx->output_file);
abort();
if (ctx->callbacks.fatal_fn != NULL) {
ctx->callbacks.fatal_fn(ctx->output_file);
}
fflush(ctx->output_file);
abort();
}
static void clg_ctx_backtrace(CLogContext *ctx)
{
/* Note: we avoid writing to 'FILE', for backtrace we make an exception,
* if necessary we could have a version of the callback that writes to a file descriptor all at once. */
ctx->callbacks.backtrace_fn(ctx->output_file);
fflush(ctx->output_file);
/* Note: we avoid writing to 'FILE', for backtrace we make an exception,
* if necessary we could have a version of the callback that writes to a file descriptor all at once. */
ctx->callbacks.backtrace_fn(ctx->output_file);
fflush(ctx->output_file);
}
static uint64_t clg_timestamp_ticks_get(void)
{
uint64_t tick;
uint64_t tick;
#if defined(_MSC_VER)
tick = GetTickCount64();
tick = GetTickCount64();
#else
struct timeval tv;
gettimeofday(&tv, NULL);
tick = tv.tv_sec * 1000 + tv.tv_usec / 1000;
struct timeval tv;
gettimeofday(&tv, NULL);
tick = tv.tv_sec * 1000 + tv.tv_usec / 1000;
#endif
return tick;
return tick;
}
/** \} */
@@ -386,131 +382,140 @@ static uint64_t clg_timestamp_ticks_get(void)
static void write_timestamp(CLogStringBuf *cstr, const uint64_t timestamp_tick_start)
{
char timestamp_str[64];
const uint64_t timestamp = clg_timestamp_ticks_get() - timestamp_tick_start;
const uint timestamp_len = snprintf(
timestamp_str, sizeof(timestamp_str), "%" PRIu64 ".%03u ",
timestamp / 1000, (uint)(timestamp % 1000));
clg_str_append_with_len(cstr, timestamp_str, timestamp_len);
char timestamp_str[64];
const uint64_t timestamp = clg_timestamp_ticks_get() - timestamp_tick_start;
const uint timestamp_len = snprintf(timestamp_str,
sizeof(timestamp_str),
"%" PRIu64 ".%03u ",
timestamp / 1000,
(uint)(timestamp % 1000));
clg_str_append_with_len(cstr, timestamp_str, timestamp_len);
}
static void write_severity(CLogStringBuf *cstr, enum CLG_Severity severity, bool use_color)
{
assert((unsigned int)severity < CLG_SEVERITY_LEN);
if (use_color) {
enum eCLogColor color = clg_severity_to_color(severity);
clg_str_append(cstr, clg_color_table[color]);
clg_str_append(cstr, clg_severity_as_text(severity));
clg_str_append(cstr, clg_color_table[COLOR_RESET]);
}
else {
clg_str_append(cstr, clg_severity_as_text(severity));
}
assert((unsigned int)severity < CLG_SEVERITY_LEN);
if (use_color) {
enum eCLogColor color = clg_severity_to_color(severity);
clg_str_append(cstr, clg_color_table[color]);
clg_str_append(cstr, clg_severity_as_text(severity));
clg_str_append(cstr, clg_color_table[COLOR_RESET]);
}
else {
clg_str_append(cstr, clg_severity_as_text(severity));
}
}
static void write_type(CLogStringBuf *cstr, CLG_LogType *lg)
{
clg_str_append(cstr, " (");
clg_str_append(cstr, lg->identifier);
clg_str_append(cstr, "): ");
clg_str_append(cstr, " (");
clg_str_append(cstr, lg->identifier);
clg_str_append(cstr, "): ");
}
static void write_file_line_fn(CLogStringBuf *cstr, const char *file_line, const char *fn, const bool use_basename)
static void write_file_line_fn(CLogStringBuf *cstr,
const char *file_line,
const char *fn,
const bool use_basename)
{
uint file_line_len = strlen(file_line);
if (use_basename) {
uint file_line_offset = file_line_len;
while (file_line_offset-- > 0) {
if (file_line[file_line_offset] == PATHSEP_CHAR) {
file_line_offset++;
break;
}
}
file_line += file_line_offset;
file_line_len -= file_line_offset;
}
clg_str_append_with_len(cstr, file_line, file_line_len);
uint file_line_len = strlen(file_line);
if (use_basename) {
uint file_line_offset = file_line_len;
while (file_line_offset-- > 0) {
if (file_line[file_line_offset] == PATHSEP_CHAR) {
file_line_offset++;
break;
}
}
file_line += file_line_offset;
file_line_len -= file_line_offset;
}
clg_str_append_with_len(cstr, file_line, file_line_len);
clg_str_append(cstr, " ");
clg_str_append(cstr, fn);
clg_str_append(cstr, ": ");
clg_str_append(cstr, " ");
clg_str_append(cstr, fn);
clg_str_append(cstr, ": ");
}
void CLG_log_str(
CLG_LogType *lg, enum CLG_Severity severity, const char *file_line, const char *fn,
const char *message)
void CLG_log_str(CLG_LogType *lg,
enum CLG_Severity severity,
const char *file_line,
const char *fn,
const char *message)
{
CLogStringBuf cstr;
char cstr_stack_buf[CLOG_BUF_LEN_INIT];
clg_str_init(&cstr, cstr_stack_buf, sizeof(cstr_stack_buf));
CLogStringBuf cstr;
char cstr_stack_buf[CLOG_BUF_LEN_INIT];
clg_str_init(&cstr, cstr_stack_buf, sizeof(cstr_stack_buf));
if (lg->ctx->use_timestamp) {
write_timestamp(&cstr, lg->ctx->timestamp_tick_start);
}
if (lg->ctx->use_timestamp) {
write_timestamp(&cstr, lg->ctx->timestamp_tick_start);
}
write_severity(&cstr, severity, lg->ctx->use_color);
write_type(&cstr, lg);
write_severity(&cstr, severity, lg->ctx->use_color);
write_type(&cstr, lg);
{
write_file_line_fn(&cstr, file_line, fn, lg->ctx->use_basename);
clg_str_append(&cstr, message);
}
clg_str_append(&cstr, "\n");
{
write_file_line_fn(&cstr, file_line, fn, lg->ctx->use_basename);
clg_str_append(&cstr, message);
}
clg_str_append(&cstr, "\n");
/* could be optional */
int bytes_written = write(lg->ctx->output, cstr.data, cstr.len);
(void)bytes_written;
/* could be optional */
int bytes_written = write(lg->ctx->output, cstr.data, cstr.len);
(void)bytes_written;
clg_str_free(&cstr);
clg_str_free(&cstr);
if (lg->ctx->callbacks.backtrace_fn) {
clg_ctx_backtrace(lg->ctx);
}
if (lg->ctx->callbacks.backtrace_fn) {
clg_ctx_backtrace(lg->ctx);
}
if (severity == CLG_SEVERITY_FATAL) {
clg_ctx_fatal_action(lg->ctx);
}
if (severity == CLG_SEVERITY_FATAL) {
clg_ctx_fatal_action(lg->ctx);
}
}
void CLG_logf(
CLG_LogType *lg, enum CLG_Severity severity, const char *file_line, const char *fn,
const char *fmt, ...)
void CLG_logf(CLG_LogType *lg,
enum CLG_Severity severity,
const char *file_line,
const char *fn,
const char *fmt,
...)
{
CLogStringBuf cstr;
char cstr_stack_buf[CLOG_BUF_LEN_INIT];
clg_str_init(&cstr, cstr_stack_buf, sizeof(cstr_stack_buf));
CLogStringBuf cstr;
char cstr_stack_buf[CLOG_BUF_LEN_INIT];
clg_str_init(&cstr, cstr_stack_buf, sizeof(cstr_stack_buf));
if (lg->ctx->use_timestamp) {
write_timestamp(&cstr, lg->ctx->timestamp_tick_start);
}
if (lg->ctx->use_timestamp) {
write_timestamp(&cstr, lg->ctx->timestamp_tick_start);
}
write_severity(&cstr, severity, lg->ctx->use_color);
write_type(&cstr, lg);
write_severity(&cstr, severity, lg->ctx->use_color);
write_type(&cstr, lg);
{
write_file_line_fn(&cstr, file_line, fn, lg->ctx->use_basename);
{
write_file_line_fn(&cstr, file_line, fn, lg->ctx->use_basename);
va_list ap;
va_start(ap, fmt);
clg_str_vappendf(&cstr, fmt, ap);
va_end(ap);
}
clg_str_append(&cstr, "\n");
va_list ap;
va_start(ap, fmt);
clg_str_vappendf(&cstr, fmt, ap);
va_end(ap);
}
clg_str_append(&cstr, "\n");
/* could be optional */
int bytes_written = write(lg->ctx->output, cstr.data, cstr.len);
(void)bytes_written;
/* could be optional */
int bytes_written = write(lg->ctx->output, cstr.data, cstr.len);
(void)bytes_written;
clg_str_free(&cstr);
clg_str_free(&cstr);
if (lg->ctx->callbacks.backtrace_fn) {
clg_ctx_backtrace(lg->ctx);
}
if (lg->ctx->callbacks.backtrace_fn) {
clg_ctx_backtrace(lg->ctx);
}
if (severity == CLG_SEVERITY_FATAL) {
clg_ctx_fatal_action(lg->ctx);
}
if (severity == CLG_SEVERITY_FATAL) {
clg_ctx_fatal_action(lg->ctx);
}
}
/** \} */
@@ -521,99 +526,105 @@ void CLG_logf(
static void CLG_ctx_output_set(CLogContext *ctx, void *file_handle)
{
ctx->output_file = file_handle;
ctx->output = fileno(ctx->output_file);
ctx->output_file = file_handle;
ctx->output = fileno(ctx->output_file);
#if defined(__unix__) || defined(__APPLE__)
ctx->use_color = isatty(ctx->output);
ctx->use_color = isatty(ctx->output);
#endif
}
static void CLG_ctx_output_use_basename_set(CLogContext *ctx, int value)
{
ctx->use_basename = (bool)value;
ctx->use_basename = (bool)value;
}
static void CLG_ctx_output_use_timestamp_set(CLogContext *ctx, int value)
{
ctx->use_timestamp = (bool)value;
if (ctx->use_timestamp) {
ctx->timestamp_tick_start = clg_timestamp_ticks_get();
}
ctx->use_timestamp = (bool)value;
if (ctx->use_timestamp) {
ctx->timestamp_tick_start = clg_timestamp_ticks_get();
}
}
/** Action on fatal severity. */
static void CLG_ctx_fatal_fn_set(CLogContext *ctx, void (*fatal_fn)(void *file_handle))
{
ctx->callbacks.fatal_fn = fatal_fn;
ctx->callbacks.fatal_fn = fatal_fn;
}
static void CLG_ctx_backtrace_fn_set(CLogContext *ctx, void (*backtrace_fn)(void *file_handle))
{
ctx->callbacks.backtrace_fn = backtrace_fn;
ctx->callbacks.backtrace_fn = backtrace_fn;
}
static void clg_ctx_type_filter_append(CLG_IDFilter **flt_list, const char *type_match, int type_match_len)
static void clg_ctx_type_filter_append(CLG_IDFilter **flt_list,
const char *type_match,
int type_match_len)
{
if (type_match_len == 0) {
return;
}
CLG_IDFilter *flt = MEM_callocN(sizeof(*flt) + (type_match_len + 1), __func__);
flt->next = *flt_list;
*flt_list = flt;
memcpy(flt->match, type_match, type_match_len);
/* no need to null terminate since we calloc'd */
if (type_match_len == 0) {
return;
}
CLG_IDFilter *flt = MEM_callocN(sizeof(*flt) + (type_match_len + 1), __func__);
flt->next = *flt_list;
*flt_list = flt;
memcpy(flt->match, type_match, type_match_len);
/* no need to null terminate since we calloc'd */
}
static void CLG_ctx_type_filter_exclude(CLogContext *ctx, const char *type_match, int type_match_len)
static void CLG_ctx_type_filter_exclude(CLogContext *ctx,
const char *type_match,
int type_match_len)
{
clg_ctx_type_filter_append(&ctx->filters[0], type_match, type_match_len);
clg_ctx_type_filter_append(&ctx->filters[0], type_match, type_match_len);
}
static void CLG_ctx_type_filter_include(CLogContext *ctx, const char *type_match, int type_match_len)
static void CLG_ctx_type_filter_include(CLogContext *ctx,
const char *type_match,
int type_match_len)
{
clg_ctx_type_filter_append(&ctx->filters[1], type_match, type_match_len);
clg_ctx_type_filter_append(&ctx->filters[1], type_match, type_match_len);
}
static void CLG_ctx_level_set(CLogContext *ctx, int level)
{
ctx->default_type.level = level;
for (CLG_LogType *ty = ctx->types; ty; ty = ty->next) {
ty->level = level;
}
ctx->default_type.level = level;
for (CLG_LogType *ty = ctx->types; ty; ty = ty->next) {
ty->level = level;
}
}
static CLogContext *CLG_ctx_init(void)
{
CLogContext *ctx = MEM_callocN(sizeof(*ctx), __func__);
CLogContext *ctx = MEM_callocN(sizeof(*ctx), __func__);
#ifdef WITH_CLOG_PTHREADS
pthread_mutex_init(&ctx->types_lock, NULL);
pthread_mutex_init(&ctx->types_lock, NULL);
#endif
ctx->use_color = true;
ctx->default_type.level = 1;
CLG_ctx_output_set(ctx, stdout);
ctx->use_color = true;
ctx->default_type.level = 1;
CLG_ctx_output_set(ctx, stdout);
return ctx;
return ctx;
}
static void CLG_ctx_free(CLogContext *ctx)
{
while (ctx->types != NULL) {
CLG_LogType *item = ctx->types;
ctx->types = item->next;
MEM_freeN(item);
}
while (ctx->types != NULL) {
CLG_LogType *item = ctx->types;
ctx->types = item->next;
MEM_freeN(item);
}
for (uint i = 0; i < 2; i++) {
while (ctx->filters[i] != NULL) {
CLG_IDFilter *item = ctx->filters[i];
ctx->filters[i] = item->next;
MEM_freeN(item);
}
}
for (uint i = 0; i < 2; i++) {
while (ctx->filters[i] != NULL) {
CLG_IDFilter *item = ctx->filters[i];
ctx->filters[i] = item->next;
MEM_freeN(item);
}
}
#ifdef WITH_CLOG_PTHREADS
pthread_mutex_destroy(&ctx->types_lock);
pthread_mutex_destroy(&ctx->types_lock);
#endif
MEM_freeN(ctx);
MEM_freeN(ctx);
}
/** \} */
@@ -629,57 +640,56 @@ static struct CLogContext *g_ctx = NULL;
void CLG_init(void)
{
g_ctx = CLG_ctx_init();
g_ctx = CLG_ctx_init();
clg_color_table_init(g_ctx->use_color);
clg_color_table_init(g_ctx->use_color);
}
void CLG_exit(void)
{
CLG_ctx_free(g_ctx);
CLG_ctx_free(g_ctx);
}
void CLG_output_set(void *file_handle)
{
CLG_ctx_output_set(g_ctx, file_handle);
CLG_ctx_output_set(g_ctx, file_handle);
}
void CLG_output_use_basename_set(int value)
{
CLG_ctx_output_use_basename_set(g_ctx, value);
CLG_ctx_output_use_basename_set(g_ctx, value);
}
void CLG_output_use_timestamp_set(int value)
{
CLG_ctx_output_use_timestamp_set(g_ctx, value);
CLG_ctx_output_use_timestamp_set(g_ctx, value);
}
void CLG_fatal_fn_set(void (*fatal_fn)(void *file_handle))
{
CLG_ctx_fatal_fn_set(g_ctx, fatal_fn);
CLG_ctx_fatal_fn_set(g_ctx, fatal_fn);
}
void CLG_backtrace_fn_set(void (*fatal_fn)(void *file_handle))
{
CLG_ctx_backtrace_fn_set(g_ctx, fatal_fn);
CLG_ctx_backtrace_fn_set(g_ctx, fatal_fn);
}
void CLG_type_filter_exclude(const char *type_match, int type_match_len)
{
CLG_ctx_type_filter_exclude(g_ctx, type_match, type_match_len);
CLG_ctx_type_filter_exclude(g_ctx, type_match, type_match_len);
}
void CLG_type_filter_include(const char *type_match, int type_match_len)
{
CLG_ctx_type_filter_include(g_ctx, type_match, type_match_len);
CLG_ctx_type_filter_include(g_ctx, type_match, type_match_len);
}
void CLG_level_set(int level)
{
CLG_ctx_level_set(g_ctx, level);
CLG_ctx_level_set(g_ctx, level);
}
/** \} */
/* -------------------------------------------------------------------- */
@@ -690,22 +700,22 @@ void CLG_level_set(int level)
void CLG_logref_init(CLG_LogRef *clg_ref)
{
#ifdef WITH_CLOG_PTHREADS
/* Only runs once when initializing a static type in most cases. */
pthread_mutex_lock(&g_ctx->types_lock);
/* Only runs once when initializing a static type in most cases. */
pthread_mutex_lock(&g_ctx->types_lock);
#endif
if (clg_ref->type == NULL) {
CLG_LogType *clg_ty = clg_ctx_type_find_by_name(g_ctx, clg_ref->identifier);
if (clg_ty == NULL) {
clg_ty = clg_ctx_type_register(g_ctx, clg_ref->identifier);
}
if (clg_ref->type == NULL) {
CLG_LogType *clg_ty = clg_ctx_type_find_by_name(g_ctx, clg_ref->identifier);
if (clg_ty == NULL) {
clg_ty = clg_ctx_type_register(g_ctx, clg_ref->identifier);
}
#ifdef WITH_CLOG_PTHREADS
atomic_cas_ptr((void **)&clg_ref->type, clg_ref->type, clg_ty);
atomic_cas_ptr((void **)&clg_ref->type, clg_ref->type, clg_ty);
#else
clg_ref->type = clg_ty;
clg_ref->type = clg_ty;
#endif
}
}
#ifdef WITH_CLOG_PTHREADS
pthread_mutex_unlock(&g_ctx->types_lock);
pthread_mutex_unlock(&g_ctx->types_lock);
#endif
}

View File

@@ -1,16 +1,16 @@
# Standalone or with Blender
if(NOT WITH_BLENDER AND WITH_CYCLES_STANDALONE)
set(CYCLES_INSTALL_PATH "")
set(CYCLES_INSTALL_PATH "")
else()
set(WITH_CYCLES_BLENDER ON)
# WINDOWS_PYTHON_DEBUG needs to write into the user addons folder since it will
# be started with --env-system-scripts pointing to the release folder, which will
# lack the cycles addon, and we don't want to write into it.
if(NOT WINDOWS_PYTHON_DEBUG)
set(CYCLES_INSTALL_PATH "scripts/addons/cycles")
else()
set(CYCLES_INSTALL_PATH "$ENV{appdata}/blender foundation/blender/${BLENDER_VERSION}/scripts/addons/cycles")
endif()
set(WITH_CYCLES_BLENDER ON)
# WINDOWS_PYTHON_DEBUG needs to write into the user addons folder since it will
# be started with --env-system-scripts pointing to the release folder, which will
# lack the cycles addon, and we don't want to write into it.
if(NOT WINDOWS_PYTHON_DEBUG)
set(CYCLES_INSTALL_PATH "scripts/addons/cycles")
else()
set(CYCLES_INSTALL_PATH "$ENV{appdata}/blender foundation/blender/${BLENDER_VERSION}/scripts/addons/cycles")
endif()
endif()
# External Libraries
@@ -23,329 +23,329 @@ include(cmake/macros.cmake)
# note: CXX_HAS_SSE is needed in case passing SSE flags fails altogether (gcc-arm)
# Decide which SIMD kernel variants can be built (CXX_HAS_SSE / CXX_HAS_AVX /
# CXX_HAS_AVX2) and compute the per-variant compiler flags
# (CYCLES_<ISA>_KERNEL_FLAGS) for the active compiler.
if(WITH_CYCLES_NATIVE_ONLY)
  # Native-only build: the dedicated SSE/AVX kernels are disabled and
  # WITH_KERNEL_NATIVE is defined instead.
  set(CXX_HAS_SSE FALSE)
  set(CXX_HAS_AVX FALSE)
  set(CXX_HAS_AVX2 FALSE)
  add_definitions(
    -DWITH_KERNEL_NATIVE
  )

  if(NOT MSVC)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
    set(CYCLES_KERNEL_FLAGS "-march=native")
  endif()
elseif(NOT WITH_CPU_SSE)
  set(CXX_HAS_SSE FALSE)
  set(CXX_HAS_AVX FALSE)
  set(CXX_HAS_AVX2 FALSE)
elseif(WIN32 AND MSVC AND NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang")
  set(CXX_HAS_SSE TRUE)
  set(CXX_HAS_AVX TRUE)
  set(CXX_HAS_AVX2 TRUE)

  # /arch:AVX for VC2012 and above
  if(NOT MSVC_VERSION LESS 1700)
    set(CYCLES_AVX_ARCH_FLAGS "/arch:AVX")
    set(CYCLES_AVX2_ARCH_FLAGS "/arch:AVX /arch:AVX2")
  elseif(NOT CMAKE_CL_64)
    set(CYCLES_AVX_ARCH_FLAGS "/arch:SSE2")
    set(CYCLES_AVX2_ARCH_FLAGS "/arch:SSE2")
  endif()

  # Unlike GCC/clang we still use fast math, because there is no fine
  # grained control and the speedup we get here is too big to ignore.
  set(CYCLES_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-")

  # there is no /arch:SSE3, but intrinsics are available anyway
  if(CMAKE_CL_64)
    set(CYCLES_SSE2_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS}")
    set(CYCLES_SSE3_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS}")
    set(CYCLES_SSE41_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS}")
    set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
    set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_AVX2_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
  else()
    set(CYCLES_SSE2_KERNEL_FLAGS "/arch:SSE2 ${CYCLES_KERNEL_FLAGS}")
    set(CYCLES_SSE3_KERNEL_FLAGS "/arch:SSE2 ${CYCLES_KERNEL_FLAGS}")
    set(CYCLES_SSE41_KERNEL_FLAGS "/arch:SSE2 ${CYCLES_KERNEL_FLAGS}")
    set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
    set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_AVX2_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
  endif()

  # Each flag is appended exactly once.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CYCLES_KERNEL_FLAGS}")
  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox")
  set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Ox")
  set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /Ox")
elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
  check_cxx_compiler_flag(-msse CXX_HAS_SSE)
  check_cxx_compiler_flag(-mavx CXX_HAS_AVX)
  check_cxx_compiler_flag(-mavx2 CXX_HAS_AVX2)

  # Assume no signal trapping for better code generation.
  set(CYCLES_KERNEL_FLAGS "-fno-trapping-math")
  # Avoid overhead of setting errno for NaNs.
  set(CYCLES_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -fno-math-errno")
  # Let compiler optimize 0.0 - x without worrying about signed zeros.
  set(CYCLES_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -fno-signed-zeros")

  if(CMAKE_COMPILER_IS_GNUCC)
    # Assume no signaling NaNs for better code generation.
    set(CYCLES_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -fno-signaling-nans")
    # Assume a fixed rounding mode for better constant folding.
    set(CYCLES_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -fno-rounding-math")
  endif()

  if(CXX_HAS_SSE)
    if(CMAKE_COMPILER_IS_GNUCC)
      set(CYCLES_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -mfpmath=sse")
    endif()

    # Every ISA level builds on the flags of the previous one.
    set(CYCLES_SSE2_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -msse -msse2")
    set(CYCLES_SSE3_KERNEL_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS} -msse3 -mssse3")
    set(CYCLES_SSE41_KERNEL_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS} -msse4.1")
    if(CXX_HAS_AVX)
      set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS} -mavx")
    endif()
    if(CXX_HAS_AVX2)
      set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS} -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c")
    endif()
  endif()

  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CYCLES_KERNEL_FLAGS}")
elseif(WIN32 AND CMAKE_CXX_COMPILER_ID MATCHES "Intel")
  check_cxx_compiler_flag(/QxSSE2 CXX_HAS_SSE)
  check_cxx_compiler_flag(/arch:AVX CXX_HAS_AVX)
  check_cxx_compiler_flag(/QxCORE-AVX2 CXX_HAS_AVX2)

  if(CXX_HAS_SSE)
    set(CYCLES_SSE2_KERNEL_FLAGS "/QxSSE2")
    set(CYCLES_SSE3_KERNEL_FLAGS "/QxSSSE3")
    set(CYCLES_SSE41_KERNEL_FLAGS "/QxSSE4.1")

    if(CXX_HAS_AVX)
      set(CYCLES_AVX_KERNEL_FLAGS "/arch:AVX")
    endif()

    if(CXX_HAS_AVX2)
      set(CYCLES_AVX2_KERNEL_FLAGS "/QxCORE-AVX2")
    endif()
  endif()
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
  if(APPLE)
    # ICC does not support SSE2 flag on MacOSX
    check_cxx_compiler_flag(-xssse3 CXX_HAS_SSE)
  else()
    check_cxx_compiler_flag(-xsse2 CXX_HAS_SSE)
  endif()

  check_cxx_compiler_flag(-xavx CXX_HAS_AVX)
  check_cxx_compiler_flag(-xcore-avx2 CXX_HAS_AVX2)

  if(CXX_HAS_SSE)
    if(APPLE)
      # ICC does not support SSE2 flag on MacOSX
      set(CYCLES_SSE2_KERNEL_FLAGS "-xssse3")
    else()
      set(CYCLES_SSE2_KERNEL_FLAGS "-xsse2")
    endif()

    set(CYCLES_SSE3_KERNEL_FLAGS "-xssse3")
    set(CYCLES_SSE41_KERNEL_FLAGS "-xsse4.1")

    if(CXX_HAS_AVX)
      set(CYCLES_AVX_KERNEL_FLAGS "-xavx")
    endif()

    if(CXX_HAS_AVX2)
      set(CYCLES_AVX2_KERNEL_FLAGS "-xcore-avx2")
    endif()
  endif()
endif()
# Tell the sources which SIMD kernel variants are being built.
if(CXX_HAS_SSE)
  add_definitions(-DWITH_KERNEL_SSE2 -DWITH_KERNEL_SSE3 -DWITH_KERNEL_SSE41)
endif()

if(CXX_HAS_AVX)
  add_definitions(-DWITH_KERNEL_AVX)
endif()

if(CXX_HAS_AVX2)
  add_definitions(-DWITH_KERNEL_AVX2)
endif()
# With OSL enabled, provide the compiler-specific flags that switch RTTI off
# (including the matching Boost defines) in RTTI_DISABLE_FLAGS.
if(WITH_CYCLES_OSL)
  if(WIN32 AND MSVC)
    set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
  elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang"))
    set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
  endif()
endif()
# Definitions and Includes
# Definitions required by the Boost and OpenImageIO dependencies.
add_definitions(
  ${BOOST_DEFINITIONS}
  ${OPENIMAGEIO_DEFINITIONS}
)

# Macros that open and close the ccl namespace.
add_definitions(
  -DCCL_NAMESPACE_BEGIN=namespace\ ccl\ {
  -DCCL_NAMESPACE_END=}
)
# Optional features: every enabled option adds its preprocessor define(s) and,
# for OSL, Embree and OpenSubdiv, the library's include directory.
if(WITH_CYCLES_STANDALONE_GUI)
  add_definitions(-DWITH_CYCLES_STANDALONE_GUI)
endif()

if(WITH_CYCLES_PTEX)
  add_definitions(-DWITH_PTEX)
endif()

if(WITH_CYCLES_OSL)
  add_definitions(-DWITH_OSL)
  # OSL 1.9.x
  add_definitions(-DOSL_STATIC_BUILD)
  # OSL before 1.9
  add_definitions(-DOSL_STATIC_LIBRARY)
  include_directories(SYSTEM ${OSL_INCLUDE_DIR})
endif()

if(WITH_CYCLES_EMBREE)
  add_definitions(-DWITH_EMBREE)
  add_definitions(-DEMBREE_STATIC_LIB)
  include_directories(SYSTEM ${EMBREE_INCLUDE_DIRS})
endif()

if(WITH_OPENSUBDIV)
  add_definitions(-DWITH_OPENSUBDIV)
  include_directories(SYSTEM ${OPENSUBDIV_INCLUDE_DIR})
endif()
# The standalone application always enables the OpenCL and CUDA devices and
# keeps networking off.
if(WITH_CYCLES_STANDALONE)
  set(WITH_CYCLES_DEVICE_OPENCL TRUE)
  set(WITH_CYCLES_DEVICE_CUDA TRUE)
  # Experimental and unfinished.
  set(WITH_CYCLES_NETWORK FALSE)
endif()

# TODO(sergey): Consider removing it, only causes confusion in interface.
set(WITH_CYCLES_DEVICE_MULTI TRUE)
# Logging capabilities using GLog library.
if(WITH_CYCLES_LOGGING)
  add_definitions(-DWITH_CYCLES_LOGGING)
  add_definitions(${GLOG_DEFINES})
  add_definitions(-DCYCLES_GFLAGS_NAMESPACE=${GFLAGS_NAMESPACE})
  include_directories(SYSTEM ${GLOG_INCLUDE_DIRS} ${GFLAGS_INCLUDE_DIRS})
endif()

# Debugging capabilities (debug passes etc).
if(WITH_CYCLES_DEBUG)
  add_definitions(-DWITH_CYCLES_DEBUG)
endif()

# WITH_SYSTEM_PUGIXML is defined whenever OPENIMAGEIO_PUGIXML_FOUND is false.
if(NOT OPENIMAGEIO_PUGIXML_FOUND)
  add_definitions(-DWITH_SYSTEM_PUGIXML)
endif()
# Third-party headers, added as SYSTEM include directories.
include_directories(
  SYSTEM
  ${BOOST_INCLUDE_DIR}
  ${OPENIMAGEIO_INCLUDE_DIRS}
  ${OPENIMAGEIO_INCLUDE_DIRS}/OpenImageIO
  ${OPENEXR_INCLUDE_DIR}
  ${OPENEXR_INCLUDE_DIRS}
  ${PUGIXML_INCLUDE_DIR}
)

# The atomic headers live in a different place in the standalone repository.
if(CYCLES_STANDALONE_REPOSITORY)
  include_directories(../third_party/atomic)
else()
  include_directories(../atomic)
endif()
# Warnings
if(CMAKE_COMPILER_IS_GNUCXX)
  # Request each flag through ADD_CHECK_CXX_COMPILER_FLAG, which is given
  # CMAKE_CXX_FLAGS and a scratch result variable.
  ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_cxxflag_float_conversion "-Werror=float-conversion")
  ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_cxxflag_double_promotion "-Werror=double-promotion")
  ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_no_error_unused_macros "-Wno-error=unused-macros")

  # The scratch variables are not needed afterwards.
  unset(_has_cxxflag_float_conversion)
  unset(_has_cxxflag_double_promotion)
  unset(_has_no_error_unused_macros)
endif()
# When CUDA binaries are requested and cycles_cubin_cc was not selected
# explicitly, switch to it for host compilers that nvcc does not support.
if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER))
  if(MSVC)
    # MAX_MSVC is an exclusive upper bound: an MSVC_VERSION at or above it
    # falls back to cycles_cubin_cc. The bound depends on the CUDA toolkit.
    #
    # The version variable is quoted so that an empty CUDA_VERSION evaluates
    # to "no match" instead of a malformed if(), and VERSION_EQUAL is used so
    # the values are compared as versions rather than as real numbers.
    set(MAX_MSVC 1800)
    if("${CUDA_VERSION}" VERSION_EQUAL "8.0")
      set(MAX_MSVC 1900)
    elseif("${CUDA_VERSION}" VERSION_EQUAL "9.0")
      set(MAX_MSVC 1910)
    elseif("${CUDA_VERSION}" VERSION_EQUAL "9.1")
      set(MAX_MSVC 1911)
    elseif("${CUDA_VERSION}" VERSION_EQUAL "10.0")
      set(MAX_MSVC 1999)
    elseif("${CUDA_VERSION}" VERSION_EQUAL "10.1")
      set(MAX_MSVC 1999)
    endif()
    if(NOT MSVC_VERSION LESS ${MAX_MSVC} OR CMAKE_C_COMPILER_ID MATCHES "Clang")
      message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.")
      set(WITH_CYCLES_CUBIN_COMPILER ON)
    endif()
    unset(MAX_MSVC)
  elseif(APPLE)
    # Quoted for the same reason: an unset XCODE_VERSION must not break if().
    if(NOT ("${XCODE_VERSION}" VERSION_LESS 10.0))
      message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.")
      set(WITH_CYCLES_CUBIN_COMPILER ON)
    endif()
  endif()
endif()
# NVRTC gives wrong rendering result in CUDA 10.0, so we must use NVCC.
if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER)
  # CUDA_VERSION is quoted so an empty value leaves the choice unchanged
  # instead of producing a malformed if().
  if(NOT ("${CUDA_VERSION}" VERSION_LESS 10.0))
    message(STATUS "cycles_cubin_cc not supported for CUDA 10.0+, using nvcc instead.")
    set(WITH_CYCLES_CUBIN_COMPILER OFF)
  endif()
endif()
# Subdirectories
# Blender integration, built with Blender's guarded allocator define.
if(WITH_CYCLES_BLENDER)
  add_definitions(-DWITH_BLENDER_GUARDEDALLOC)
  add_subdirectory(blender)
endif()

if(WITH_CYCLES_NETWORK)
  add_definitions(-DWITH_NETWORK)
endif()

if(WITH_OPENCOLORIO)
  add_definitions(-DWITH_OCIO)
  include_directories(SYSTEM ${OPENCOLORIO_INCLUDE_DIRS})
endif()

# The app directory is added when any of these three options is enabled.
if(WITH_CYCLES_STANDALONE OR WITH_CYCLES_NETWORK OR WITH_CYCLES_CUBIN_COMPILER)
  add_subdirectory(app)
endif()
add_subdirectory(bvh)
@@ -359,9 +359,9 @@ add_subdirectory(util)
# TODO(sergey): Make this to work with standalone repository.
if(WITH_GTESTS)
  add_subdirectory(test)
endif()

# Standalone builds outside of Blender run the delayed install step into
# <build dir>/bin.
if(NOT WITH_BLENDER AND WITH_CYCLES_STANDALONE)
  delayed_do_install(${CMAKE_BINARY_DIR}/bin)
endif()

View File

@@ -1,6 +1,6 @@
# Headers are included relative to the parent directory.
set(INC
  ..
)
set(INC_SYS
)
@@ -8,46 +8,46 @@ set(INC_SYS
# NOTE: LIBRARIES contains all the libraries which are common
# across release and debug build types, stored in a linking order.
set(LIBRARIES
  # Cycles' own libraries.
  cycles_device
  cycles_kernel
  cycles_render
  cycles_bvh
  cycles_subd
  cycles_graph
  cycles_util
  # External libraries.
  ${BLENDER_GL_LIBRARIES}
  ${CYCLES_APP_GLEW_LIBRARY}
  ${PNG_LIBRARIES}
  ${JPEG_LIBRARIES}
  ${ZLIB_LIBRARIES}
  ${TIFF_LIBRARY}
  ${PTHREADS_LIBRARIES}
  extern_clew
)
# CUDA: either the cuew loader library or the CUDA driver library itself.
if(WITH_CUDA_DYNLOAD)
  list(APPEND LIBRARIES extern_cuew)
else()
  list(APPEND LIBRARIES ${CUDA_CUDA_LIBRARY})
endif()

if(WITH_CYCLES_OSL)
  list(APPEND LIBRARIES cycles_kernel_osl)
endif()

# Blender-internal libraries are not linked in the standalone repository.
if(NOT CYCLES_STANDALONE_REPOSITORY)
  list(APPEND LIBRARIES bf_intern_glew_mx bf_intern_guardedalloc bf_intern_numaapi)
endif()

if(WITH_CYCLES_LOGGING)
  list(APPEND LIBRARIES ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES})
endif()

if(WITH_CYCLES_STANDALONE AND WITH_CYCLES_STANDALONE_GUI)
  list(APPEND LIBRARIES ${GLUT_LIBRARIES})
endif()
# Common configuration.
@@ -62,7 +62,7 @@ link_directories(${OPENIMAGEIO_LIBPATH}
${OPENJPEG_LIBPATH})
# Library search path for OpenColorIO, only when it is enabled.
if(WITH_OPENCOLORIO)
  link_directories(${OPENCOLORIO_LIBPATH})
endif()
add_definitions(${GL_DEFINITIONS})
@@ -78,90 +78,90 @@ include_directories(SYSTEM ${INC_SYS})
#
# TODO(sergey): Think of a better place for this?
# cycles_target_link_libraries(<target>)
#
# Link <target> against ${LIBRARIES}, then the libraries of each enabled
# optional feature, then the image, XML, Boost and platform libraries.
macro(cycles_target_link_libraries target)
  target_link_libraries(${target} ${LIBRARIES})

  # Optional features.
  if(WITH_CYCLES_OSL)
    target_link_libraries(${target} ${OSL_LIBRARIES} ${LLVM_LIBRARIES})
  endif()
  if(WITH_CYCLES_EMBREE)
    target_link_libraries(${target} ${EMBREE_LIBRARIES})
  endif()
  if(WITH_OPENSUBDIV)
    target_link_libraries(${target} ${OPENSUBDIV_LIBRARIES})
  endif()
  if(WITH_OPENCOLORIO)
    target_link_libraries(${target} ${OPENCOLORIO_LIBRARIES})
  endif()

  # Always-required libraries.
  target_link_libraries(
    ${target}
    ${OPENIMAGEIO_LIBRARIES}
    ${OPENEXR_LIBRARIES}
    ${OPENJPEG_LIBRARIES}
    ${PUGIXML_LIBRARIES}
    ${BOOST_LIBRARIES}
    ${CMAKE_DL_LIBS}
    ${PLATFORM_LINKLIBS}
  )
endmacro()
# Application build targets
# Standalone renderer executable.
if(WITH_CYCLES_STANDALONE)
  set(SRC
    cycles_standalone.cpp
    cycles_xml.cpp
    cycles_xml.h
  )
  add_executable(cycles ${SRC})
  cycles_target_link_libraries(cycles)

  # On non-Apple Unix the install RPATH points at a lib directory next to the
  # executable.
  if(UNIX AND NOT APPLE)
    set_target_properties(cycles PROPERTIES INSTALL_RPATH $ORIGIN/lib)
  endif()
  unset(SRC)
endif()
# Network render server executable.
if(WITH_CYCLES_NETWORK)
  set(SRC
    cycles_server.cpp
  )
  add_executable(cycles_server ${SRC})
  cycles_target_link_libraries(cycles_server)

  # On non-Apple Unix the install RPATH points at a lib directory next to the
  # executable.
  if(UNIX AND NOT APPLE)
    set_target_properties(cycles_server PROPERTIES INSTALL_RPATH $ORIGIN/lib)
  endif()
  unset(SRC)
endif()
if(WITH_CYCLES_CUBIN_COMPILER)
  # 32 bit windows is special, nvrtc is not supported on x86, so even
  # though we are building 32 bit blender a 64 bit cubin_cc will have
  # to be built to compile the cubins.
  if(MSVC AND NOT CMAKE_CL_64)
    message("Building with CUDA not supported on 32 bit, skipped")
    set(WITH_CYCLES_CUDA_BINARIES OFF CACHE BOOL "" FORCE)
  else()
    set(SRC
      cycles_cubin_cc.cpp
    )
    set(INC
      ../../../extern/cuew/include
    )
    add_executable(cycles_cubin_cc ${SRC})
    include_directories(${INC})
    target_link_libraries(cycles_cubin_cc
      extern_cuew
      ${OPENIMAGEIO_LIBRARIES}
      ${OPENEXR_LIBRARIES}
      ${OPENJPEG_LIBRARIES}
      ${PUGIXML_LIBRARIES}
      ${BOOST_LIBRARIES}
      ${PLATFORM_LINKLIBS}
    )
    # The guarded allocator is only linked outside the standalone repository.
    if(NOT CYCLES_STANDALONE_REPOSITORY)
      target_link_libraries(cycles_cubin_cc bf_intern_guardedalloc)
    endif()
    unset(SRC)
    unset(INC)
  endif()
endif()

View File

@@ -26,272 +26,286 @@
#include "cuew.h"
#ifdef _MSC_VER
# include <Windows.h>
# include <Windows.h>
#endif
using std::string;
using std::vector;
namespace std {
/* Stream-based to_string() for any type that supports operator<<. */
template<typename T> std::string to_string(const T &n)
{
  std::ostringstream s;
  s << n;
  return s.str();
}
}  // namespace std

/* Everything the tool needs for one compilation: the command line options
 * plus the path of the intermediate PTX file. */
class CompilationSettings {
 public:
  CompilationSettings() : target_arch(0), bits(64), verbose(false), fast_math(false)
  {
  }

  /* Root of the CUDA toolkit; may be empty. */
  std::string cuda_toolkit_dir;
  /* CUDA source to compile. */
  std::string input_file;
  /* Final output file passed to ptxas with -o. */
  std::string output_file;
  /* Temporary PTX file, filled in by compile_cuda(). */
  std::string ptx_file;
  /* Values passed to the compiler as -D and -I options. */
  std::vector<std::string> defines;
  std::vector<std::string> includes;
  /* Used as compute_<target_arch> and sm_<target_arch>. */
  int target_arch;
  /* Passed to ptxas as -m<bits>. */
  int bits;
  bool verbose;
  bool fast_math;
};
/* Compile settings.input_file to PTX with NVRTC and write the PTX into a
 * uniquely named file in the temp directory. The path of that file is stored
 * in settings.ptx_file.
 *
 * Returns true on success. On failure an error is printed to stderr and false
 * is returned. */
static bool compile_cuda(CompilationSettings &settings)
{
  /* These standard headers are handed to NVRTC as in-memory headers whose
   * content is a single newline. */
  const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h"};
  const char *header_content[] = {"\n", "\n", "\n", "\n"};

  printf("Building %s\n", settings.input_file.c_str());

  string code;
  if (!OIIO::Filesystem::read_text_file(settings.input_file, code)) {
    fprintf(stderr, "Error: unable to read %s\n", settings.input_file.c_str());
    return false;
  }

  /* Build the compiler command line. */
  vector<string> options;
  for (size_t i = 0; i < settings.includes.size(); i++) {
    options.push_back("-I" + settings.includes[i]);
  }

  for (size_t i = 0; i < settings.defines.size(); i++) {
    options.push_back("-D" + settings.defines[i]);
  }
  options.push_back("-D__KERNEL_CUDA_VERSION__=" + std::to_string(cuewNvrtcVersion()));
  options.push_back("-arch=compute_" + std::to_string(settings.target_arch));
  options.push_back("--device-as-default-execution-space");
  if (settings.fast_math) {
    options.push_back("--use_fast_math");
  }

  nvrtcProgram prog;
  nvrtcResult result = nvrtcCreateProgram(&prog,
                                          code.c_str(),                               // buffer
                                          NULL,                                       // name
                                          (int)(sizeof(headers) / sizeof(headers[0])),  // numHeaders
                                          header_content,                             // headers
                                          headers);                                   // includeNames

  if (result != NVRTC_SUCCESS) {
    fprintf(stderr, "Error: nvrtcCreateProgram failed (%d)\n\n", (int)result);
    return false;
  }

  /* Transfer options to a classic C array. The vector is never empty because
   * three options are always added above. */
  vector<const char *> opts(options.size());
  for (size_t i = 0; i < options.size(); i++) {
    opts[i] = options[i].c_str();
  }

  result = nvrtcCompileProgram(prog, (int)options.size(), &opts[0]);

  if (result != NVRTC_SUCCESS) {
    fprintf(stderr, "Error: nvrtcCompileProgram failed (%d)\n\n", (int)result);

    /* Print the compile log when there is one. log_size starts at zero so a
     * failing size query cannot leave it uninitialized. */
    size_t log_size = 0;
    nvrtcGetProgramLogSize(prog, &log_size);

    if (log_size > 0) {
      vector<char> log(log_size);
      nvrtcGetProgramLog(prog, &log[0]);
      fprintf(stderr, "%s\n", &log[0]);
    }

    return false;
  }

  /* Retrieve the ptx code. */
  size_t ptx_size;
  result = nvrtcGetPTXSize(prog, &ptx_size);
  if (result != NVRTC_SUCCESS) {
    fprintf(stderr, "Error: nvrtcGetPTXSize failed (%d)\n\n", (int)result);
    return false;
  }

  vector<char> ptx_code(ptx_size);
  result = nvrtcGetPTX(prog, &ptx_code[0]);
  if (result != NVRTC_SUCCESS) {
    fprintf(stderr, "Error: nvrtcGetPTX failed (%d)\n\n", (int)result);
    return false;
  }

  /* Write a file in the temp folder with the ptx code. */
  settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" +
                      OIIO::Filesystem::unique_path();
  FILE *f = fopen(settings.ptx_file.c_str(), "wb");
  if (f == NULL) {
    fprintf(stderr, "Error: unable to open %s for writing\n", settings.ptx_file.c_str());
    return false;
  }

  /* A short write or a failing close both mean the PTX file is unusable. */
  const size_t written = fwrite(&ptx_code[0], 1, ptx_size, f);
  const bool closed = (fclose(f) == 0);
  if (written != ptx_size || !closed) {
    fprintf(stderr, "Error: unable to write %s\n", settings.ptx_file.c_str());
    return false;
  }

  return true;
}
/* Run ptxas on settings.ptx_file to produce settings.output_file, then delete
 * the PTX file.
 *
 * Returns false when ptxas exits with a non-zero status. Failing to delete
 * the PTX file is reported but does not fail the call. */
static bool link_ptxas(CompilationSettings &settings)
{
  /* Directory prefix of the ptxas executable; stays empty when no toolkit
   * directory was given. */
  string toolkit_bin;
  if (settings.cuda_toolkit_dir.size()) {
    toolkit_bin = settings.cuda_toolkit_dir + "/bin/";
  }

  string command = "\"" + toolkit_bin + "ptxas\" " + settings.ptx_file;
  command += " -o " + settings.output_file;
  command += " --gpu-name sm_" + std::to_string(settings.target_arch);
  command += " -m" + std::to_string(settings.bits);

  if (settings.verbose) {
    command += " --verbose";
    printf("%s\n", command.c_str());
  }

  const int exit_code = system(command.c_str());
  if (exit_code != 0) {
    fprintf(stderr, "Error: ptxas failed (%d)\n\n", exit_code);
    return false;
  }

  const bool removed = OIIO::Filesystem::remove(settings.ptx_file);
  if (!removed) {
    fprintf(stderr, "Error: removing %s\n\n", settings.ptx_file.c_str());
  }

  return true;
}
/* Load NVRTC through cuew and verify that it is usable: the library must
 * initialize, report version 80 or newer, and every NVRTC entry point used by
 * this tool must have been resolved.
 *
 * Returns true when everything is available. Otherwise an error is printed to
 * stderr and false is returned. */
static bool init(CompilationSettings &settings)
{
#ifdef _MSC_VER
  /* Add the toolkit's bin directory to the DLL search path. */
  if (settings.cuda_toolkit_dir.size()) {
    SetDllDirectory((settings.cuda_toolkit_dir + "/bin").c_str());
  }
#else
  (void)settings;
#endif

  int cuewresult = cuewInit(CUEW_INIT_NVRTC);
  if (cuewresult != CUEW_SUCCESS) {
    fprintf(stderr, "Error: cuew init failed (%d)\n\n", cuewresult);
    return false;
  }

  /* Report the same value that is being tested. */
  const int nvrtc_version = cuewNvrtcVersion();
  if (nvrtc_version < 80) {
    fprintf(stderr, "Error: only cuda 8 and higher is supported, %d\n\n", nvrtc_version);
    return false;
  }

  if (!nvrtcCreateProgram) {
    fprintf(stderr, "Error: nvrtcCreateProgram not resolved\n");
    return false;
  }

  if (!nvrtcCompileProgram) {
    fprintf(stderr, "Error: nvrtcCompileProgram not resolved\n");
    return false;
  }

  if (!nvrtcGetProgramLogSize) {
    fprintf(stderr, "Error: nvrtcGetProgramLogSize not resolved\n");
    return false;
  }

  if (!nvrtcGetProgramLog) {
    fprintf(stderr, "Error: nvrtcGetProgramLog not resolved\n");
    return false;
  }

  if (!nvrtcGetPTXSize) {
    fprintf(stderr, "Error: nvrtcGetPTXSize not resolved\n");
    return false;
  }

  if (!nvrtcGetPTX) {
    fprintf(stderr, "Error: nvrtcGetPTX not resolved\n");
    return false;
  }

  return true;
}
/* Parse the command line of cycles_cubin_cc into `settings`.
 *
 * Returns true when parsing succeeded and the required options
 * (-o, -i and -target) were all provided. On any failure a diagnostic is
 * printed to stderr and false is returned. */
static bool parse_parameters(int argc, const char **argv, CompilationSettings &settings)
{
  OIIO::ArgParse ap;
  ap.options("Usage: cycles_cubin_cc [options]",
             "-target %d",
             &settings.target_arch,
             "target shader model",
             "-m %d",
             &settings.bits,
             "Cuda architecture bits",
             "-i %s",
             &settings.input_file,
             "Input source filename",
             "-o %s",
             &settings.output_file,
             "Output cubin filename",
             "-I %L",
             &settings.includes,
             "Add additional includepath",
             "-D %L",
             &settings.defines,
             "Add additional defines",
             "-v",
             &settings.verbose,
             "Use verbose logging",
             "--use_fast_math",
             &settings.fast_math,
             "Use fast math",
             "-cuda-toolkit-dir %s",
             &settings.cuda_toolkit_dir,
             "path to the cuda toolkit binary directory",
             NULL);

  /* Malformed command line: show the parser error followed by the usage. */
  if (ap.parse(argc, argv) < 0) {
    fprintf(stderr, "%s\n", ap.geterror().c_str());
    ap.usage();
    return false;
  }

  /* Required options. */
  if (!settings.output_file.size()) {
    fprintf(stderr, "Error: Output file not set(-o), required\n\n");
    return false;
  }

  if (!settings.input_file.size()) {
    /* Message previously lacked the closing parenthesis after "-i". */
    fprintf(stderr, "Error: Input file not set(-i), required\n\n");
    return false;
  }

  if (!settings.target_arch) {
    fprintf(stderr, "Error: target shader model not set (-target), required\n\n");
    return false;
  }

  return true;
}
/* Entry point of cycles_cubin_cc.
 *
 * Runs the stages in order: parse the command line, initialize, compile the
 * CUDA source, then link with ptxas. The first stage that fails stops the
 * sequence and terminates the process with EXIT_FAILURE. */
int main(int argc, const char **argv)
{
  CompilationSettings settings;

  /* Message to print for the failing stage; the link stage prints nothing here. */
  const char *failure_message = NULL;
  bool failed = true;

  if (!parse_parameters(argc, argv, settings)) {
    failure_message = "Error: invalid parameters, exiting\n";
  }
  else if (!init(settings)) {
    failure_message = "Error: initialization error, exiting\n";
  }
  else if (!compile_cuda(settings)) {
    failure_message = "Error: compilation error, exiting\n";
  }
  else if (!link_ptxas(settings)) {
    /* No additional message is emitted for a failed link stage. */
  }
  else {
    failed = false;
  }

  if (failed) {
    if (failure_message != NULL) {
      fprintf(stderr, "%s", failure_message);
    }
    exit(EXIT_FAILURE);
  }

  return 0;
}

View File

@@ -30,85 +30,93 @@ using namespace ccl;
/* Entry point of cycles_server.
 *
 * Parses the command line, optionally lists the available devices, then
 * repeatedly creates the selected device and runs its server loop.
 * Returns EXIT_FAILURE if a device could not be created. */
int main(int argc, const char **argv)
{
  util_logging_init(argv[0]);
  path_init();

  /* device types */
  string devicelist = "";
  string devicename = "cpu";
  bool list = false, debug = false;
  int threads = 0, verbosity = 1;

  /* Held by value, like options_parse() in the standalone app does: this is
   * valid whether the function returns a reference or a temporary. */
  vector<DeviceType> types = Device::available_types();

  foreach (DeviceType type, types) {
    if (devicelist != "")
      devicelist += ", ";

    devicelist += Device::string_from_type(type);
  }

  /* parse options */
  ArgParse ap;

  ap.options("Usage: cycles_server [options]",
             "--device %s",
             &devicename,
             ("Devices to use: " + devicelist).c_str(),
             "--list-devices",
             &list,
             "List information about all available devices",
             "--threads %d",
             &threads,
             "Number of threads to use for CPU device",
#ifdef WITH_CYCLES_LOGGING
             "--debug",
             &debug,
             "Enable debug logging",
             "--verbose %d",
             &verbosity,
             "Set verbosity of the logger",
#endif
             NULL);

  if (ap.parse(argc, argv) < 0) {
    fprintf(stderr, "%s\n", ap.geterror().c_str());
    ap.usage();
    exit(EXIT_FAILURE);
  }

  if (debug) {
    util_logging_start();
    util_logging_verbosity_set(verbosity);
  }

  if (list) {
    vector<DeviceInfo> devices = Device::available_devices();

    printf("Devices:\n");

    foreach (DeviceInfo &info, devices) {
      printf(" %s%s\n", info.description.c_str(), (info.display_device) ? " (display)" : "");
    }

    exit(EXIT_SUCCESS);
  }

  /* find matching device */
  DeviceType device_type = Device::type_from_string(devicename.c_str());
  vector<DeviceInfo> devices = Device::available_devices();
  DeviceInfo device_info;

  foreach (DeviceInfo &device, devices) {
    if (device_type == device.type) {
      device_info = device;
      break;
    }
  }

  TaskScheduler::init(threads);

  int exit_code = EXIT_SUCCESS;

  while (1) {
    Stats stats;
    Device *device = Device::create(device_info, stats, true);

    /* NOTE(review): presumably Device::create() can return NULL (e.g. when no
     * device matched above) -- confirm. The pointer was dereferenced unchecked
     * before; stop serving instead of crashing. */
    if (device == NULL) {
      fprintf(stderr, "Error: could not create device: %s\n", devicename.c_str());
      exit_code = EXIT_FAILURE;
      break;
    }

    printf("Cycles Server with device: %s\n", device->info.description.c_str());
    device->server_run();
    delete device;
  }

  /* Only reached when device creation failed; the loop has no other exit. */
  TaskScheduler::exit();

  return exit_code;
}

View File

@@ -36,7 +36,7 @@
#include "util/util_version.h"
#ifdef WITH_CYCLES_STANDALONE_GUI
#include "util/util_view.h"
# include "util/util_view.h"
#endif
#include "app/cycles_xml.h"
@@ -44,447 +44,494 @@
CCL_NAMESPACE_BEGIN
/* Global state of the standalone application, filled in by options_parse()
 * and shared by the session and (optional) GUI callbacks. */
struct Options {
  /* Render session, created in session_init() and freed in session_exit(). */
  Session *session;
  /* Scene created in scene_init(). */
  Scene *scene;
  /* Path of the scene file passed to xml_read_file(). */
  string filepath;
  /* Render/window size; zero means "take it from the scene camera". */
  int width;
  int height;
  SceneParams scene_params;
  SessionParams session_params;
  /* Suppress progress output in background mode. */
  bool quiet;
  /* GUI toggles, flipped from keyboard(). */
  bool show_help;
  bool interactive;
  bool pause;
  /* Destination used by write_render(). */
  string output_path;
} options;
static void session_print(const string& str)
static void session_print(const string &str)
{
/* print with carriage return to overwrite previous */
printf("\r%s", str.c_str());
/* print with carriage return to overwrite previous */
printf("\r%s", str.c_str());
/* add spaces to overwrite longer previous print */
static int maxlen = 0;
int len = str.size();
maxlen = max(len, maxlen);
/* add spaces to overwrite longer previous print */
static int maxlen = 0;
int len = str.size();
maxlen = max(len, maxlen);
for(int i = len; i < maxlen; i++)
printf(" ");
for (int i = len; i < maxlen; i++)
printf(" ");
/* flush because we don't write an end of line */
fflush(stdout);
/* flush because we don't write an end of line */
fflush(stdout);
}
static void session_print_status()
{
string status, substatus;
string status, substatus;
/* get status */
float progress = options.session->progress.get_progress();
options.session->progress.get_status(status, substatus);
/* get status */
float progress = options.session->progress.get_progress();
options.session->progress.get_status(status, substatus);
if(substatus != "")
status += ": " + substatus;
if (substatus != "")
status += ": " + substatus;
/* print status */
status = string_printf("Progress %05.2f %s", (double) progress*100, status.c_str());
session_print(status);
/* print status */
status = string_printf("Progress %05.2f %s", (double)progress * 100, status.c_str());
session_print(status);
}
/* Write the rendered 8-bit image to options.output_path.
 *
 * pixels:   w * h * channels bytes of image data.
 * w, h:     image size in pixels.
 * channels: number of channels per pixel.
 *
 * Returns true only if the file was created, opened, written and closed
 * successfully. */
static bool write_render(const uchar *pixels, int w, int h, int channels)
{
  string msg = string_printf("Writing image %s", options.output_path.c_str());
  session_print(msg);

  unique_ptr<ImageOutput> out = unique_ptr<ImageOutput>(ImageOutput::create(options.output_path));
  if (!out) {
    return false;
  }

  ImageSpec spec(w, h, channels, TypeDesc::UINT8);
  if (!out->open(options.output_path, spec)) {
    return false;
  }

  /* conversion for different top/bottom convention: start at the last row
   * and walk backwards using a negative row stride. */
  bool ok = out->write_image(
      TypeDesc::UINT8, pixels + (h - 1) * w * channels, AutoStride, -w * channels, AutoStride);

  /* Always close the file, but report a failed write or close to the caller
   * instead of unconditionally claiming success. */
  if (!out->close()) {
    ok = false;
  }

  return ok;
}
/* Return buffer parameters matching the current options.width/height.
 *
 * The same static object is refreshed and returned on every call. */
static BufferParams &session_buffer_params()
{
  static BufferParams params;

  const int width = options.width;
  const int height = options.height;

  /* The visible region and the full frame have the same size. */
  params.width = width;
  params.height = height;
  params.full_width = width;
  params.full_height = height;

  return params;
}
/* Create the scene, load it from the XML file and settle on the render size. */
static void scene_init()
{
  options.scene = new Scene(options.scene_params, options.session->device);

  /* Read XML */
  xml_read_file(options.scene, options.filepath.c_str());

  /* Camera width/height override? */
  const bool size_unset = (options.width == 0 || options.height == 0);

  if (size_unset) {
    /* No complete size was requested: adopt the size of the scene camera. */
    options.width = options.scene->camera->width;
    options.height = options.scene->camera->height;
  }
  else {
    /* A size was requested: it overrides the one of the scene camera. */
    options.scene->camera->width = options.width;
    options.scene->camera->height = options.height;
  }

  /* Calculate Viewplane */
  options.scene->camera->compute_auto_viewplane();
}
static void session_init()
{
options.session_params.write_render_cb = write_render;
options.session = new Session(options.session_params);
options.session_params.write_render_cb = write_render;
options.session = new Session(options.session_params);
if(options.session_params.background && !options.quiet)
options.session->progress.set_update_callback(function_bind(&session_print_status));
if (options.session_params.background && !options.quiet)
options.session->progress.set_update_callback(function_bind(&session_print_status));
#ifdef WITH_CYCLES_STANDALONE_GUI
else
options.session->progress.set_update_callback(function_bind(&view_redraw));
else
options.session->progress.set_update_callback(function_bind(&view_redraw));
#endif
/* load scene */
scene_init();
options.session->scene = options.scene;
/* load scene */
scene_init();
options.session->scene = options.scene;
options.session->reset(session_buffer_params(), options.session_params.samples);
options.session->start();
options.session->reset(session_buffer_params(), options.session_params.samples);
options.session->start();
}
/* Free the render session and, for non-quiet background renders, print the
 * final status line. */
static void session_exit()
{
  if (options.session != NULL) {
    delete options.session;
    options.session = NULL;
  }

  const bool report_to_terminal = options.session_params.background && !options.quiet;

  if (report_to_terminal) {
    session_print("Finished Rendering.");
    /* session_print() writes no end of line; terminate it here. */
    printf("\n");
  }
}
#ifdef WITH_CYCLES_STANDALONE_GUI
/* Build the status line shown in the GUI (status, timings, progress and
 * interactive mode) and display it, plus the help overlay when enabled. */
static void display_info(Progress &progress)
{
  /* Time of the previous call, used to measure the time between calls. */
  static double last = 0;

  const double elapsed = time_dt();
  const double latency = elapsed - last;
  last = elapsed;

  double total_time, sample_time;
  string status, substatus;

  progress.get_time(total_time, sample_time);
  progress.get_status(status, substatus);
  const float progress_val = progress.get_progress();

  if (!substatus.empty()) {
    status += ": " + substatus;
  }

  const char *interactive = options.interactive ? "On" : "Off";

  const string str = string_printf(
      "%s"
      " Time: %.2f"
      " Latency: %.4f"
      " Progress: %05.2f"
      " Average: %.4f"
      " Interactive: %s",
      status.c_str(),
      total_time,
      latency,
      (double)progress_val * 100,
      sample_time,
      interactive);

  view_display_info(str.c_str());

  if (options.show_help) {
    view_display_help();
  }
}
static void display()
{
static DeviceDrawParams draw_params = DeviceDrawParams();
static DeviceDrawParams draw_params = DeviceDrawParams();
options.session->draw(session_buffer_params(), draw_params);
options.session->draw(session_buffer_params(), draw_params);
display_info(options.session->progress);
display_info(options.session->progress);
}
static void motion(int x, int y, int button)
{
if(options.interactive) {
Transform matrix = options.session->scene->camera->matrix;
if (options.interactive) {
Transform matrix = options.session->scene->camera->matrix;
/* Translate */
if(button == 0) {
float3 translate = make_float3(x * 0.01f, -(y * 0.01f), 0.0f);
matrix = matrix * transform_translate(translate);
}
/* Translate */
if (button == 0) {
float3 translate = make_float3(x * 0.01f, -(y * 0.01f), 0.0f);
matrix = matrix * transform_translate(translate);
}
/* Rotate */
else if(button == 2) {
float4 r1 = make_float4((float)x * 0.1f, 0.0f, 1.0f, 0.0f);
matrix = matrix * transform_rotate(DEG2RADF(r1.x), make_float3(r1.y, r1.z, r1.w));
/* Rotate */
else if (button == 2) {
float4 r1 = make_float4((float)x * 0.1f, 0.0f, 1.0f, 0.0f);
matrix = matrix * transform_rotate(DEG2RADF(r1.x), make_float3(r1.y, r1.z, r1.w));
float4 r2 = make_float4(y * 0.1f, 1.0f, 0.0f, 0.0f);
matrix = matrix * transform_rotate(DEG2RADF(r2.x), make_float3(r2.y, r2.z, r2.w));
}
float4 r2 = make_float4(y * 0.1f, 1.0f, 0.0f, 0.0f);
matrix = matrix * transform_rotate(DEG2RADF(r2.x), make_float3(r2.y, r2.z, r2.w));
}
/* Update and Reset */
options.session->scene->camera->matrix = matrix;
options.session->scene->camera->need_update = true;
options.session->scene->camera->need_device_update = true;
/* Update and Reset */
options.session->scene->camera->matrix = matrix;
options.session->scene->camera->need_update = true;
options.session->scene->camera->need_device_update = true;
options.session->reset(session_buffer_params(), options.session_params.samples);
}
options.session->reset(session_buffer_params(), options.session_params.samples);
}
}
static void resize(int width, int height)
{
options.width = width;
options.height = height;
options.width = width;
options.height = height;
if(options.session) {
/* Update camera */
options.session->scene->camera->width = width;
options.session->scene->camera->height = height;
options.session->scene->camera->compute_auto_viewplane();
options.session->scene->camera->need_update = true;
options.session->scene->camera->need_device_update = true;
if (options.session) {
/* Update camera */
options.session->scene->camera->width = width;
options.session->scene->camera->height = height;
options.session->scene->camera->compute_auto_viewplane();
options.session->scene->camera->need_update = true;
options.session->scene->camera->need_device_update = true;
options.session->reset(session_buffer_params(), options.session_params.samples);
}
options.session->reset(session_buffer_params(), options.session_params.samples);
}
}
/* GUI keyboard callback.
 *
 * Always available: 'h' toggles help, 'r' resets the render, escape cancels,
 * 'p' toggles pause, 'i' toggles interactive mode.
 * Only in interactive mode: 'w'/'a'/'s'/'d' move the camera and '0'..'3' set
 * the maximum number of bounces. */
static void keyboard(unsigned char key)
{
  /* Keys handled regardless of interactive mode. */
  switch (key) {
    case 'h':
      /* Toggle help */
      options.show_help = !(options.show_help);
      return;
    case 'r':
      /* Reset */
      options.session->reset(session_buffer_params(), options.session_params.samples);
      return;
    case 27:
      /* Cancel (escape) */
      options.session->progress.set_cancel("Canceled");
      return;
    case 'p':
      /* Pause */
      options.pause = !options.pause;
      options.session->set_pause(options.pause);
      return;
    case 'i':
      /* Interactive Mode */
      options.interactive = !(options.interactive);
      return;
    default:
      break;
  }

  /* Everything below requires interactive mode. */
  if (!options.interactive) {
    return;
  }

  const bool is_navigation = (key == 'w' || key == 'a' || key == 's' || key == 'd');
  const bool is_bounce = (key == '0' || key == '1' || key == '2' || key == '3');

  if (is_navigation) {
    /* Navigation */
    Transform matrix = options.session->scene->camera->matrix;
    float3 translate;

    switch (key) {
      case 'w':
        translate = make_float3(0.0f, 0.0f, 0.1f);
        break;
      case 's':
        translate = make_float3(0.0f, 0.0f, -0.1f);
        break;
      case 'a':
        translate = make_float3(-0.1f, 0.0f, 0.0f);
        break;
      case 'd':
        translate = make_float3(0.1f, 0.0f, 0.0f);
        break;
    }

    matrix = matrix * transform_translate(translate);

    /* Update and Reset */
    options.session->scene->camera->matrix = matrix;
    options.session->scene->camera->need_update = true;
    options.session->scene->camera->need_device_update = true;

    options.session->reset(session_buffer_params(), options.session_params.samples);
  }
  else if (is_bounce) {
    /* Set Max Bounces: the digit key is the bounce count. */
    const int bounce = key - '0';

    options.session->scene->integrator->max_bounce = bounce;

    /* Update and Reset */
    options.session->scene->integrator->need_update = true;

    options.session->reset(session_buffer_params(), options.session_params.samples);
  }
}
#endif
/* Argument-parser callback for positional arguments (registered with "%*" in
 * options_parse()): the first one is taken as the scene file path.
 * Always returns 0. */
static int files_parse(int argc, const char *argv[])
{
  if (argc <= 0) {
    return 0;
  }

  options.filepath = argv[0];
  return 0;
}
static void options_parse(int argc, const char **argv)
{
options.width = 0;
options.height = 0;
options.filepath = "";
options.session = NULL;
options.quiet = false;
options.width = 0;
options.height = 0;
options.filepath = "";
options.session = NULL;
options.quiet = false;
/* device names */
string device_names = "";
string devicename = "CPU";
bool list = false;
/* device names */
string device_names = "";
string devicename = "CPU";
bool list = false;
/* List devices for which support is compiled in. */
vector<DeviceType> types = Device::available_types();
foreach(DeviceType type, types) {
if(device_names != "")
device_names += ", ";
/* List devices for which support is compiled in. */
vector<DeviceType> types = Device::available_types();
foreach (DeviceType type, types) {
if (device_names != "")
device_names += ", ";
device_names += Device::string_from_type(type);
}
device_names += Device::string_from_type(type);
}
/* shading system */
string ssname = "svm";
/* shading system */
string ssname = "svm";
/* parse options */
ArgParse ap;
bool help = false, debug = false, version = false;
int verbosity = 1;
/* parse options */
ArgParse ap;
bool help = false, debug = false, version = false;
int verbosity = 1;
ap.options ("Usage: cycles [options] file.xml",
"%*", files_parse, "",
"--device %s", &devicename, ("Devices to use: " + device_names).c_str(),
ap.options("Usage: cycles [options] file.xml",
"%*",
files_parse,
"",
"--device %s",
&devicename,
("Devices to use: " + device_names).c_str(),
#ifdef WITH_OSL
"--shadingsys %s", &ssname, "Shading system to use: svm, osl",
"--shadingsys %s",
&ssname,
"Shading system to use: svm, osl",
#endif
"--background", &options.session_params.background, "Render in background, without user interface",
"--quiet", &options.quiet, "In background mode, don't print progress messages",
"--samples %d", &options.session_params.samples, "Number of samples to render",
"--output %s", &options.output_path, "File path to write output image",
"--threads %d", &options.session_params.threads, "CPU Rendering Threads",
"--width %d", &options.width, "Window width in pixel",
"--height %d", &options.height, "Window height in pixel",
"--tile-width %d", &options.session_params.tile_size.x, "Tile width in pixels",
"--tile-height %d", &options.session_params.tile_size.y, "Tile height in pixels",
"--list-devices", &list, "List information about all available devices",
"--background",
&options.session_params.background,
"Render in background, without user interface",
"--quiet",
&options.quiet,
"In background mode, don't print progress messages",
"--samples %d",
&options.session_params.samples,
"Number of samples to render",
"--output %s",
&options.output_path,
"File path to write output image",
"--threads %d",
&options.session_params.threads,
"CPU Rendering Threads",
"--width %d",
&options.width,
"Window width in pixel",
"--height %d",
&options.height,
"Window height in pixel",
"--tile-width %d",
&options.session_params.tile_size.x,
"Tile width in pixels",
"--tile-height %d",
&options.session_params.tile_size.y,
"Tile height in pixels",
"--list-devices",
&list,
"List information about all available devices",
#ifdef WITH_CYCLES_LOGGING
"--debug", &debug, "Enable debug logging",
"--verbose %d", &verbosity, "Set verbosity of the logger",
"--debug",
&debug,
"Enable debug logging",
"--verbose %d",
&verbosity,
"Set verbosity of the logger",
#endif
"--help", &help, "Print help message",
"--version", &version, "Print version number",
NULL);
"--help",
&help,
"Print help message",
"--version",
&version,
"Print version number",
NULL);
if(ap.parse(argc, argv) < 0) {
fprintf(stderr, "%s\n", ap.geterror().c_str());
ap.usage();
exit(EXIT_FAILURE);
}
if (ap.parse(argc, argv) < 0) {
fprintf(stderr, "%s\n", ap.geterror().c_str());
ap.usage();
exit(EXIT_FAILURE);
}
if(debug) {
util_logging_start();
util_logging_verbosity_set(verbosity);
}
if (debug) {
util_logging_start();
util_logging_verbosity_set(verbosity);
}
if(list) {
vector<DeviceInfo> devices = Device::available_devices();
printf("Devices:\n");
if (list) {
vector<DeviceInfo> devices = Device::available_devices();
printf("Devices:\n");
foreach(DeviceInfo& info, devices) {
printf(" %-10s%s%s\n",
Device::string_from_type(info.type).c_str(),
info.description.c_str(),
(info.display_device)? " (display)": "");
}
foreach (DeviceInfo &info, devices) {
printf(" %-10s%s%s\n",
Device::string_from_type(info.type).c_str(),
info.description.c_str(),
(info.display_device) ? " (display)" : "");
}
exit(EXIT_SUCCESS);
}
else if(version) {
printf("%s\n", CYCLES_VERSION_STRING);
exit(EXIT_SUCCESS);
}
else if(help || options.filepath == "") {
ap.usage();
exit(EXIT_SUCCESS);
}
exit(EXIT_SUCCESS);
}
else if (version) {
printf("%s\n", CYCLES_VERSION_STRING);
exit(EXIT_SUCCESS);
}
else if (help || options.filepath == "") {
ap.usage();
exit(EXIT_SUCCESS);
}
if(ssname == "osl")
options.scene_params.shadingsystem = SHADINGSYSTEM_OSL;
else if(ssname == "svm")
options.scene_params.shadingsystem = SHADINGSYSTEM_SVM;
if (ssname == "osl")
options.scene_params.shadingsystem = SHADINGSYSTEM_OSL;
else if (ssname == "svm")
options.scene_params.shadingsystem = SHADINGSYSTEM_SVM;
#ifndef WITH_CYCLES_STANDALONE_GUI
options.session_params.background = true;
options.session_params.background = true;
#endif
/* Use progressive rendering */
options.session_params.progressive = true;
/* Use progressive rendering */
options.session_params.progressive = true;
/* find matching device */
DeviceType device_type = Device::type_from_string(devicename.c_str());
vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK(device_type));
/* find matching device */
DeviceType device_type = Device::type_from_string(devicename.c_str());
vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK(device_type));
bool device_available = false;
if (!devices.empty()) {
options.session_params.device = devices.front();
device_available = true;
}
bool device_available = false;
if (!devices.empty()) {
options.session_params.device = devices.front();
device_available = true;
}
/* handle invalid configurations */
if(options.session_params.device.type == DEVICE_NONE || !device_available) {
fprintf(stderr, "Unknown device: %s\n", devicename.c_str());
exit(EXIT_FAILURE);
}
/* handle invalid configurations */
if (options.session_params.device.type == DEVICE_NONE || !device_available) {
fprintf(stderr, "Unknown device: %s\n", devicename.c_str());
exit(EXIT_FAILURE);
}
#ifdef WITH_OSL
else if(!(ssname == "osl" || ssname == "svm")) {
fprintf(stderr, "Unknown shading system: %s\n", ssname.c_str());
exit(EXIT_FAILURE);
}
else if(options.scene_params.shadingsystem == SHADINGSYSTEM_OSL && options.session_params.device.type != DEVICE_CPU) {
fprintf(stderr, "OSL shading system only works with CPU device\n");
exit(EXIT_FAILURE);
}
else if (!(ssname == "osl" || ssname == "svm")) {
fprintf(stderr, "Unknown shading system: %s\n", ssname.c_str());
exit(EXIT_FAILURE);
}
else if (options.scene_params.shadingsystem == SHADINGSYSTEM_OSL &&
options.session_params.device.type != DEVICE_CPU) {
fprintf(stderr, "OSL shading system only works with CPU device\n");
exit(EXIT_FAILURE);
}
#endif
else if(options.session_params.samples < 0) {
fprintf(stderr, "Invalid number of samples: %d\n", options.session_params.samples);
exit(EXIT_FAILURE);
}
else if(options.filepath == "") {
fprintf(stderr, "No file path specified\n");
exit(EXIT_FAILURE);
}
else if (options.session_params.samples < 0) {
fprintf(stderr, "Invalid number of samples: %d\n", options.session_params.samples);
exit(EXIT_FAILURE);
}
else if (options.filepath == "") {
fprintf(stderr, "No file path specified\n");
exit(EXIT_FAILURE);
}
/* For smoother Viewport */
options.session_params.start_resolution = 64;
/* For smoother Viewport */
options.session_params.start_resolution = 64;
}
CCL_NAMESPACE_END
@@ -493,26 +540,33 @@ using namespace ccl;
/* Entry point of the standalone application.
 *
 * Renders in the background when requested (always, in builds without the
 * GUI); otherwise opens the viewer window and renders from its main loop. */
int main(int argc, const char **argv)
{
  util_logging_init(argv[0]);
  path_init();
  options_parse(argc, argv);

#ifdef WITH_CYCLES_STANDALONE_GUI
  if (!options.session_params.background) {
    const string title = "Cycles: " + path_filename(options.filepath);

    /* init/exit are callback so they run while GL is initialized */
    view_main_loop(title.c_str(),
                   options.width,
                   options.height,
                   session_init,
                   session_exit,
                   resize,
                   display,
                   keyboard,
                   motion);

    return 0;
  }
#endif

  /* Background render: start, block until finished, clean up. */
  session_init();
  options.session->wait();
  session_exit();

  return 0;
}

File diff suppressed because it is too large Load Diff

View File

@@ -29,4 +29,4 @@ void xml_read_file(Scene *scene, const char *filepath);
CCL_NAMESPACE_END
#endif /* __CYCLES_XML_H__ */
#endif /* __CYCLES_XML_H__ */

View File

@@ -33,4 +33,4 @@ void CCL_logging_verbosity_set(int verbosity);
}
#endif
#endif /* __CCL_API_H__ */
#endif /* __CCL_API_H__ */

View File

@@ -1,68 +1,68 @@
# Include directories, relative to this directory.
set(INC
  ..
  ../../glew-mx
  ../../guardedalloc
  ../../mikktspace
  ../../../source/blender/makesdna
  ../../../source/blender/makesrna
  ../../../source/blender/blenlib
  ${CMAKE_BINARY_DIR}/source/blender/makesrna/intern
)

# Include directories of external dependencies.
set(INC_SYS
  ${PYTHON_INCLUDE_DIRS}
  ${GLEW_INCLUDE_DIR}
)

# Sources first, headers second.
set(SRC
  blender_camera.cpp
  blender_device.cpp
  blender_mesh.cpp
  blender_object.cpp
  blender_object_cull.cpp
  blender_particles.cpp
  blender_curves.cpp
  blender_logging.cpp
  blender_python.cpp
  blender_session.cpp
  blender_shader.cpp
  blender_sync.cpp
  blender_texture.cpp

  CCL_api.h
  blender_object_cull.h
  blender_sync.h
  blender_session.h
  blender_texture.h
  blender_util.h
)

# Libraries passed to blender_add_lib() together with the lists above.
set(LIB
  cycles_bvh
  cycles_device
  cycles_graph
  cycles_kernel
  cycles_render
  cycles_subd
  cycles_util
)

# extern_glog is only added when logging is enabled.
if(WITH_CYCLES_LOGGING)
  list(APPEND LIB extern_glog)
endif()

# Python files of the add-on.
set(ADDON_FILES
  addon/__init__.py
  addon/engine.py
  addon/operators.py
  addon/osl.py
  addon/presets.py
  addon/properties.py
  addon/ui.py
  addon/version_update.py
)
add_definitions(${GL_DEFINITIONS})
@@ -72,14 +72,14 @@ if(WITH_CYCLES_DEVICE_OPENCL)
endif()
# Add the WITH_NETWORK compile definition when network rendering is enabled.
if(WITH_CYCLES_NETWORK)
add_definitions(-DWITH_NETWORK)
add_definitions(-DWITH_NETWORK)
endif()
# Register the library from the SRC / INC / INC_SYS / LIB lists.
# blender_add_lib() is a project helper defined outside this file.
blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
# avoid link failure with clang 3.4 debug
# NOTE(review): single quotes are not a quoting syntax in CMake, so the value
# compared against is the literal text '3.4' including the quote characters --
# confirm the version check behaves as intended. Also note the check reads the
# C compiler id/version while the flag is appended to the C++ debug flags.
if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND NOT ${CMAKE_C_COMPILER_VERSION} VERSION_LESS '3.4')
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -gline-tables-only")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -gline-tables-only")
endif()
# Build-order dependency only: bf_rna is built before bf_intern_cycles.
add_dependencies(bf_intern_cycles bf_rna)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -19,91 +19,89 @@
CCL_NAMESPACE_BEGIN
/* Returns the thread count stored in the scene's render settings when the
 * threads mode is FIXED, and 0 for any other threads mode. */
int blender_device_threads(BL::Scene& b_scene)
int blender_device_threads(BL::Scene &b_scene)
{
BL::RenderSettings b_r = b_scene.render();
BL::RenderSettings b_r = b_scene.render();
if(b_r.threads_mode() == BL::RenderSettings::threads_mode_FIXED)
return b_r.threads();
else
return 0;
if (b_r.threads_mode() == BL::RenderSettings::threads_mode_FIXED)
return b_r.threads();
else
return 0;
}
/* Picks the DeviceInfo to render with, based on the scene's "cycles" settings
 * and the "cycles" addon preferences.
 *
 * The result starts out as the first available CPU device and is replaced only
 * when a matching device is found:
 * - scene "device" enum == 2: the first available network device, if any.
 * - scene "device" enum == 1: when the preferences' "compute_device_type" is
 *   not CPU, a multi device built from every available device (CPU plus CUDA
 *   or OpenCL) whose id matches a preferences entry with "use" set.
 *
 * `background` is only forwarded to Device::get_multi_device().
 *
 * NOTE(review): `cpreferences` is assigned only inside the addon loop; confirm
 * it is in a usable state when no addon with module "cycles" is found. */
DeviceInfo blender_device_info(BL::Preferences& b_preferences, BL::Scene& b_scene, bool background)
DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scene, bool background)
{
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
/* Default to CPU device. */
DeviceInfo device = Device::available_devices(DEVICE_MASK_CPU).front();
/* Default to CPU device. */
DeviceInfo device = Device::available_devices(DEVICE_MASK_CPU).front();
if(get_enum(cscene, "device") == 2) {
/* Find network device. */
vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK_NETWORK);
if(!devices.empty()) {
device = devices.front();
}
}
else if(get_enum(cscene, "device") == 1) {
/* Find cycles preferences. */
PointerRNA cpreferences;
if (get_enum(cscene, "device") == 2) {
/* Find network device. */
vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK_NETWORK);
if (!devices.empty()) {
device = devices.front();
}
}
else if (get_enum(cscene, "device") == 1) {
/* Find cycles preferences. */
PointerRNA cpreferences;
BL::Preferences::addons_iterator b_addon_iter;
for(b_preferences.addons.begin(b_addon_iter); b_addon_iter != b_preferences.addons.end(); ++b_addon_iter) {
if(b_addon_iter->module() == "cycles") {
cpreferences = b_addon_iter->preferences().ptr;
break;
}
}
BL::Preferences::addons_iterator b_addon_iter;
for (b_preferences.addons.begin(b_addon_iter); b_addon_iter != b_preferences.addons.end();
++b_addon_iter) {
if (b_addon_iter->module() == "cycles") {
cpreferences = b_addon_iter->preferences().ptr;
break;
}
}
/* Test if we are using GPU devices. */
enum ComputeDevice {
COMPUTE_DEVICE_CPU = 0,
COMPUTE_DEVICE_CUDA = 1,
COMPUTE_DEVICE_OPENCL = 2,
COMPUTE_DEVICE_NUM = 3,
};
/* Test if we are using GPU devices. */
enum ComputeDevice {
COMPUTE_DEVICE_CPU = 0,
COMPUTE_DEVICE_CUDA = 1,
COMPUTE_DEVICE_OPENCL = 2,
COMPUTE_DEVICE_NUM = 3,
};
ComputeDevice compute_device = (ComputeDevice)get_enum(cpreferences,
"compute_device_type",
COMPUTE_DEVICE_NUM,
COMPUTE_DEVICE_CPU);
ComputeDevice compute_device = (ComputeDevice)get_enum(
cpreferences, "compute_device_type", COMPUTE_DEVICE_NUM, COMPUTE_DEVICE_CPU);
if(compute_device != COMPUTE_DEVICE_CPU) {
/* Query GPU devices with matching types. */
uint mask = DEVICE_MASK_CPU;
if(compute_device == COMPUTE_DEVICE_CUDA) {
mask |= DEVICE_MASK_CUDA;
}
else if(compute_device == COMPUTE_DEVICE_OPENCL) {
mask |= DEVICE_MASK_OPENCL;
}
vector<DeviceInfo> devices = Device::available_devices(mask);
if (compute_device != COMPUTE_DEVICE_CPU) {
/* Query GPU devices with matching types. */
uint mask = DEVICE_MASK_CPU;
if (compute_device == COMPUTE_DEVICE_CUDA) {
mask |= DEVICE_MASK_CUDA;
}
else if (compute_device == COMPUTE_DEVICE_OPENCL) {
mask |= DEVICE_MASK_OPENCL;
}
vector<DeviceInfo> devices = Device::available_devices(mask);
/* Match device preferences and available devices. */
vector<DeviceInfo> used_devices;
RNA_BEGIN(&cpreferences, device, "devices") {
if(get_boolean(device, "use")) {
string id = get_string(device, "id");
foreach(DeviceInfo& info, devices) {
if(info.id == id) {
used_devices.push_back(info);
break;
}
}
}
} RNA_END;
/* Match device preferences and available devices. */
vector<DeviceInfo> used_devices;
RNA_BEGIN (&cpreferences, device, "devices") {
if (get_boolean(device, "use")) {
string id = get_string(device, "id");
foreach (DeviceInfo &info, devices) {
if (info.id == id) {
used_devices.push_back(info);
break;
}
}
}
}
RNA_END;
if(!used_devices.empty()) {
int threads = blender_device_threads(b_scene);
device = Device::get_multi_device(used_devices,
threads,
background);
}
/* Else keep using the CPU device that was set before. */
}
}
if (!used_devices.empty()) {
int threads = blender_device_threads(b_scene);
device = Device::get_multi_device(used_devices, threads, background);
}
/* Else keep using the CPU device that was set before. */
}
}
return device;
return device;
}
CCL_NAMESPACE_END

View File

@@ -27,11 +27,13 @@
CCL_NAMESPACE_BEGIN
/* Get number of threads to use for rendering: the fixed thread count from the
 * scene's render settings, or 0 when the threads mode is not FIXED. */
int blender_device_threads(BL::Scene& b_scene);
int blender_device_threads(BL::Scene &b_scene);
/* Convert Blender settings to device specification.
 * The first available CPU device is returned unless the scene and addon
 * preferences select a network or GPU device that is actually available. */
DeviceInfo blender_device_info(BL::Preferences& b_preferences, BL::Scene& b_scene, bool background);
DeviceInfo blender_device_info(BL::Preferences &b_preferences,
BL::Scene &b_scene,
bool background);
CCL_NAMESPACE_END
#endif /* __BLENDER_DEVICE_H__ */
#endif /* __BLENDER_DEVICE_H__ */

View File

@@ -19,15 +19,15 @@
/* C API entry point: forwards argv0 to ccl::util_logging_init(). */
void CCL_init_logging(const char *argv0)
{
ccl::util_logging_init(argv0);
ccl::util_logging_init(argv0);
}
/* C API entry point: forwards to ccl::util_logging_start(). */
void CCL_start_debug_logging()
{
ccl::util_logging_start();
ccl::util_logging_start();
}
/* C API entry point: forwards the verbosity level to
 * ccl::util_logging_verbosity_set(). */
void CCL_logging_verbosity_set(int verbosity)
{
ccl::util_logging_verbosity_set(verbosity);
ccl::util_logging_verbosity_set(verbosity);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -22,72 +22,69 @@
CCL_NAMESPACE_BEGIN
/* Reads the scene-level culling settings.
 *
 * All flags start out false and both margins 0. Settings are only read from
 * the scene's "cycles" properties when the render settings have simplify
 * enabled. Camera and distance culling are each enabled only when the camera
 * is not a panorama camera, multiview is off and the matching scene option is
 * set. Distance culling is additionally switched off when its margin is
 * exactly 0. */
BlenderObjectCulling::BlenderObjectCulling(Scene *scene, BL::Scene& b_scene)
: use_scene_camera_cull_(false),
use_camera_cull_(false),
camera_cull_margin_(0.0f),
use_scene_distance_cull_(false),
use_distance_cull_(false),
distance_cull_margin_(0.0f)
BlenderObjectCulling::BlenderObjectCulling(Scene *scene, BL::Scene &b_scene)
: use_scene_camera_cull_(false),
use_camera_cull_(false),
camera_cull_margin_(0.0f),
use_scene_distance_cull_(false),
use_distance_cull_(false),
distance_cull_margin_(0.0f)
{
if(b_scene.render().use_simplify()) {
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
if (b_scene.render().use_simplify()) {
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
use_scene_camera_cull_ = scene->camera->type != CAMERA_PANORAMA &&
!b_scene.render().use_multiview() &&
get_boolean(cscene, "use_camera_cull");
use_scene_distance_cull_ = scene->camera->type != CAMERA_PANORAMA &&
!b_scene.render().use_multiview() &&
get_boolean(cscene, "use_distance_cull");
use_scene_camera_cull_ = scene->camera->type != CAMERA_PANORAMA &&
!b_scene.render().use_multiview() &&
get_boolean(cscene, "use_camera_cull");
use_scene_distance_cull_ = scene->camera->type != CAMERA_PANORAMA &&
!b_scene.render().use_multiview() &&
get_boolean(cscene, "use_distance_cull");
camera_cull_margin_ = get_float(cscene, "camera_cull_margin");
distance_cull_margin_ = get_float(cscene, "distance_cull_margin");
camera_cull_margin_ = get_float(cscene, "camera_cull_margin");
distance_cull_margin_ = get_float(cscene, "distance_cull_margin");
if(distance_cull_margin_ == 0.0f) {
use_scene_distance_cull_ = false;
}
}
if (distance_cull_margin_ == 0.0f) {
use_scene_distance_cull_ = false;
}
}
}
/* Sets the per-object culling flags for `b_ob`.
 *
 * Does nothing when both scene-level flags are off (the per-object flags are
 * left unchanged in that case). Otherwise each per-object flag is the
 * scene-level flag combined with the object's own "cycles" option, and the
 * camera is updated when either kind of culling ends up enabled. */
void BlenderObjectCulling::init_object(Scene *scene, BL::Object& b_ob)
void BlenderObjectCulling::init_object(Scene *scene, BL::Object &b_ob)
{
if(!use_scene_camera_cull_ && !use_scene_distance_cull_) {
return;
}
if (!use_scene_camera_cull_ && !use_scene_distance_cull_) {
return;
}
PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
use_camera_cull_ = use_scene_camera_cull_ && get_boolean(cobject, "use_camera_cull");
use_distance_cull_ = use_scene_distance_cull_ && get_boolean(cobject, "use_distance_cull");
use_camera_cull_ = use_scene_camera_cull_ && get_boolean(cobject, "use_camera_cull");
use_distance_cull_ = use_scene_distance_cull_ && get_boolean(cobject, "use_distance_cull");
if(use_camera_cull_ || use_distance_cull_) {
/* Need to have proper projection matrix. */
scene->camera->update(scene);
}
if (use_camera_cull_ || use_distance_cull_) {
/* Need to have proper projection matrix. */
scene->camera->update(scene);
}
}
/* Runs the enabled culling tests on the bounding box of `b_ob`, transformed by
 * `tfm`.
 *
 * Returns false when neither per-object flag is set. Otherwise returns true
 * when every enabled test reports the object as culled: both tests when both
 * are enabled, or the single enabled test when the other is off. */
bool BlenderObjectCulling::test(Scene *scene, BL::Object& b_ob, Transform& tfm)
bool BlenderObjectCulling::test(Scene *scene, BL::Object &b_ob, Transform &tfm)
{
if(!use_camera_cull_ && !use_distance_cull_) {
return false;
}
if (!use_camera_cull_ && !use_distance_cull_) {
return false;
}
/* Compute world space bounding box corners. */
float3 bb[8];
BL::Array<float, 24> boundbox = b_ob.bound_box();
for(int i = 0; i < 8; ++i) {
float3 p = make_float3(boundbox[3 * i + 0],
boundbox[3 * i + 1],
boundbox[3 * i + 2]);
bb[i] = transform_point(&tfm, p);
}
/* Compute world space bounding box corners. */
float3 bb[8];
BL::Array<float, 24> boundbox = b_ob.bound_box();
for (int i = 0; i < 8; ++i) {
float3 p = make_float3(boundbox[3 * i + 0], boundbox[3 * i + 1], boundbox[3 * i + 2]);
bb[i] = transform_point(&tfm, p);
}
bool camera_culled = use_camera_cull_ && test_camera(scene, bb);
bool distance_culled = use_distance_cull_ && test_distance(scene, bb);
bool camera_culled = use_camera_cull_ && test_camera(scene, bb);
bool distance_culled = use_distance_cull_ && test_distance(scene, bb);
return ((camera_culled && distance_culled) ||
(camera_culled && !use_distance_cull_) ||
(distance_culled && !use_camera_cull_));
return ((camera_culled && distance_culled) || (camera_culled && !use_distance_cull_) ||
(distance_culled && !use_camera_cull_));
}
/* TODO(sergey): Not really optimal, consider approaches based on k-DOP in order
@@ -95,54 +92,50 @@ bool BlenderObjectCulling::test(Scene *scene, BL::Object& b_ob, Transform& tfm)
*/
/* Camera test on the eight bounding box corners in `bb`.
 *
 * Each corner is multiplied by the camera's worldtondc transform and divided
 * by its w component; corners with a negative projected z have x and y
 * replaced by (1 - value). Returns true when every corner has projected
 * z < -camera_cull_margin_, or when the min/max of the projected corners lies
 * entirely beyond one side of the [-margin, 1 + margin] range in x or y. */
bool BlenderObjectCulling::test_camera(Scene *scene, float3 bb[8])
{
Camera *cam = scene->camera;
const ProjectionTransform& worldtondc = cam->worldtondc;
float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
bool all_behind = true;
for(int i = 0; i < 8; ++i) {
float3 p = bb[i];
float4 b = make_float4(p.x, p.y, p.z, 1.0f);
float4 c = make_float4(dot(worldtondc.x, b),
dot(worldtondc.y, b),
dot(worldtondc.z, b),
dot(worldtondc.w, b));
p = float4_to_float3(c / c.w);
if(c.z < 0.0f) {
p.x = 1.0f - p.x;
p.y = 1.0f - p.y;
}
if(c.z >= -camera_cull_margin_) {
all_behind = false;
}
bb_min = min(bb_min, p);
bb_max = max(bb_max, p);
}
if(all_behind) {
return true;
}
return (bb_min.x >= 1.0f + camera_cull_margin_ ||
bb_min.y >= 1.0f + camera_cull_margin_ ||
bb_max.x <= -camera_cull_margin_ ||
bb_max.y <= -camera_cull_margin_);
Camera *cam = scene->camera;
const ProjectionTransform &worldtondc = cam->worldtondc;
float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
bool all_behind = true;
for (int i = 0; i < 8; ++i) {
float3 p = bb[i];
float4 b = make_float4(p.x, p.y, p.z, 1.0f);
float4 c = make_float4(
dot(worldtondc.x, b), dot(worldtondc.y, b), dot(worldtondc.z, b), dot(worldtondc.w, b));
p = float4_to_float3(c / c.w);
if (c.z < 0.0f) {
p.x = 1.0f - p.x;
p.y = 1.0f - p.y;
}
if (c.z >= -camera_cull_margin_) {
all_behind = false;
}
bb_min = min(bb_min, p);
bb_max = max(bb_max, p);
}
if (all_behind) {
return true;
}
return (bb_min.x >= 1.0f + camera_cull_margin_ || bb_min.y >= 1.0f + camera_cull_margin_ ||
bb_max.x <= -camera_cull_margin_ || bb_max.y <= -camera_cull_margin_);
}
/* Distance test on the eight bounding box corners in `bb`.
 *
 * Builds the axis-aligned min/max of the corners, clamps the camera position
 * (column 3 of the camera matrix) into that box to get the closest point, and
 * returns true when the squared distance from the camera to that point
 * exceeds distance_cull_margin_ squared. */
bool BlenderObjectCulling::test_distance(Scene *scene, float3 bb[8])
{
float3 camera_position = transform_get_column(&scene->camera->matrix, 3);
float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
float3 camera_position = transform_get_column(&scene->camera->matrix, 3);
float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
/* Find min & max points for x & y & z on bounding box */
for(int i = 0; i < 8; ++i) {
float3 p = bb[i];
bb_min = min(bb_min, p);
bb_max = max(bb_max, p);
}
/* Find min & max points for x & y & z on bounding box */
for (int i = 0; i < 8; ++i) {
float3 p = bb[i];
bb_min = min(bb_min, p);
bb_max = max(bb_max, p);
}
float3 closest_point = max(min(bb_max,camera_position),bb_min);
return (len_squared(camera_position - closest_point) >
distance_cull_margin_ * distance_cull_margin_);
float3 closest_point = max(min(bb_max, camera_position), bb_min);
return (len_squared(camera_position - closest_point) >
distance_cull_margin_ * distance_cull_margin_);
}
CCL_NAMESPACE_END

View File

@@ -24,26 +24,25 @@ CCL_NAMESPACE_BEGIN
class Scene;
/* Helper that decides whether an object is culled by the camera and/or
 * distance tests. Scene-level settings are stored by the constructor,
 * init_object() sets the per-object flags, and test() runs the enabled tests
 * on an object's transformed bounding box. */
class BlenderObjectCulling
{
public:
BlenderObjectCulling(Scene *scene, BL::Scene& b_scene);
class BlenderObjectCulling {
public:
BlenderObjectCulling(Scene *scene, BL::Scene &b_scene);
void init_object(Scene *scene, BL::Object& b_ob);
bool test(Scene *scene, BL::Object& b_ob, Transform& tfm);
void init_object(Scene *scene, BL::Object &b_ob);
bool test(Scene *scene, BL::Object &b_ob, Transform &tfm);
private:
bool test_camera(Scene *scene, float3 bb[8]);
bool test_distance(Scene *scene, float3 bb[8]);
private:
bool test_camera(Scene *scene, float3 bb[8]);
bool test_distance(Scene *scene, float3 bb[8]);
/* use_scene_* hold the scene-level switches, use_*_cull_ the per-object
 * result, and the margins are the thresholds used by the two tests. */
bool use_scene_camera_cull_;
bool use_camera_cull_;
float camera_cull_margin_;
bool use_scene_distance_cull_;
bool use_distance_cull_;
float distance_cull_margin_;
bool use_scene_camera_cull_;
bool use_camera_cull_;
float camera_cull_margin_;
bool use_scene_distance_cull_;
bool use_distance_cull_;
float distance_cull_margin_;
};
CCL_NAMESPACE_END
#endif /* __BLENDER_OBJECT_CULL_H__ */
#endif /* __BLENDER_OBJECT_CULL_H__ */

View File

@@ -27,66 +27,66 @@ CCL_NAMESPACE_BEGIN
/* Utilities */
/* Copies the particle that generated the instance `b_instance` into the
 * matching ParticleSystem and links `object` to it.
 *
 * Returns false when the instance has no particle system, when the object's
 * mesh does not need ATTR_STD_PARTICLE, or when persistent_id[0] is not a
 * valid index into the system's particles. Returns true otherwise, including
 * the early exit taken when nothing needs updating.
 * Note that object->hide_on_missing_motion is set as soon as a particle
 * system is found, before the remaining checks. */
bool BlenderSync::sync_dupli_particle(BL::Object& b_ob,
BL::DepsgraphObjectInstance& b_instance,
bool BlenderSync::sync_dupli_particle(BL::Object &b_ob,
BL::DepsgraphObjectInstance &b_instance,
Object *object)
{
/* test if this dupli was generated from a particle system */
BL::ParticleSystem b_psys = b_instance.particle_system();
if(!b_psys)
return false;
/* test if this dupli was generated from a particle system */
BL::ParticleSystem b_psys = b_instance.particle_system();
if (!b_psys)
return false;
object->hide_on_missing_motion = true;
object->hide_on_missing_motion = true;
/* test if we need particle data */
if(!object->mesh->need_attribute(scene, ATTR_STD_PARTICLE))
return false;
/* test if we need particle data */
if (!object->mesh->need_attribute(scene, ATTR_STD_PARTICLE))
return false;
/* don't handle child particles yet */
BL::Array<int, OBJECT_PERSISTENT_ID_SIZE> persistent_id = b_instance.persistent_id();
/* don't handle child particles yet */
BL::Array<int, OBJECT_PERSISTENT_ID_SIZE> persistent_id = b_instance.persistent_id();
if(persistent_id[0] >= b_psys.particles.length())
return false;
if (persistent_id[0] >= b_psys.particles.length())
return false;
/* find particle system */
ParticleSystemKey key(b_ob, persistent_id);
ParticleSystem *psys;
/* find particle system */
ParticleSystemKey key(b_ob, persistent_id);
ParticleSystem *psys;
/* first_use must be read before sync(), which looks the key up in the map. */
bool first_use = !particle_system_map.is_used(key);
bool need_update = particle_system_map.sync(&psys, b_ob, b_instance.object(), key);
bool first_use = !particle_system_map.is_used(key);
bool need_update = particle_system_map.sync(&psys, b_ob, b_instance.object(), key);
/* no update needed? */
if(!need_update && !object->mesh->need_update && !scene->object_manager->need_update)
return true;
/* no update needed? */
if (!need_update && !object->mesh->need_update && !scene->object_manager->need_update)
return true;
/* first time used in this sync loop? clear and tag update */
if(first_use) {
psys->particles.clear();
psys->tag_update(scene);
}
/* first time used in this sync loop? clear and tag update */
if (first_use) {
psys->particles.clear();
psys->tag_update(scene);
}
/* add particle */
BL::Particle b_pa = b_psys.particles[persistent_id[0]];
Particle pa;
/* add particle */
BL::Particle b_pa = b_psys.particles[persistent_id[0]];
Particle pa;
pa.index = persistent_id[0];
pa.age = b_scene.frame_current() - b_pa.birth_time();
pa.lifetime = b_pa.lifetime();
pa.location = get_float3(b_pa.location());
pa.rotation = get_float4(b_pa.rotation());
pa.size = b_pa.size();
pa.velocity = get_float3(b_pa.velocity());
pa.angular_velocity = get_float3(b_pa.angular_velocity());
pa.index = persistent_id[0];
pa.age = b_scene.frame_current() - b_pa.birth_time();
pa.lifetime = b_pa.lifetime();
pa.location = get_float3(b_pa.location());
pa.rotation = get_float4(b_pa.rotation());
pa.size = b_pa.size();
pa.velocity = get_float3(b_pa.velocity());
pa.angular_velocity = get_float3(b_pa.angular_velocity());
psys->particles.push_back_slow(pa);
psys->particles.push_back_slow(pa);
/* the object manager is tagged only when the object's particle index changes */
if(object->particle_index != psys->particles.size() - 1)
scene->object_manager->tag_update(scene);
object->particle_system = psys;
object->particle_index = psys->particles.size() - 1;
if (object->particle_index != psys->particles.size() - 1)
scene->object_manager->tag_update(scene);
object->particle_system = psys;
object->particle_index = psys->particles.size() - 1;
/* return that this object has particle data */
return true;
/* return that this object has particle data */
return true;
}
CCL_NAMESPACE_END

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -33,159 +33,153 @@ class RenderBuffers;
class RenderTile;
/* Render session as seen from the Blender side: keeps the Blender RNA handles
 * (b_engine, b_data, b_scene, ...) next to the Cycles Session, Scene and
 * BlenderSync pointers, and declares the entry points for offline rendering,
 * baking, interactive updates, drawing and status reporting. */
class BlenderSession {
public:
BlenderSession(BL::RenderEngine& b_engine,
BL::Preferences& b_userpref,
BL::BlendData& b_data,
bool preview_osl);
public:
BlenderSession(BL::RenderEngine &b_engine,
BL::Preferences &b_userpref,
BL::BlendData &b_data,
bool preview_osl);
BlenderSession(BL::RenderEngine& b_engine,
BL::Preferences& b_userpref,
BL::BlendData& b_data,
BL::SpaceView3D& b_v3d,
BL::RegionView3D& b_rv3d,
int width, int height);
BlenderSession(BL::RenderEngine &b_engine,
BL::Preferences &b_userpref,
BL::BlendData &b_data,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
int width,
int height);
~BlenderSession();
~BlenderSession();
void create();
void create();
/* session */
void create_session();
void free_session();
/* session */
void create_session();
void free_session();
void reset_session(BL::BlendData& b_data,
BL::Depsgraph& b_depsgraph);
void reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsgraph);
/* offline render */
void render(BL::Depsgraph& b_depsgraph);
/* offline render */
void render(BL::Depsgraph &b_depsgraph);
void bake(BL::Depsgraph& b_depsgrah,
BL::Object& b_object,
const string& pass_type,
const int custom_flag,
const int object_id,
BL::BakePixel& pixel_array,
const size_t num_pixels,
const int depth,
float pixels[]);
void bake(BL::Depsgraph &b_depsgrah,
BL::Object &b_object,
const string &pass_type,
const int custom_flag,
const int object_id,
BL::BakePixel &pixel_array,
const size_t num_pixels,
const int depth,
float pixels[]);
void write_render_result(BL::RenderResult& b_rr,
BL::RenderLayer& b_rlay,
RenderTile& rtile);
void write_render_tile(RenderTile& rtile);
void write_render_result(BL::RenderResult &b_rr, BL::RenderLayer &b_rlay, RenderTile &rtile);
void write_render_tile(RenderTile &rtile);
/* update functions are used to update display buffer only after sample was rendered
* only needed for better visual feedback */
void update_render_result(BL::RenderResult& b_rr,
BL::RenderLayer& b_rlay,
RenderTile& rtile);
void update_render_tile(RenderTile& rtile, bool highlight);
/* update functions are used to update display buffer only after sample was rendered
* only needed for better visual feedback */
void update_render_result(BL::RenderResult &b_rr, BL::RenderLayer &b_rlay, RenderTile &rtile);
void update_render_tile(RenderTile &rtile, bool highlight);
/* interactive updates */
void synchronize(BL::Depsgraph& b_depsgraph);
/* interactive updates */
void synchronize(BL::Depsgraph &b_depsgraph);
/* drawing */
bool draw(int w, int h);
void tag_redraw();
void tag_update();
void get_status(string& status, string& substatus);
void get_kernel_status(string& kernel_status);
void get_progress(float& progress, double& total_time, double& render_time);
void test_cancel();
void update_status_progress();
void update_bake_progress();
/* drawing */
bool draw(int w, int h);
void tag_redraw();
void tag_update();
void get_status(string &status, string &substatus);
void get_kernel_status(string &kernel_status);
void get_progress(float &progress, double &total_time, double &render_time);
void test_cancel();
void update_status_progress();
void update_bake_progress();
bool background;
Session *session;
Scene *scene;
BlenderSync *sync;
double last_redraw_time;
bool background;
Session *session;
Scene *scene;
BlenderSync *sync;
double last_redraw_time;
BL::RenderEngine b_engine;
BL::Preferences b_userpref;
BL::BlendData b_data;
BL::RenderSettings b_render;
BL::Depsgraph b_depsgraph;
/* NOTE: Blender's scene might become invalid after calling
* free_blender_memory_if_possible().
*/
BL::Scene b_scene;
BL::SpaceView3D b_v3d;
BL::RegionView3D b_rv3d;
string b_rlay_name;
string b_rview_name;
BL::RenderEngine b_engine;
BL::Preferences b_userpref;
BL::BlendData b_data;
BL::RenderSettings b_render;
BL::Depsgraph b_depsgraph;
/* NOTE: Blender's scene might become invalid after calling
* free_blender_memory_if_possible().
*/
BL::Scene b_scene;
BL::SpaceView3D b_v3d;
BL::RegionView3D b_rv3d;
string b_rlay_name;
string b_rview_name;
string last_status;
string last_error;
float last_progress;
double last_status_time;
string last_status;
string last_error;
float last_progress;
double last_status_time;
int width, height;
bool preview_osl;
double start_resize_time;
int width, height;
bool preview_osl;
double start_resize_time;
void *python_thread_state;
void *python_thread_state;
/* Global state which is common for all render sessions created from Blender.
* Usually denotes command line arguments.
*/
/* Global state which is common for all render sessions created from Blender.
* Usually denotes command line arguments.
*/
/* Blender is running from the command line, no windows are shown and some
* extra render optimization is possible (possible to free draw-only data and
* so on).
*/
static bool headless;
/* Blender is running from the command line, no windows are shown and some
* extra render optimization is possible (possible to free draw-only data and
* so on).
*/
static bool headless;
/* ** Resumable render ** */
/* ** Resumable render ** */
/* Overall number of chunks in which the sample range is to be divided. */
static int num_resumable_chunks;
/* Overall number of chunks in which the sample range is to be divided. */
static int num_resumable_chunks;
/* Current resumable chunk index to render. */
static int current_resumable_chunk;
/* Current resumable chunk index to render. */
static int current_resumable_chunk;
/* Alternative to single-chunk rendering to render a range of chunks. */
static int start_resumable_chunk;
static int end_resumable_chunk;
/* Alternative to single-chunk rendering to render a range of chunks. */
static int start_resumable_chunk;
static int end_resumable_chunk;
static bool print_render_stats;
static bool print_render_stats;
protected:
void stamp_view_layer_metadata(Scene *scene, const string& view_layer_name);
protected:
void stamp_view_layer_metadata(Scene *scene, const string &view_layer_name);
void do_write_update_render_result(BL::RenderResult& b_rr,
BL::RenderLayer& b_rlay,
RenderTile& rtile,
bool do_update_only);
void do_write_update_render_tile(RenderTile& rtile, bool do_update_only, bool highlight);
void do_write_update_render_result(BL::RenderResult &b_rr,
BL::RenderLayer &b_rlay,
RenderTile &rtile,
bool do_update_only);
void do_write_update_render_tile(RenderTile &rtile, bool do_update_only, bool highlight);
int builtin_image_frame(const string &builtin_name);
void builtin_image_info(const string &builtin_name,
void *builtin_data,
ImageMetaData& metadata);
bool builtin_image_pixels(const string &builtin_name,
void *builtin_data,
unsigned char *pixels,
const size_t pixels_size,
const bool free_cache);
bool builtin_image_float_pixels(const string &builtin_name,
void *builtin_data,
float *pixels,
const size_t pixels_size,
const bool free_cache);
void builtin_images_load();
int builtin_image_frame(const string &builtin_name);
void builtin_image_info(const string &builtin_name, void *builtin_data, ImageMetaData &metadata);
bool builtin_image_pixels(const string &builtin_name,
void *builtin_data,
unsigned char *pixels,
const size_t pixels_size,
const bool free_cache);
bool builtin_image_float_pixels(const string &builtin_name,
void *builtin_data,
float *pixels,
const size_t pixels_size,
const bool free_cache);
void builtin_images_load();
/* Update tile manager to reflect resumable render settings. */
void update_resumable_tile_manager(int num_samples);
/* Update tile manager to reflect resumable render settings. */
void update_resumable_tile_manager(int num_samples);
/* Is used after each render layer synchronization is done with the goal
* of freeing render engine data which is held from Blender side (for
* example, dependency graph).
*/
void free_blender_memory_if_possible();
/* Is used after each render layer synchronization is done with the goal
* of freeing render engine data which is held from Blender side (for
* example, dependency graph).
*/
void free_blender_memory_if_possible();
};
CCL_NAMESPACE_END
#endif /* __BLENDER_SESSION_H__ */
#endif /* __BLENDER_SESSION_H__ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -49,171 +49,173 @@ class ShaderGraph;
class ShaderNode;
class BlenderSync {
public:
BlenderSync(BL::RenderEngine& b_engine,
BL::BlendData& b_data,
BL::Scene& b_scene,
Scene *scene,
bool preview,
Progress &progress);
~BlenderSync();
public:
BlenderSync(BL::RenderEngine &b_engine,
BL::BlendData &b_data,
BL::Scene &b_scene,
Scene *scene,
bool preview,
Progress &progress);
~BlenderSync();
/* sync */
void sync_recalc(BL::Depsgraph& b_depsgraph);
void sync_data(BL::RenderSettings& b_render,
BL::Depsgraph& b_depsgraph,
BL::SpaceView3D& b_v3d,
BL::Object& b_override,
int width, int height,
void **python_thread_state);
void sync_view_layer(BL::SpaceView3D& b_v3d, BL::ViewLayer& b_view_layer);
vector<Pass> sync_render_passes(BL::RenderLayer& b_render_layer,
BL::ViewLayer& b_view_layer);
void sync_integrator();
void sync_camera(BL::RenderSettings& b_render,
BL::Object& b_override,
int width, int height,
const char *viewname);
void sync_view(BL::SpaceView3D& b_v3d,
BL::RegionView3D& b_rv3d,
int width, int height);
inline int get_layer_samples() { return view_layer.samples; }
inline int get_layer_bound_samples() { return view_layer.bound_samples; }
/* sync */
void sync_recalc(BL::Depsgraph &b_depsgraph);
void sync_data(BL::RenderSettings &b_render,
BL::Depsgraph &b_depsgraph,
BL::SpaceView3D &b_v3d,
BL::Object &b_override,
int width,
int height,
void **python_thread_state);
void sync_view_layer(BL::SpaceView3D &b_v3d, BL::ViewLayer &b_view_layer);
vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer, BL::ViewLayer &b_view_layer);
void sync_integrator();
void sync_camera(BL::RenderSettings &b_render,
BL::Object &b_override,
int width,
int height,
const char *viewname);
void sync_view(BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, int width, int height);
inline int get_layer_samples()
{
return view_layer.samples;
}
inline int get_layer_bound_samples()
{
return view_layer.bound_samples;
}
/* get parameters */
static SceneParams get_scene_params(BL::Scene& b_scene,
bool background);
static SessionParams get_session_params(BL::RenderEngine& b_engine,
BL::Preferences& b_userpref,
BL::Scene& b_scene,
bool background);
static bool get_session_pause(BL::Scene& b_scene, bool background);
static BufferParams get_buffer_params(BL::RenderSettings& b_render,
BL::SpaceView3D& b_v3d,
BL::RegionView3D& b_rv3d,
Camera *cam,
int width, int height);
/* get parameters */
static SceneParams get_scene_params(BL::Scene &b_scene, bool background);
static SessionParams get_session_params(BL::RenderEngine &b_engine,
BL::Preferences &b_userpref,
BL::Scene &b_scene,
bool background);
static bool get_session_pause(BL::Scene &b_scene, bool background);
static BufferParams get_buffer_params(BL::RenderSettings &b_render,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
Camera *cam,
int width,
int height);
static PassType get_pass_type(BL::RenderPass& b_pass);
static int get_denoising_pass(BL::RenderPass& b_pass);
static PassType get_pass_type(BL::RenderPass &b_pass);
static int get_denoising_pass(BL::RenderPass &b_pass);
private:
/* sync */
void sync_lights(BL::Depsgraph& b_depsgraph, bool update_all);
void sync_materials(BL::Depsgraph& b_depsgraph, bool update_all);
void sync_objects(BL::Depsgraph& b_depsgraph, float motion_time = 0.0f);
void sync_motion(BL::RenderSettings& b_render,
BL::Depsgraph& b_depsgraph,
BL::Object& b_override,
int width, int height,
void **python_thread_state);
void sync_film();
void sync_view();
void sync_world(BL::Depsgraph& b_depsgraph, bool update_all);
void sync_shaders(BL::Depsgraph& b_depsgraph);
void sync_curve_settings();
private:
/* sync */
void sync_lights(BL::Depsgraph &b_depsgraph, bool update_all);
void sync_materials(BL::Depsgraph &b_depsgraph, bool update_all);
void sync_objects(BL::Depsgraph &b_depsgraph, float motion_time = 0.0f);
void sync_motion(BL::RenderSettings &b_render,
BL::Depsgraph &b_depsgraph,
BL::Object &b_override,
int width,
int height,
void **python_thread_state);
void sync_film();
void sync_view();
void sync_world(BL::Depsgraph &b_depsgraph, bool update_all);
void sync_shaders(BL::Depsgraph &b_depsgraph);
void sync_curve_settings();
void sync_nodes(Shader *shader, BL::ShaderNodeTree& b_ntree);
Mesh *sync_mesh(BL::Depsgraph& b_depsgrpah,
BL::Object& b_ob,
BL::Object& b_ob_instance,
bool object_updated,
bool show_self,
bool show_particles);
void sync_curves(Mesh *mesh,
BL::Mesh& b_mesh,
BL::Object& b_ob,
bool motion,
int motion_step = 0);
Object *sync_object(BL::Depsgraph& b_depsgraph,
BL::ViewLayer& b_view_layer,
BL::DepsgraphObjectInstance& b_instance,
float motion_time,
bool show_self,
bool show_particles,
BlenderObjectCulling& culling,
bool *use_portal);
void sync_light(BL::Object& b_parent,
int persistent_id[OBJECT_PERSISTENT_ID_SIZE],
BL::Object& b_ob,
BL::Object& b_ob_instance,
int random_id,
Transform& tfm,
bool *use_portal);
void sync_background_light(bool use_portal);
void sync_mesh_motion(BL::Depsgraph& b_depsgraph,
BL::Object& b_ob,
Object *object,
float motion_time);
void sync_camera_motion(BL::RenderSettings& b_render,
BL::Object& b_ob,
int width, int height,
float motion_time);
void sync_nodes(Shader *shader, BL::ShaderNodeTree &b_ntree);
Mesh *sync_mesh(BL::Depsgraph &b_depsgrpah,
BL::Object &b_ob,
BL::Object &b_ob_instance,
bool object_updated,
bool show_self,
bool show_particles);
void sync_curves(
Mesh *mesh, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step = 0);
Object *sync_object(BL::Depsgraph &b_depsgraph,
BL::ViewLayer &b_view_layer,
BL::DepsgraphObjectInstance &b_instance,
float motion_time,
bool show_self,
bool show_particles,
BlenderObjectCulling &culling,
bool *use_portal);
void sync_light(BL::Object &b_parent,
int persistent_id[OBJECT_PERSISTENT_ID_SIZE],
BL::Object &b_ob,
BL::Object &b_ob_instance,
int random_id,
Transform &tfm,
bool *use_portal);
void sync_background_light(bool use_portal);
void sync_mesh_motion(BL::Depsgraph &b_depsgraph,
BL::Object &b_ob,
Object *object,
float motion_time);
void sync_camera_motion(
BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time);
/* particles */
bool sync_dupli_particle(BL::Object& b_ob,
BL::DepsgraphObjectInstance& b_instance,
Object *object);
/* particles */
bool sync_dupli_particle(BL::Object &b_ob,
BL::DepsgraphObjectInstance &b_instance,
Object *object);
/* Images. */
void sync_images();
/* Images. */
void sync_images();
/* Early data free. */
void free_data_after_sync(BL::Depsgraph& b_depsgraph);
/* Early data free. */
void free_data_after_sync(BL::Depsgraph &b_depsgraph);
/* util */
void find_shader(BL::ID& id, vector<Shader*>& used_shaders, Shader *default_shader);
bool BKE_object_is_modified(BL::Object& b_ob);
bool object_is_mesh(BL::Object& b_ob);
bool object_is_light(BL::Object& b_ob);
/* util */
void find_shader(BL::ID &id, vector<Shader *> &used_shaders, Shader *default_shader);
bool BKE_object_is_modified(BL::Object &b_ob);
bool object_is_mesh(BL::Object &b_ob);
bool object_is_light(BL::Object &b_ob);
/* variables */
BL::RenderEngine b_engine;
BL::BlendData b_data;
BL::Scene b_scene;
/* variables */
BL::RenderEngine b_engine;
BL::BlendData b_data;
BL::Scene b_scene;
id_map<void*, Shader> shader_map;
id_map<ObjectKey, Object> object_map;
id_map<void*, Mesh> mesh_map;
id_map<ObjectKey, Light> light_map;
id_map<ParticleSystemKey, ParticleSystem> particle_system_map;
set<Mesh*> mesh_synced;
set<Mesh*> mesh_motion_synced;
set<float> motion_times;
void *world_map;
bool world_recalc;
id_map<void *, Shader> shader_map;
id_map<ObjectKey, Object> object_map;
id_map<void *, Mesh> mesh_map;
id_map<ObjectKey, Light> light_map;
id_map<ParticleSystemKey, ParticleSystem> particle_system_map;
set<Mesh *> mesh_synced;
set<Mesh *> mesh_motion_synced;
set<float> motion_times;
void *world_map;
bool world_recalc;
Scene *scene;
bool preview;
bool experimental;
Scene *scene;
bool preview;
bool experimental;
float dicing_rate;
int max_subdivisions;
float dicing_rate;
int max_subdivisions;
/* Settings bundle stored in the `view_layer` member. */
struct RenderLayerInfo {
  /* Defaults: material_override is PointerRNA_NULL, every use_* flag is
   * true, samples is 0 and bound_samples is false. */
  RenderLayerInfo()
      : material_override(PointerRNA_NULL),
        use_background_shader(true),
        use_background_ao(true),
        use_surfaces(true),
        use_hair(true),
        samples(0),
        bound_samples(false)
  {
  }

  string name;
  BL::Material material_override;
  bool use_background_shader;
  bool use_background_ao;
  bool use_surfaces;
  bool use_hair;
  int samples;
  bool bound_samples;
} view_layer;
Progress &progress;
Progress &progress;
};
CCL_NAMESPACE_END
#endif /* __BLENDER_SYNC_H__ */
#endif /* __BLENDER_SYNC_H__ */

View File

@@ -22,36 +22,36 @@ namespace {
/* Point density helpers. */
/* Invert a texture-space location/size pair in place.
 *
 * Each non-zero component of `size` is replaced by 0.5 / size; zero
 * components are left untouched so no division by zero happens.
 * `loc` is then replaced by loc * size - 0.5 (per component), using the
 * already updated `size`.
 *
 * The transform must run exactly once per call: applying it a second time
 * would invert `size` again and shift `loc` twice. */
void density_texture_space_invert(float3 &loc, float3 &size)
{
  if (size.x != 0.0f) {
    size.x = 0.5f / size.x;
  }
  if (size.y != 0.0f) {
    size.y = 0.5f / size.y;
  }
  if (size.z != 0.0f) {
    size.z = 0.5f / size.z;
  }

  loc = loc * size - make_float3(0.5f, 0.5f, 0.5f);
}
} /* namespace */
} /* namespace */
/* Compute the texture-space location and size for a point density node.
 *
 * If the node has no object assigned, `loc` and `size` are both set to zero
 * and the function returns early. Otherwise the node's min/max bounds are
 * queried through calc_point_density_minmax(), turned into a centre
 * (`loc`) and a half-extent (`size`), and finally passed through
 * density_texture_space_invert(). */
void point_density_texture_space(BL::Depsgraph &b_depsgraph,
                                 BL::ShaderNodeTexPointDensity &b_point_density_node,
                                 float3 &loc,
                                 float3 &size)
{
  BL::Object b_ob(b_point_density_node.object());
  if (!b_ob) {
    loc = make_float3(0.0f, 0.0f, 0.0f);
    size = make_float3(0.0f, 0.0f, 0.0f);
    return;
  }

  float3 min, max;
  b_point_density_node.calc_point_density_minmax(b_depsgraph, &min[0], &max[0]);

  /* Centre and half-extent of the bounds. */
  loc = (min + max) * 0.5f;
  size = (max - min) * 0.5f;
  density_texture_space_invert(loc, size);
}
CCL_NAMESPACE_END

View File

@@ -22,11 +22,11 @@
CCL_NAMESPACE_BEGIN
/* Fill `loc` and `size` with the texture-space location and size of the
 * given point density node. */
void point_density_texture_space(BL::Depsgraph &b_depsgraph,
                                 BL::ShaderNodeTexPointDensity &b_point_density_node,
                                 float3 &loc,
                                 float3 &size);
CCL_NAMESPACE_END
#endif /* __BLENDER_TEXTURE_H__ */
#endif /* __BLENDER_TEXTURE_H__ */

File diff suppressed because it is too large Load Diff

View File

@@ -1,42 +1,42 @@
# Include directories for the sources in this directory: the parent directory.
set(INC
  ..
)

# No system include directories are listed here.
set(INC_SYS
)

# BVH implementation sources. Each file is listed exactly once.
set(SRC
  bvh.cpp
  bvh2.cpp
  bvh4.cpp
  bvh8.cpp
  bvh_binning.cpp
  bvh_build.cpp
  bvh_embree.cpp
  bvh_node.cpp
  bvh_sort.cpp
  bvh_split.cpp
  bvh_unaligned.cpp
)

# Headers that accompany the sources above.
set(SRC_HEADERS
  bvh.h
  bvh2.h
  bvh4.h
  bvh8.h
  bvh_binning.h
  bvh_build.h
  bvh_embree.h
  bvh_node.h
  bvh_params.h
  bvh_sort.h
  bvh_split.h
  bvh_unaligned.h
)

# Libraries recorded for this directory.
set(LIB
  cycles_render
)

include_directories(${INC})

View File

@@ -27,7 +27,7 @@
#include "bvh/bvh_node.h"
#ifdef WITH_EMBREE
#include "bvh/bvh_embree.h"
# include "bvh/bvh_embree.h"
#endif
#include "util/util_foreach.h"
@@ -40,533 +40,529 @@ CCL_NAMESPACE_BEGIN
/* Return the name string for a BVH layout.
 *
 * If `layout` matches none of the cases below, a DFATAL message is logged
 * and an empty string is returned. */
const char *bvh_layout_name(BVHLayout layout)
{
  switch (layout) {
    case BVH_LAYOUT_BVH2:
      return "BVH2";
    case BVH_LAYOUT_BVH4:
      return "BVH4";
    case BVH_LAYOUT_BVH8:
      return "BVH8";
    case BVH_LAYOUT_NONE:
      return "NONE";
    case BVH_LAYOUT_EMBREE:
      return "EMBREE";
    case BVH_LAYOUT_ALL:
      return "ALL";
  }
  LOG(DFATAL) << "Unsupported BVH layout was passed.";
  return "";
}
/* Pick the BVH layout to use given a requested layout and a mask of
 * supported layouts.
 *
 * Returns `requested_layout` when its bit is set in `supported_layouts`.
 * Otherwise returns the layout whose bit is the highest one set among the
 * supported bits below the requested bit. */
BVHLayout BVHParams::best_bvh_layout(BVHLayout requested_layout, BVHLayoutMask supported_layouts)
{
  const BVHLayoutMask requested_layout_mask = (BVHLayoutMask)requested_layout;
  /* Check whether requested layout is supported, if so -- no need to do
   * any extra computation.
   */
  if (supported_layouts & requested_layout_mask) {
    return requested_layout;
  }
  /* Some bit magic to get widest supported BVH layout. */
  /* This is a mask of supported BVH layouts which are narrower than the
   * requested one.
   */
  const BVHLayoutMask allowed_layouts_mask = (supported_layouts & (requested_layout_mask - 1));
  /* We get widest from allowed ones and convert mask to actual layout.
   * NOTE(review): if no narrower layout is supported the mask is zero here;
   * what __bsr() returns for zero is not visible in this file -- confirm
   * callers never hit that case. */
  const BVHLayoutMask widest_allowed_layout_mask = __bsr(allowed_layouts_mask);
  return (BVHLayout)(1 << widest_allowed_layout_mask);
}
/* Pack Utility */
/* Store the node pointer and its index; no other work is done. */
BVHStackEntry::BVHStackEntry(const BVHNode *n, int i) : node(n), idx(i)
{
}
/* Encode the entry's index: leaf nodes yield the bitwise complement of
 * `idx`, all other nodes yield `idx` unchanged. */
int BVHStackEntry::encodeIdx() const
{
  return (node->is_leaf()) ? ~idx : idx;
}
/* BVH */
/* Initialise `params` and `objects` from the given arguments. */
BVH::BVH(const BVHParams &params_, const vector<Object *> &objects_)
    : params(params_), objects(objects_)
{
}
/* Allocate the BVH implementation matching `params.bvh_layout`.
 *
 * Returns a newly allocated BVH2, BVH4, BVH8 or (when built with
 * WITH_EMBREE) BVHEmbree. For any other layout a DFATAL message is logged
 * and NULL is returned. */
BVH *BVH::create(const BVHParams &params, const vector<Object *> &objects)
{
  switch (params.bvh_layout) {
    case BVH_LAYOUT_BVH2:
      return new BVH2(params, objects);
    case BVH_LAYOUT_BVH4:
      return new BVH4(params, objects);
    case BVH_LAYOUT_BVH8:
      return new BVH8(params, objects);
    case BVH_LAYOUT_EMBREE:
#ifdef WITH_EMBREE
      return new BVHEmbree(params, objects);
#endif
      /* Without WITH_EMBREE this case falls through to the unsupported
       * layouts below. */
    case BVH_LAYOUT_NONE:
    case BVH_LAYOUT_ALL:
      break;
  }
  LOG(DFATAL) << "Requested unsupported BVH layout.";
  return NULL;
}
/* Building */
/* Build the BVH: construct the node tree, widen it for the target layout,
 * then pack primitives and nodes.
 *
 * Cancellation is checked through `progress` after each stage; on cancel
 * the tree built so far is freed and the function returns early. The
 * second (Stats *) parameter is unnamed and unused here. */
void BVH::build(Progress &progress, Stats *)
{
  progress.set_substatus("Building BVH");

  /* build nodes */
  BVHBuild bvh_build(objects,
                     pack.prim_type,
                     pack.prim_index,
                     pack.prim_object,
                     pack.prim_time,
                     params,
                     progress);
  BVHNode *bvh2_root = bvh_build.run();

  if (progress.get_cancel()) {
    if (bvh2_root != NULL) {
      bvh2_root->deleteSubtree();
    }
    return;
  }

  /* BVH builder returns tree in a binary mode (with two children per inner
   * node). Need to adapt that for wider BVH implementations. */
  BVHNode *root = widen_children_nodes(bvh2_root);
  if (root != bvh2_root) {
    /* Widening produced a new tree, so the binary one is no longer needed. */
    bvh2_root->deleteSubtree();
  }

  if (progress.get_cancel()) {
    if (root != NULL) {
      root->deleteSubtree();
    }
    return;
  }

  /* pack triangles */
  progress.set_substatus("Packing BVH triangles and strands");
  pack_primitives();

  if (progress.get_cancel()) {
    root->deleteSubtree();
    return;
  }

  /* pack nodes */
  progress.set_substatus("Packing BVH nodes");
  pack_nodes(root);

  /* free build nodes */
  root->deleteSubtree();
}
/* Refitting */
/* Refit the BVH: re-pack the primitives, then refit the nodes.
 * Returns after packing, without refitting nodes, if `progress` reports a
 * cancel. */
void BVH::refit(Progress &progress)
{
  progress.set_substatus("Packing BVH primitives");
  pack_primitives();

  if (progress.get_cancel()) {
    return;
  }

  progress.set_substatus("Refitting BVH nodes");
  refit_nodes();
}
void BVH::refit_primitives(int start, int end, BoundBox& bbox, uint& visibility)
void BVH::refit_primitives(int start, int end, BoundBox &bbox, uint &visibility)
{
/* Refit range of primitives. */
for(int prim = start; prim < end; prim++) {
int pidx = pack.prim_index[prim];
int tob = pack.prim_object[prim];
Object *ob = objects[tob];
/* Refit range of primitives. */
for (int prim = start; prim < end; prim++) {
int pidx = pack.prim_index[prim];
int tob = pack.prim_object[prim];
Object *ob = objects[tob];
if(pidx == -1) {
/* Object instance. */
bbox.grow(ob->bounds);
}
else {
/* Primitives. */
const Mesh *mesh = ob->mesh;
if (pidx == -1) {
/* Object instance. */
bbox.grow(ob->bounds);
}
else {
/* Primitives. */
const Mesh *mesh = ob->mesh;
if(pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
/* Curves. */
int str_offset = (params.top_level)? mesh->curve_offset: 0;
Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
/* Curves. */
int str_offset = (params.top_level) ? mesh->curve_offset : 0;
Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);
curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);
visibility |= PATH_RAY_CURVE;
visibility |= PATH_RAY_CURVE;
/* Motion curves. */
if(mesh->use_motion_blur) {
Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
/* Motion curves. */
if (mesh->use_motion_blur) {
Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if(attr) {
size_t mesh_size = mesh->curve_keys.size();
size_t steps = mesh->motion_steps - 1;
float3 *key_steps = attr->data_float3();
if (attr) {
size_t mesh_size = mesh->curve_keys.size();
size_t steps = mesh->motion_steps - 1;
float3 *key_steps = attr->data_float3();
for(size_t i = 0; i < steps; i++)
curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bbox);
}
}
}
else {
/* Triangles. */
int tri_offset = (params.top_level)? mesh->tri_offset: 0;
Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
const float3 *vpos = &mesh->verts[0];
for (size_t i = 0; i < steps; i++)
curve.bounds_grow(k, key_steps + i * mesh_size, &mesh->curve_radius[0], bbox);
}
}
}
else {
/* Triangles. */
int tri_offset = (params.top_level) ? mesh->tri_offset : 0;
Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
const float3 *vpos = &mesh->verts[0];
triangle.bounds_grow(vpos, bbox);
triangle.bounds_grow(vpos, bbox);
/* Motion triangles. */
if(mesh->use_motion_blur) {
Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
/* Motion triangles. */
if (mesh->use_motion_blur) {
Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if(attr) {
size_t mesh_size = mesh->verts.size();
size_t steps = mesh->motion_steps - 1;
float3 *vert_steps = attr->data_float3();
if (attr) {
size_t mesh_size = mesh->verts.size();
size_t steps = mesh->motion_steps - 1;
float3 *vert_steps = attr->data_float3();
for(size_t i = 0; i < steps; i++)
triangle.bounds_grow(vert_steps + i*mesh_size, bbox);
}
}
}
}
visibility |= ob->visibility_for_tracing();
}
for (size_t i = 0; i < steps; i++)
triangle.bounds_grow(vert_steps + i * mesh_size, bbox);
}
}
}
}
visibility |= ob->visibility_for_tracing();
}
}
/* Triangles */
/* Write the three vertices of packed primitive `idx` into `tri_verts`.
 *
 * The primitive's object and triangle index are looked up in `pack`, the
 * triangle's vertex positions are read from the object's mesh, and each
 * position is converted with float3_to_float4(). */
void BVH::pack_triangle(int idx, float4 tri_verts[3])
{
  int tob = pack.prim_object[idx];
  assert(tob >= 0 && tob < objects.size());
  const Mesh *mesh = objects[tob]->mesh;

  int tidx = pack.prim_index[idx];
  Mesh::Triangle t = mesh->get_triangle(tidx);
  const float3 *vpos = &mesh->verts[0];
  float3 v0 = vpos[t.v[0]];
  float3 v1 = vpos[t.v[1]];
  float3 v2 = vpos[t.v[2]];

  tri_verts[0] = float3_to_float4(v0);
  tri_verts[1] = float3_to_float4(v1);
  tri_verts[2] = float3_to_float4(v2);
}
/* Rebuild pack.prim_tri_index, pack.prim_tri_verts and pack.prim_visibility
 * from pack.prim_index / prim_type / prim_object.
 *
 * Triangle primitives get three consecutive entries in prim_tri_verts and
 * their starting offset stored in prim_tri_index. Non-triangle primitives
 * and entries whose prim_index is -1 get a prim_tri_index of -1; the latter
 * also get a visibility of 0. */
void BVH::pack_primitives()
{
  const size_t tidx_size = pack.prim_index.size();
  size_t num_prim_triangles = 0;
  /* Count number of triangles primitives in BVH. */
  for (unsigned int i = 0; i < tidx_size; i++) {
    if ((pack.prim_index[i] != -1)) {
      if ((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
        ++num_prim_triangles;
      }
    }
  }
  /* Reserve size for arrays. */
  pack.prim_tri_index.clear();
  pack.prim_tri_index.resize(tidx_size);
  pack.prim_tri_verts.clear();
  pack.prim_tri_verts.resize(num_prim_triangles * 3);
  pack.prim_visibility.clear();
  pack.prim_visibility.resize(tidx_size);
  /* Fill in all the arrays. */
  size_t prim_triangle_index = 0;
  for (unsigned int i = 0; i < tidx_size; i++) {
    if (pack.prim_index[i] != -1) {
      int tob = pack.prim_object[i];
      Object *ob = objects[tob];
      if ((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
        /* Three vertices per triangle, stored back to back. */
        pack_triangle(i, (float4 *)&pack.prim_tri_verts[3 * prim_triangle_index]);
        pack.prim_tri_index[i] = 3 * prim_triangle_index;
        ++prim_triangle_index;
      }
      else {
        pack.prim_tri_index[i] = -1;
      }
      pack.prim_visibility[i] = ob->visibility_for_tracing();
      if (pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
        pack.prim_visibility[i] |= PATH_RAY_CURVE;
      }
    }
    else {
      pack.prim_tri_index[i] = -1;
      pack.prim_visibility[i] = 0;
    }
  }
}
/* Pack Instances */
void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
{
/* The BVH's for instances are built separately, but for traversal all
* BVH's are stored in global arrays. This function merges them into the
* top level BVH, adjusting indexes and offsets where appropriate.
*/
const bool use_qbvh = (params.bvh_layout == BVH_LAYOUT_BVH4);
const bool use_obvh = (params.bvh_layout == BVH_LAYOUT_BVH8);
/* The BVH's for instances are built separately, but for traversal all
* BVH's are stored in global arrays. This function merges them into the
* top level BVH, adjusting indexes and offsets where appropriate.
*/
const bool use_qbvh = (params.bvh_layout == BVH_LAYOUT_BVH4);
const bool use_obvh = (params.bvh_layout == BVH_LAYOUT_BVH8);
/* Adjust primitive index to point to the triangle in the global array, for
* meshes with transform applied and already in the top level BVH.
*/
for(size_t i = 0; i < pack.prim_index.size(); i++)
if(pack.prim_index[i] != -1) {
if(pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->curve_offset;
else
pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->tri_offset;
}
/* Adjust primitive index to point to the triangle in the global array, for
* meshes with transform applied and already in the top level BVH.
*/
for (size_t i = 0; i < pack.prim_index.size(); i++)
if (pack.prim_index[i] != -1) {
if (pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->curve_offset;
else
pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->tri_offset;
}
/* track offsets of instanced BVH data in global array */
size_t prim_offset = pack.prim_index.size();
size_t nodes_offset = nodes_size;
size_t nodes_leaf_offset = leaf_nodes_size;
/* track offsets of instanced BVH data in global array */
size_t prim_offset = pack.prim_index.size();
size_t nodes_offset = nodes_size;
size_t nodes_leaf_offset = leaf_nodes_size;
/* clear array that gives the node indexes for instanced objects */
pack.object_node.clear();
/* clear array that gives the node indexes for instanced objects */
pack.object_node.clear();
/* reserve */
size_t prim_index_size = pack.prim_index.size();
size_t prim_tri_verts_size = pack.prim_tri_verts.size();
/* reserve */
size_t prim_index_size = pack.prim_index.size();
size_t prim_tri_verts_size = pack.prim_tri_verts.size();
size_t pack_prim_index_offset = prim_index_size;
size_t pack_prim_tri_verts_offset = prim_tri_verts_size;
size_t pack_nodes_offset = nodes_size;
size_t pack_leaf_nodes_offset = leaf_nodes_size;
size_t object_offset = 0;
size_t pack_prim_index_offset = prim_index_size;
size_t pack_prim_tri_verts_offset = prim_tri_verts_size;
size_t pack_nodes_offset = nodes_size;
size_t pack_leaf_nodes_offset = leaf_nodes_size;
size_t object_offset = 0;
map<Mesh*, int> mesh_map;
map<Mesh *, int> mesh_map;
foreach(Object *ob, objects) {
Mesh *mesh = ob->mesh;
BVH *bvh = mesh->bvh;
foreach (Object *ob, objects) {
Mesh *mesh = ob->mesh;
BVH *bvh = mesh->bvh;
if(mesh->need_build_bvh()) {
if(mesh_map.find(mesh) == mesh_map.end()) {
prim_index_size += bvh->pack.prim_index.size();
prim_tri_verts_size += bvh->pack.prim_tri_verts.size();
nodes_size += bvh->pack.nodes.size();
leaf_nodes_size += bvh->pack.leaf_nodes.size();
if (mesh->need_build_bvh()) {
if (mesh_map.find(mesh) == mesh_map.end()) {
prim_index_size += bvh->pack.prim_index.size();
prim_tri_verts_size += bvh->pack.prim_tri_verts.size();
nodes_size += bvh->pack.nodes.size();
leaf_nodes_size += bvh->pack.leaf_nodes.size();
mesh_map[mesh] = 1;
}
}
}
mesh_map[mesh] = 1;
}
}
}
mesh_map.clear();
mesh_map.clear();
pack.prim_index.resize(prim_index_size);
pack.prim_type.resize(prim_index_size);
pack.prim_object.resize(prim_index_size);
pack.prim_visibility.resize(prim_index_size);
pack.prim_tri_verts.resize(prim_tri_verts_size);
pack.prim_tri_index.resize(prim_index_size);
pack.nodes.resize(nodes_size);
pack.leaf_nodes.resize(leaf_nodes_size);
pack.object_node.resize(objects.size());
pack.prim_index.resize(prim_index_size);
pack.prim_type.resize(prim_index_size);
pack.prim_object.resize(prim_index_size);
pack.prim_visibility.resize(prim_index_size);
pack.prim_tri_verts.resize(prim_tri_verts_size);
pack.prim_tri_index.resize(prim_index_size);
pack.nodes.resize(nodes_size);
pack.leaf_nodes.resize(leaf_nodes_size);
pack.object_node.resize(objects.size());
if(params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0) {
pack.prim_time.resize(prim_index_size);
}
if (params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0) {
pack.prim_time.resize(prim_index_size);
}
int *pack_prim_index = (pack.prim_index.size())? &pack.prim_index[0]: NULL;
int *pack_prim_type = (pack.prim_type.size())? &pack.prim_type[0]: NULL;
int *pack_prim_object = (pack.prim_object.size())? &pack.prim_object[0]: NULL;
uint *pack_prim_visibility = (pack.prim_visibility.size())? &pack.prim_visibility[0]: NULL;
float4 *pack_prim_tri_verts = (pack.prim_tri_verts.size())? &pack.prim_tri_verts[0]: NULL;
uint *pack_prim_tri_index = (pack.prim_tri_index.size())? &pack.prim_tri_index[0]: NULL;
int4 *pack_nodes = (pack.nodes.size())? &pack.nodes[0]: NULL;
int4 *pack_leaf_nodes = (pack.leaf_nodes.size())? &pack.leaf_nodes[0]: NULL;
float2 *pack_prim_time = (pack.prim_time.size())? &pack.prim_time[0]: NULL;
int *pack_prim_index = (pack.prim_index.size()) ? &pack.prim_index[0] : NULL;
int *pack_prim_type = (pack.prim_type.size()) ? &pack.prim_type[0] : NULL;
int *pack_prim_object = (pack.prim_object.size()) ? &pack.prim_object[0] : NULL;
uint *pack_prim_visibility = (pack.prim_visibility.size()) ? &pack.prim_visibility[0] : NULL;
float4 *pack_prim_tri_verts = (pack.prim_tri_verts.size()) ? &pack.prim_tri_verts[0] : NULL;
uint *pack_prim_tri_index = (pack.prim_tri_index.size()) ? &pack.prim_tri_index[0] : NULL;
int4 *pack_nodes = (pack.nodes.size()) ? &pack.nodes[0] : NULL;
int4 *pack_leaf_nodes = (pack.leaf_nodes.size()) ? &pack.leaf_nodes[0] : NULL;
float2 *pack_prim_time = (pack.prim_time.size()) ? &pack.prim_time[0] : NULL;
/* merge */
foreach(Object *ob, objects) {
Mesh *mesh = ob->mesh;
/* merge */
foreach (Object *ob, objects) {
Mesh *mesh = ob->mesh;
/* We assume that if mesh doesn't need own BVH it was already included
* into a top-level BVH and no packing here is needed.
*/
if(!mesh->need_build_bvh()) {
pack.object_node[object_offset++] = 0;
continue;
}
/* We assume that if mesh doesn't need own BVH it was already included
* into a top-level BVH and no packing here is needed.
*/
if (!mesh->need_build_bvh()) {
pack.object_node[object_offset++] = 0;
continue;
}
/* if mesh already added once, don't add it again, but used set
* node offset for this object */
map<Mesh*, int>::iterator it = mesh_map.find(mesh);
/* if mesh already added once, don't add it again, but used set
* node offset for this object */
map<Mesh *, int>::iterator it = mesh_map.find(mesh);
if(mesh_map.find(mesh) != mesh_map.end()) {
int noffset = it->second;
pack.object_node[object_offset++] = noffset;
continue;
}
if (mesh_map.find(mesh) != mesh_map.end()) {
int noffset = it->second;
pack.object_node[object_offset++] = noffset;
continue;
}
BVH *bvh = mesh->bvh;
BVH *bvh = mesh->bvh;
int noffset = nodes_offset;
int noffset_leaf = nodes_leaf_offset;
int mesh_tri_offset = mesh->tri_offset;
int mesh_curve_offset = mesh->curve_offset;
int noffset = nodes_offset;
int noffset_leaf = nodes_leaf_offset;
int mesh_tri_offset = mesh->tri_offset;
int mesh_curve_offset = mesh->curve_offset;
/* fill in node indexes for instances */
if(bvh->pack.root_index == -1)
pack.object_node[object_offset++] = -noffset_leaf-1;
else
pack.object_node[object_offset++] = noffset;
/* fill in node indexes for instances */
if (bvh->pack.root_index == -1)
pack.object_node[object_offset++] = -noffset_leaf - 1;
else
pack.object_node[object_offset++] = noffset;
mesh_map[mesh] = pack.object_node[object_offset-1];
mesh_map[mesh] = pack.object_node[object_offset - 1];
/* merge primitive, object and triangle indexes */
if(bvh->pack.prim_index.size()) {
size_t bvh_prim_index_size = bvh->pack.prim_index.size();
int *bvh_prim_index = &bvh->pack.prim_index[0];
int *bvh_prim_type = &bvh->pack.prim_type[0];
uint *bvh_prim_visibility = &bvh->pack.prim_visibility[0];
uint *bvh_prim_tri_index = &bvh->pack.prim_tri_index[0];
float2 *bvh_prim_time = bvh->pack.prim_time.size()? &bvh->pack.prim_time[0]: NULL;
/* merge primitive, object and triangle indexes */
if (bvh->pack.prim_index.size()) {
size_t bvh_prim_index_size = bvh->pack.prim_index.size();
int *bvh_prim_index = &bvh->pack.prim_index[0];
int *bvh_prim_type = &bvh->pack.prim_type[0];
uint *bvh_prim_visibility = &bvh->pack.prim_visibility[0];
uint *bvh_prim_tri_index = &bvh->pack.prim_tri_index[0];
float2 *bvh_prim_time = bvh->pack.prim_time.size() ? &bvh->pack.prim_time[0] : NULL;
for(size_t i = 0; i < bvh_prim_index_size; i++) {
if(bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_curve_offset;
pack_prim_tri_index[pack_prim_index_offset] = -1;
}
else {
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset;
pack_prim_tri_index[pack_prim_index_offset] =
bvh_prim_tri_index[i] + pack_prim_tri_verts_offset;
}
for (size_t i = 0; i < bvh_prim_index_size; i++) {
if (bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_curve_offset;
pack_prim_tri_index[pack_prim_index_offset] = -1;
}
else {
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset;
pack_prim_tri_index[pack_prim_index_offset] = bvh_prim_tri_index[i] +
pack_prim_tri_verts_offset;
}
pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i];
pack_prim_object[pack_prim_index_offset] = 0; // unused for instances
if(bvh_prim_time != NULL) {
pack_prim_time[pack_prim_index_offset] = bvh_prim_time[i];
}
pack_prim_index_offset++;
}
}
pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i];
pack_prim_object[pack_prim_index_offset] = 0; // unused for instances
if (bvh_prim_time != NULL) {
pack_prim_time[pack_prim_index_offset] = bvh_prim_time[i];
}
pack_prim_index_offset++;
}
}
/* Merge triangle vertices data. */
if(bvh->pack.prim_tri_verts.size()) {
const size_t prim_tri_size = bvh->pack.prim_tri_verts.size();
memcpy(pack_prim_tri_verts + pack_prim_tri_verts_offset,
&bvh->pack.prim_tri_verts[0],
prim_tri_size*sizeof(float4));
pack_prim_tri_verts_offset += prim_tri_size;
}
/* Merge triangle vertices data. */
if (bvh->pack.prim_tri_verts.size()) {
const size_t prim_tri_size = bvh->pack.prim_tri_verts.size();
memcpy(pack_prim_tri_verts + pack_prim_tri_verts_offset,
&bvh->pack.prim_tri_verts[0],
prim_tri_size * sizeof(float4));
pack_prim_tri_verts_offset += prim_tri_size;
}
/* merge nodes */
if(bvh->pack.leaf_nodes.size()) {
int4 *leaf_nodes_offset = &bvh->pack.leaf_nodes[0];
size_t leaf_nodes_offset_size = bvh->pack.leaf_nodes.size();
for(size_t i = 0, j = 0;
i < leaf_nodes_offset_size;
i += BVH_NODE_LEAF_SIZE, j++)
{
int4 data = leaf_nodes_offset[i];
data.x += prim_offset;
data.y += prim_offset;
pack_leaf_nodes[pack_leaf_nodes_offset] = data;
for(int j = 1; j < BVH_NODE_LEAF_SIZE; ++j) {
pack_leaf_nodes[pack_leaf_nodes_offset + j] = leaf_nodes_offset[i + j];
}
pack_leaf_nodes_offset += BVH_NODE_LEAF_SIZE;
}
}
/* merge nodes */
if (bvh->pack.leaf_nodes.size()) {
int4 *leaf_nodes_offset = &bvh->pack.leaf_nodes[0];
size_t leaf_nodes_offset_size = bvh->pack.leaf_nodes.size();
for (size_t i = 0, j = 0; i < leaf_nodes_offset_size; i += BVH_NODE_LEAF_SIZE, j++) {
int4 data = leaf_nodes_offset[i];
data.x += prim_offset;
data.y += prim_offset;
pack_leaf_nodes[pack_leaf_nodes_offset] = data;
for (int j = 1; j < BVH_NODE_LEAF_SIZE; ++j) {
pack_leaf_nodes[pack_leaf_nodes_offset + j] = leaf_nodes_offset[i + j];
}
pack_leaf_nodes_offset += BVH_NODE_LEAF_SIZE;
}
}
if(bvh->pack.nodes.size()) {
int4 *bvh_nodes = &bvh->pack.nodes[0];
size_t bvh_nodes_size = bvh->pack.nodes.size();
if (bvh->pack.nodes.size()) {
int4 *bvh_nodes = &bvh->pack.nodes[0];
size_t bvh_nodes_size = bvh->pack.nodes.size();
for(size_t i = 0, j = 0; i < bvh_nodes_size; j++) {
size_t nsize, nsize_bbox;
if(bvh_nodes[i].x & PATH_RAY_NODE_UNALIGNED) {
if(use_obvh) {
nsize = BVH_UNALIGNED_ONODE_SIZE;
nsize_bbox = BVH_UNALIGNED_ONODE_SIZE-1;
}
else {
nsize = use_qbvh
? BVH_UNALIGNED_QNODE_SIZE
: BVH_UNALIGNED_NODE_SIZE;
nsize_bbox = (use_qbvh) ? BVH_UNALIGNED_QNODE_SIZE-1 : 0;
}
}
else {
if(use_obvh) {
nsize = BVH_ONODE_SIZE;
nsize_bbox = BVH_ONODE_SIZE-1;
}
else {
nsize = (use_qbvh)? BVH_QNODE_SIZE: BVH_NODE_SIZE;
nsize_bbox = (use_qbvh)? BVH_QNODE_SIZE-1 : 0;
}
}
for (size_t i = 0, j = 0; i < bvh_nodes_size; j++) {
size_t nsize, nsize_bbox;
if (bvh_nodes[i].x & PATH_RAY_NODE_UNALIGNED) {
if (use_obvh) {
nsize = BVH_UNALIGNED_ONODE_SIZE;
nsize_bbox = BVH_UNALIGNED_ONODE_SIZE - 1;
}
else {
nsize = use_qbvh ? BVH_UNALIGNED_QNODE_SIZE : BVH_UNALIGNED_NODE_SIZE;
nsize_bbox = (use_qbvh) ? BVH_UNALIGNED_QNODE_SIZE - 1 : 0;
}
}
else {
if (use_obvh) {
nsize = BVH_ONODE_SIZE;
nsize_bbox = BVH_ONODE_SIZE - 1;
}
else {
nsize = (use_qbvh) ? BVH_QNODE_SIZE : BVH_NODE_SIZE;
nsize_bbox = (use_qbvh) ? BVH_QNODE_SIZE - 1 : 0;
}
}
memcpy(pack_nodes + pack_nodes_offset,
bvh_nodes + i,
nsize_bbox*sizeof(int4));
memcpy(pack_nodes + pack_nodes_offset, bvh_nodes + i, nsize_bbox * sizeof(int4));
/* Modify offsets into arrays */
int4 data = bvh_nodes[i + nsize_bbox];
int4 data1 = bvh_nodes[i + nsize_bbox-1];
if(use_obvh) {
data.z += (data.z < 0) ? -noffset_leaf : noffset;
data.w += (data.w < 0) ? -noffset_leaf : noffset;
data.x += (data.x < 0) ? -noffset_leaf : noffset;
data.y += (data.y < 0) ? -noffset_leaf : noffset;
data1.z += (data1.z < 0) ? -noffset_leaf : noffset;
data1.w += (data1.w < 0) ? -noffset_leaf : noffset;
data1.x += (data1.x < 0) ? -noffset_leaf : noffset;
data1.y += (data1.y < 0) ? -noffset_leaf : noffset;
}
else {
data.z += (data.z < 0) ? -noffset_leaf : noffset;
data.w += (data.w < 0) ? -noffset_leaf : noffset;
if(use_qbvh) {
data.x += (data.x < 0)? -noffset_leaf: noffset;
data.y += (data.y < 0)? -noffset_leaf: noffset;
}
}
pack_nodes[pack_nodes_offset + nsize_bbox] = data;
if(use_obvh) {
pack_nodes[pack_nodes_offset + nsize_bbox - 1] = data1;
}
/* Modify offsets into arrays */
int4 data = bvh_nodes[i + nsize_bbox];
int4 data1 = bvh_nodes[i + nsize_bbox - 1];
if (use_obvh) {
data.z += (data.z < 0) ? -noffset_leaf : noffset;
data.w += (data.w < 0) ? -noffset_leaf : noffset;
data.x += (data.x < 0) ? -noffset_leaf : noffset;
data.y += (data.y < 0) ? -noffset_leaf : noffset;
data1.z += (data1.z < 0) ? -noffset_leaf : noffset;
data1.w += (data1.w < 0) ? -noffset_leaf : noffset;
data1.x += (data1.x < 0) ? -noffset_leaf : noffset;
data1.y += (data1.y < 0) ? -noffset_leaf : noffset;
}
else {
data.z += (data.z < 0) ? -noffset_leaf : noffset;
data.w += (data.w < 0) ? -noffset_leaf : noffset;
if (use_qbvh) {
data.x += (data.x < 0) ? -noffset_leaf : noffset;
data.y += (data.y < 0) ? -noffset_leaf : noffset;
}
}
pack_nodes[pack_nodes_offset + nsize_bbox] = data;
if (use_obvh) {
pack_nodes[pack_nodes_offset + nsize_bbox - 1] = data1;
}
/* Usually this copies nothing, but we better
* be prepared for possible node size extension.
*/
memcpy(&pack_nodes[pack_nodes_offset + nsize_bbox+1],
&bvh_nodes[i + nsize_bbox+1],
sizeof(int4) * (nsize - (nsize_bbox+1)));
/* Usually this copies nothing, but we better
* be prepared for possible node size extension.
*/
memcpy(&pack_nodes[pack_nodes_offset + nsize_bbox + 1],
&bvh_nodes[i + nsize_bbox + 1],
sizeof(int4) * (nsize - (nsize_bbox + 1)));
pack_nodes_offset += nsize;
i += nsize;
}
}
pack_nodes_offset += nsize;
i += nsize;
}
}
nodes_offset += bvh->pack.nodes.size();
nodes_leaf_offset += bvh->pack.leaf_nodes.size();
prim_offset += bvh->pack.prim_index.size();
}
nodes_offset += bvh->pack.nodes.size();
nodes_leaf_offset += bvh->pack.leaf_nodes.size();
prim_offset += bvh->pack.prim_index.size();
}
}
CCL_NAMESPACE_END

View File

@@ -34,96 +34,92 @@ class LeafNode;
class Object;
class Progress;
#define BVH_ALIGN 4096
#define BVH_ALIGN 4096
#define TRI_NODE_SIZE 3
/* Packed BVH
*
* BVH stored as it will be used for traversal on the rendering device. */
struct PackedBVH {
/* BVH nodes storage, one node is 4x int4, and contains two bounding boxes,
* and child, triangle or object indexes depending on the node type */
array<int4> nodes;
/* BVH leaf nodes storage. */
array<int4> leaf_nodes;
/* object index to BVH node index mapping for instances */
array<int> object_node;
/* Mapping from primitive index to index in triangle array. */
array<uint> prim_tri_index;
/* Continuous storage of triangle vertices. */
array<float4> prim_tri_verts;
/* primitive type - triangle or strand */
array<int> prim_type;
/* visibility flags for primitives */
array<uint> prim_visibility;
/* mapping from BVH primitive index to true primitive index, as primitives
* may be duplicated due to spatial splits. -1 for instances. */
array<int> prim_index;
/* mapping from BVH primitive index, to the object id of that primitive. */
array<int> prim_object;
/* Time range of BVH primitive. */
array<float2> prim_time;
/* BVH nodes storage, one node is 4x int4, and contains two bounding boxes,
* and child, triangle or object indexes depending on the node type */
array<int4> nodes;
/* BVH leaf nodes storage. */
array<int4> leaf_nodes;
/* object index to BVH node index mapping for instances */
array<int> object_node;
/* Mapping from primitive index to index in triangle array. */
array<uint> prim_tri_index;
/* Continuous storage of triangle vertices. */
array<float4> prim_tri_verts;
/* primitive type - triangle or strand */
array<int> prim_type;
/* visibility flags for primitives */
array<uint> prim_visibility;
/* mapping from BVH primitive index to true primitive index, as primitives
* may be duplicated due to spatial splits. -1 for instances. */
array<int> prim_index;
/* mapping from BVH primitive index, to the object id of that primitive. */
array<int> prim_object;
/* Time range of BVH primitive. */
array<float2> prim_time;
/* index of the root node. */
int root_index;
/* index of the root node. */
int root_index;
PackedBVH()
{
root_index = 0;
}
PackedBVH()
{
root_index = 0;
}
};
enum BVH_TYPE {
bvh2,
bvh4,
bvh8
};
enum BVH_TYPE { bvh2, bvh4, bvh8 };
/* BVH */
class BVH
{
public:
PackedBVH pack;
BVHParams params;
vector<Object*> objects;
class BVH {
public:
PackedBVH pack;
BVHParams params;
vector<Object *> objects;
static BVH *create(const BVHParams& params, const vector<Object*>& objects);
virtual ~BVH() {}
static BVH *create(const BVHParams &params, const vector<Object *> &objects);
virtual ~BVH()
{
}
virtual void build(Progress& progress, Stats *stats=NULL);
void refit(Progress& progress);
virtual void build(Progress &progress, Stats *stats = NULL);
void refit(Progress &progress);
protected:
BVH(const BVHParams& params, const vector<Object*>& objects);
protected:
BVH(const BVHParams &params, const vector<Object *> &objects);
/* Refit range of primitives. */
void refit_primitives(int start, int end, BoundBox& bbox, uint& visibility);
/* Refit range of primitives. */
void refit_primitives(int start, int end, BoundBox &bbox, uint &visibility);
/* triangles and strands */
void pack_primitives();
void pack_triangle(int idx, float4 storage[3]);
/* triangles and strands */
void pack_primitives();
void pack_triangle(int idx, float4 storage[3]);
/* merge instance BVH's */
void pack_instances(size_t nodes_size, size_t leaf_nodes_size);
/* merge instance BVH's */
void pack_instances(size_t nodes_size, size_t leaf_nodes_size);
/* for subclasses to implement */
virtual void pack_nodes(const BVHNode *root) = 0;
virtual void refit_nodes() = 0;
/* for subclasses to implement */
virtual void pack_nodes(const BVHNode *root) = 0;
virtual void refit_nodes() = 0;
virtual BVHNode *widen_children_nodes(const BVHNode *root) = 0;
virtual BVHNode *widen_children_nodes(const BVHNode *root) = 0;
};
/* Pack Utility */
struct BVHStackEntry
{
const BVHNode *node;
int idx;
struct BVHStackEntry {
const BVHNode *node;
int idx;
BVHStackEntry(const BVHNode *n = 0, int i = 0);
int encodeIdx() const;
BVHStackEntry(const BVHNode *n = 0, int i = 0);
int encodeIdx() const;
};
CCL_NAMESPACE_END
#endif /* __BVH_H__ */
#endif /* __BVH_H__ */

View File

@@ -25,276 +25,268 @@
CCL_NAMESPACE_BEGIN
BVH2::BVH2(const BVHParams& params_, const vector<Object*>& objects_)
: BVH(params_, objects_)
BVH2::BVH2(const BVHParams &params_, const vector<Object *> &objects_) : BVH(params_, objects_)
{
}
BVHNode *BVH2::widen_children_nodes(const BVHNode *root)
{
return const_cast<BVHNode *>(root);
return const_cast<BVHNode *>(root);
}
void BVH2::pack_leaf(const BVHStackEntry& e,
const LeafNode *leaf)
void BVH2::pack_leaf(const BVHStackEntry &e, const LeafNode *leaf)
{
assert(e.idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size());
float4 data[BVH_NODE_LEAF_SIZE];
memset(data, 0, sizeof(data));
if(leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
/* object */
data[0].x = __int_as_float(~(leaf->lo));
data[0].y = __int_as_float(0);
}
else {
/* triangle */
data[0].x = __int_as_float(leaf->lo);
data[0].y = __int_as_float(leaf->hi);
}
data[0].z = __uint_as_float(leaf->visibility);
if(leaf->num_triangles() != 0) {
data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
}
assert(e.idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size());
float4 data[BVH_NODE_LEAF_SIZE];
memset(data, 0, sizeof(data));
if (leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
/* object */
data[0].x = __int_as_float(~(leaf->lo));
data[0].y = __int_as_float(0);
}
else {
/* triangle */
data[0].x = __int_as_float(leaf->lo);
data[0].y = __int_as_float(leaf->hi);
}
data[0].z = __uint_as_float(leaf->visibility);
if (leaf->num_triangles() != 0) {
data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
}
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_NODE_LEAF_SIZE);
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4) * BVH_NODE_LEAF_SIZE);
}
void BVH2::pack_inner(const BVHStackEntry& e,
const BVHStackEntry& e0,
const BVHStackEntry& e1)
void BVH2::pack_inner(const BVHStackEntry &e, const BVHStackEntry &e0, const BVHStackEntry &e1)
{
if(e0.node->is_unaligned || e1.node->is_unaligned) {
pack_unaligned_inner(e, e0, e1);
} else {
pack_aligned_inner(e, e0, e1);
}
if (e0.node->is_unaligned || e1.node->is_unaligned) {
pack_unaligned_inner(e, e0, e1);
}
else {
pack_aligned_inner(e, e0, e1);
}
}
void BVH2::pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry& e0,
const BVHStackEntry& e1)
void BVH2::pack_aligned_inner(const BVHStackEntry &e,
const BVHStackEntry &e0,
const BVHStackEntry &e1)
{
pack_aligned_node(e.idx,
e0.node->bounds, e1.node->bounds,
e0.encodeIdx(), e1.encodeIdx(),
e0.node->visibility, e1.node->visibility);
pack_aligned_node(e.idx,
e0.node->bounds,
e1.node->bounds,
e0.encodeIdx(),
e1.encodeIdx(),
e0.node->visibility,
e1.node->visibility);
}
void BVH2::pack_aligned_node(int idx,
const BoundBox& b0,
const BoundBox& b1,
int c0, int c1,
uint visibility0, uint visibility1)
const BoundBox &b0,
const BoundBox &b1,
int c0,
int c1,
uint visibility0,
uint visibility1)
{
assert(idx + BVH_NODE_SIZE <= pack.nodes.size());
assert(c0 < 0 || c0 < pack.nodes.size());
assert(c1 < 0 || c1 < pack.nodes.size());
assert(idx + BVH_NODE_SIZE <= pack.nodes.size());
assert(c0 < 0 || c0 < pack.nodes.size());
assert(c1 < 0 || c1 < pack.nodes.size());
int4 data[BVH_NODE_SIZE] = {
make_int4(visibility0 & ~PATH_RAY_NODE_UNALIGNED,
visibility1 & ~PATH_RAY_NODE_UNALIGNED,
c0, c1),
make_int4(__float_as_int(b0.min.x),
__float_as_int(b1.min.x),
__float_as_int(b0.max.x),
__float_as_int(b1.max.x)),
make_int4(__float_as_int(b0.min.y),
__float_as_int(b1.min.y),
__float_as_int(b0.max.y),
__float_as_int(b1.max.y)),
make_int4(__float_as_int(b0.min.z),
__float_as_int(b1.min.z),
__float_as_int(b0.max.z),
__float_as_int(b1.max.z)),
};
int4 data[BVH_NODE_SIZE] = {
make_int4(
visibility0 & ~PATH_RAY_NODE_UNALIGNED, visibility1 & ~PATH_RAY_NODE_UNALIGNED, c0, c1),
make_int4(__float_as_int(b0.min.x),
__float_as_int(b1.min.x),
__float_as_int(b0.max.x),
__float_as_int(b1.max.x)),
make_int4(__float_as_int(b0.min.y),
__float_as_int(b1.min.y),
__float_as_int(b0.max.y),
__float_as_int(b1.max.y)),
make_int4(__float_as_int(b0.min.z),
__float_as_int(b1.min.z),
__float_as_int(b0.max.z),
__float_as_int(b1.max.z)),
};
memcpy(&pack.nodes[idx], data, sizeof(int4)*BVH_NODE_SIZE);
memcpy(&pack.nodes[idx], data, sizeof(int4) * BVH_NODE_SIZE);
}
void BVH2::pack_unaligned_inner(const BVHStackEntry& e,
const BVHStackEntry& e0,
const BVHStackEntry& e1)
void BVH2::pack_unaligned_inner(const BVHStackEntry &e,
const BVHStackEntry &e0,
const BVHStackEntry &e1)
{
pack_unaligned_node(e.idx,
e0.node->get_aligned_space(),
e1.node->get_aligned_space(),
e0.node->bounds,
e1.node->bounds,
e0.encodeIdx(), e1.encodeIdx(),
e0.node->visibility, e1.node->visibility);
pack_unaligned_node(e.idx,
e0.node->get_aligned_space(),
e1.node->get_aligned_space(),
e0.node->bounds,
e1.node->bounds,
e0.encodeIdx(),
e1.encodeIdx(),
e0.node->visibility,
e1.node->visibility);
}
void BVH2::pack_unaligned_node(int idx,
const Transform& aligned_space0,
const Transform& aligned_space1,
const BoundBox& bounds0,
const BoundBox& bounds1,
int c0, int c1,
uint visibility0, uint visibility1)
const Transform &aligned_space0,
const Transform &aligned_space1,
const BoundBox &bounds0,
const BoundBox &bounds1,
int c0,
int c1,
uint visibility0,
uint visibility1)
{
assert(idx + BVH_UNALIGNED_NODE_SIZE <= pack.nodes.size());
assert(c0 < 0 || c0 < pack.nodes.size());
assert(c1 < 0 || c1 < pack.nodes.size());
assert(idx + BVH_UNALIGNED_NODE_SIZE <= pack.nodes.size());
assert(c0 < 0 || c0 < pack.nodes.size());
assert(c1 < 0 || c1 < pack.nodes.size());
float4 data[BVH_UNALIGNED_NODE_SIZE];
Transform space0 = BVHUnaligned::compute_node_transform(bounds0,
aligned_space0);
Transform space1 = BVHUnaligned::compute_node_transform(bounds1,
aligned_space1);
data[0] = make_float4(__int_as_float(visibility0 | PATH_RAY_NODE_UNALIGNED),
__int_as_float(visibility1 | PATH_RAY_NODE_UNALIGNED),
__int_as_float(c0),
__int_as_float(c1));
float4 data[BVH_UNALIGNED_NODE_SIZE];
Transform space0 = BVHUnaligned::compute_node_transform(bounds0, aligned_space0);
Transform space1 = BVHUnaligned::compute_node_transform(bounds1, aligned_space1);
data[0] = make_float4(__int_as_float(visibility0 | PATH_RAY_NODE_UNALIGNED),
__int_as_float(visibility1 | PATH_RAY_NODE_UNALIGNED),
__int_as_float(c0),
__int_as_float(c1));
data[1] = space0.x;
data[2] = space0.y;
data[3] = space0.z;
data[4] = space1.x;
data[5] = space1.y;
data[6] = space1.z;
data[1] = space0.x;
data[2] = space0.y;
data[3] = space0.z;
data[4] = space1.x;
data[5] = space1.y;
data[6] = space1.z;
memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_NODE_SIZE);
memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_UNALIGNED_NODE_SIZE);
}
void BVH2::pack_nodes(const BVHNode *root)
{
const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
assert(num_leaf_nodes <= num_nodes);
const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
size_t node_size;
if(params.use_unaligned_nodes) {
const size_t num_unaligned_nodes =
root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
node_size = (num_unaligned_nodes * BVH_UNALIGNED_NODE_SIZE) +
(num_inner_nodes - num_unaligned_nodes) * BVH_NODE_SIZE;
}
else {
node_size = num_inner_nodes * BVH_NODE_SIZE;
}
/* Resize arrays */
pack.nodes.clear();
pack.leaf_nodes.clear();
/* For top level BVH, first merge existing BVH's so we know the offsets. */
if(params.top_level) {
pack_instances(node_size, num_leaf_nodes*BVH_NODE_LEAF_SIZE);
}
else {
pack.nodes.resize(node_size);
pack.leaf_nodes.resize(num_leaf_nodes*BVH_NODE_LEAF_SIZE);
}
const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
assert(num_leaf_nodes <= num_nodes);
const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
size_t node_size;
if (params.use_unaligned_nodes) {
const size_t num_unaligned_nodes = root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
node_size = (num_unaligned_nodes * BVH_UNALIGNED_NODE_SIZE) +
(num_inner_nodes - num_unaligned_nodes) * BVH_NODE_SIZE;
}
else {
node_size = num_inner_nodes * BVH_NODE_SIZE;
}
/* Resize arrays */
pack.nodes.clear();
pack.leaf_nodes.clear();
/* For top level BVH, first merge existing BVH's so we know the offsets. */
if (params.top_level) {
pack_instances(node_size, num_leaf_nodes * BVH_NODE_LEAF_SIZE);
}
else {
pack.nodes.resize(node_size);
pack.leaf_nodes.resize(num_leaf_nodes * BVH_NODE_LEAF_SIZE);
}
int nextNodeIdx = 0, nextLeafNodeIdx = 0;
int nextNodeIdx = 0, nextLeafNodeIdx = 0;
vector<BVHStackEntry> stack;
stack.reserve(BVHParams::MAX_DEPTH*2);
if(root->is_leaf()) {
stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
}
else {
stack.push_back(BVHStackEntry(root, nextNodeIdx));
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_NODE_SIZE
: BVH_NODE_SIZE;
}
vector<BVHStackEntry> stack;
stack.reserve(BVHParams::MAX_DEPTH * 2);
if (root->is_leaf()) {
stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
}
else {
stack.push_back(BVHStackEntry(root, nextNodeIdx));
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_NODE_SIZE : BVH_NODE_SIZE;
}
while(stack.size()) {
BVHStackEntry e = stack.back();
stack.pop_back();
while (stack.size()) {
BVHStackEntry e = stack.back();
stack.pop_back();
if(e.node->is_leaf()) {
/* leaf node */
const LeafNode *leaf = reinterpret_cast<const LeafNode*>(e.node);
pack_leaf(e, leaf);
}
else {
/* inner node */
int idx[2];
for(int i = 0; i < 2; ++i) {
if(e.node->get_child(i)->is_leaf()) {
idx[i] = nextLeafNodeIdx++;
}
else {
idx[i] = nextNodeIdx;
nextNodeIdx += e.node->get_child(i)->has_unaligned()
? BVH_UNALIGNED_NODE_SIZE
: BVH_NODE_SIZE;
}
}
if (e.node->is_leaf()) {
/* leaf node */
const LeafNode *leaf = reinterpret_cast<const LeafNode *>(e.node);
pack_leaf(e, leaf);
}
else {
/* inner node */
int idx[2];
for (int i = 0; i < 2; ++i) {
if (e.node->get_child(i)->is_leaf()) {
idx[i] = nextLeafNodeIdx++;
}
else {
idx[i] = nextNodeIdx;
nextNodeIdx += e.node->get_child(i)->has_unaligned() ? BVH_UNALIGNED_NODE_SIZE :
BVH_NODE_SIZE;
}
}
stack.push_back(BVHStackEntry(e.node->get_child(0), idx[0]));
stack.push_back(BVHStackEntry(e.node->get_child(1), idx[1]));
stack.push_back(BVHStackEntry(e.node->get_child(0), idx[0]));
stack.push_back(BVHStackEntry(e.node->get_child(1), idx[1]));
pack_inner(e, stack[stack.size()-2], stack[stack.size()-1]);
}
}
assert(node_size == nextNodeIdx);
/* root index to start traversal at, to handle case of single leaf node */
pack.root_index = (root->is_leaf())? -1: 0;
pack_inner(e, stack[stack.size() - 2], stack[stack.size() - 1]);
}
}
assert(node_size == nextNodeIdx);
/* root index to start traversal at, to handle case of single leaf node */
pack.root_index = (root->is_leaf()) ? -1 : 0;
}
void BVH2::refit_nodes()
{
assert(!params.top_level);
assert(!params.top_level);
BoundBox bbox = BoundBox::empty;
uint visibility = 0;
refit_node(0, (pack.root_index == -1)? true: false, bbox, visibility);
BoundBox bbox = BoundBox::empty;
uint visibility = 0;
refit_node(0, (pack.root_index == -1) ? true : false, bbox, visibility);
}
void BVH2::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
void BVH2::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility)
{
if(leaf) {
/* refit leaf node */
assert(idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size());
const int4 *data = &pack.leaf_nodes[idx];
const int c0 = data[0].x;
const int c1 = data[0].y;
if (leaf) {
/* refit leaf node */
assert(idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size());
const int4 *data = &pack.leaf_nodes[idx];
const int c0 = data[0].x;
const int c1 = data[0].y;
BVH::refit_primitives(c0, c1, bbox, visibility);
BVH::refit_primitives(c0, c1, bbox, visibility);
/* TODO(sergey): De-duplicate with pack_leaf(). */
float4 leaf_data[BVH_NODE_LEAF_SIZE];
leaf_data[0].x = __int_as_float(c0);
leaf_data[0].y = __int_as_float(c1);
leaf_data[0].z = __uint_as_float(visibility);
leaf_data[0].w = __uint_as_float(data[0].w);
memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4)*BVH_NODE_LEAF_SIZE);
}
else {
assert(idx + BVH_NODE_SIZE <= pack.nodes.size());
/* TODO(sergey): De-duplicate with pack_leaf(). */
float4 leaf_data[BVH_NODE_LEAF_SIZE];
leaf_data[0].x = __int_as_float(c0);
leaf_data[0].y = __int_as_float(c1);
leaf_data[0].z = __uint_as_float(visibility);
leaf_data[0].w = __uint_as_float(data[0].w);
memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4) * BVH_NODE_LEAF_SIZE);
}
else {
assert(idx + BVH_NODE_SIZE <= pack.nodes.size());
const int4 *data = &pack.nodes[idx];
const bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
const int c0 = data[0].z;
const int c1 = data[0].w;
/* refit inner node, set bbox from children */
BoundBox bbox0 = BoundBox::empty, bbox1 = BoundBox::empty;
uint visibility0 = 0, visibility1 = 0;
const int4 *data = &pack.nodes[idx];
const bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
const int c0 = data[0].z;
const int c1 = data[0].w;
/* refit inner node, set bbox from children */
BoundBox bbox0 = BoundBox::empty, bbox1 = BoundBox::empty;
uint visibility0 = 0, visibility1 = 0;
refit_node((c0 < 0)? -c0-1: c0, (c0 < 0), bbox0, visibility0);
refit_node((c1 < 0)? -c1-1: c1, (c1 < 0), bbox1, visibility1);
refit_node((c0 < 0) ? -c0 - 1 : c0, (c0 < 0), bbox0, visibility0);
refit_node((c1 < 0) ? -c1 - 1 : c1, (c1 < 0), bbox1, visibility1);
if(is_unaligned) {
Transform aligned_space = transform_identity();
pack_unaligned_node(idx,
aligned_space, aligned_space,
bbox0, bbox1,
c0, c1,
visibility0,
visibility1);
}
else {
pack_aligned_node(idx,
bbox0, bbox1,
c0, c1,
visibility0,
visibility1);
}
if (is_unaligned) {
Transform aligned_space = transform_identity();
pack_unaligned_node(
idx, aligned_space, aligned_space, bbox0, bbox1, c0, c1, visibility0, visibility1);
}
else {
pack_aligned_node(idx, bbox0, bbox1, c0, c1, visibility0, visibility1);
}
bbox.grow(bbox0);
bbox.grow(bbox1);
visibility = visibility0|visibility1;
}
bbox.grow(bbox0);
bbox.grow(bbox1);
visibility = visibility0 | visibility1;
}
}
CCL_NAMESPACE_END

View File

@@ -34,8 +34,8 @@ class LeafNode;
class Object;
class Progress;
#define BVH_NODE_SIZE 4
#define BVH_NODE_LEAF_SIZE 1
#define BVH_NODE_SIZE 4
#define BVH_NODE_LEAF_SIZE 1
#define BVH_UNALIGNED_NODE_SIZE 7
/* BVH2
@@ -43,48 +43,49 @@ class Progress;
* Typical BVH with each node having two children.
*/
class BVH2 : public BVH {
protected:
/* constructor */
friend class BVH;
BVH2(const BVHParams& params, const vector<Object*>& objects);
protected:
/* constructor */
friend class BVH;
BVH2(const BVHParams &params, const vector<Object *> &objects);
/* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
/* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
/* pack */
void pack_nodes(const BVHNode *root) override;
/* pack */
void pack_nodes(const BVHNode *root) override;
void pack_leaf(const BVHStackEntry& e,
const LeafNode *leaf);
void pack_inner(const BVHStackEntry& e,
const BVHStackEntry& e0,
const BVHStackEntry& e1);
void pack_leaf(const BVHStackEntry &e, const LeafNode *leaf);
void pack_inner(const BVHStackEntry &e, const BVHStackEntry &e0, const BVHStackEntry &e1);
void pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry& e0,
const BVHStackEntry& e1);
void pack_aligned_node(int idx,
const BoundBox& b0,
const BoundBox& b1,
int c0, int c1,
uint visibility0, uint visibility1);
void pack_aligned_inner(const BVHStackEntry &e,
const BVHStackEntry &e0,
const BVHStackEntry &e1);
void pack_aligned_node(int idx,
const BoundBox &b0,
const BoundBox &b1,
int c0,
int c1,
uint visibility0,
uint visibility1);
void pack_unaligned_inner(const BVHStackEntry& e,
const BVHStackEntry& e0,
const BVHStackEntry& e1);
void pack_unaligned_node(int idx,
const Transform& aligned_space0,
const Transform& aligned_space1,
const BoundBox& b0,
const BoundBox& b1,
int c0, int c1,
uint visibility0, uint visibility1);
void pack_unaligned_inner(const BVHStackEntry &e,
const BVHStackEntry &e0,
const BVHStackEntry &e1);
void pack_unaligned_node(int idx,
const Transform &aligned_space0,
const Transform &aligned_space1,
const BoundBox &b0,
const BoundBox &b1,
int c0,
int c1,
uint visibility0,
uint visibility1);
/* refit */
void refit_nodes() override;
void refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility);
/* refit */
void refit_nodes() override;
void refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility);
};
CCL_NAMESPACE_END
#endif /* __BVH2_H__ */
#endif /* __BVH2_H__ */

View File

@@ -31,141 +31,131 @@ CCL_NAMESPACE_BEGIN
* life easier all over the place.
*/
BVH4::BVH4(const BVHParams& params_, const vector<Object*>& objects_)
: BVH(params_, objects_)
BVH4::BVH4(const BVHParams &params_, const vector<Object *> &objects_) : BVH(params_, objects_)
{
params.bvh_layout = BVH_LAYOUT_BVH4;
params.bvh_layout = BVH_LAYOUT_BVH4;
}
namespace {
BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
{
if(node->is_leaf()) {
return new LeafNode(*reinterpret_cast<const LeafNode *>(node));
}
/* Collect nodes of one layer deeper, allowing us to have more children in
* an inner layer. */
assert(node->num_children() <= 2);
const BVHNode *children[4];
const BVHNode *child0 = node->get_child(0);
const BVHNode *child1 = node->get_child(1);
int num_children = 0;
if(child0->is_leaf()) {
children[num_children++] = child0;
}
else {
children[num_children++] = child0->get_child(0);
children[num_children++] = child0->get_child(1);
}
if(child1->is_leaf()) {
children[num_children++] = child1;
}
else {
children[num_children++] = child1->get_child(0);
children[num_children++] = child1->get_child(1);
}
/* Merge children in subtrees. */
BVHNode *children4[4];
for(int i = 0; i < num_children; ++i) {
children4[i] = bvh_node_merge_children_recursively(children[i]);
}
/* Allocate new node. */
BVHNode *node4 = new InnerNode(node->bounds, children4, num_children);
/* TODO(sergey): Consider doing this from the InnerNode() constructor.
* But in order to do this nicely need to think of how to pass all the
* parameters there. */
if(node->is_unaligned) {
node4->is_unaligned = true;
node4->aligned_space = new Transform();
*node4->aligned_space = *node->aligned_space;
}
return node4;
if (node->is_leaf()) {
return new LeafNode(*reinterpret_cast<const LeafNode *>(node));
}
/* Collect nodes of one layer deeper, allowing us to have more children in
* an inner layer. */
assert(node->num_children() <= 2);
const BVHNode *children[4];
const BVHNode *child0 = node->get_child(0);
const BVHNode *child1 = node->get_child(1);
int num_children = 0;
if (child0->is_leaf()) {
children[num_children++] = child0;
}
else {
children[num_children++] = child0->get_child(0);
children[num_children++] = child0->get_child(1);
}
if (child1->is_leaf()) {
children[num_children++] = child1;
}
else {
children[num_children++] = child1->get_child(0);
children[num_children++] = child1->get_child(1);
}
/* Merge children in subtrees. */
BVHNode *children4[4];
for (int i = 0; i < num_children; ++i) {
children4[i] = bvh_node_merge_children_recursively(children[i]);
}
/* Allocate new node. */
BVHNode *node4 = new InnerNode(node->bounds, children4, num_children);
/* TODO(sergey): Consider doing this from the InnerNode() constructor.
* But in order to do this nicely need to think of how to pass all the
* parameters there. */
if (node->is_unaligned) {
node4->is_unaligned = true;
node4->aligned_space = new Transform();
*node4->aligned_space = *node->aligned_space;
}
return node4;
}
} // namespace
BVHNode *BVH4::widen_children_nodes(const BVHNode *root)
{
if(root == NULL) {
return NULL;
}
if(root->is_leaf()) {
return const_cast<BVHNode *>(root);
}
BVHNode *root4 = bvh_node_merge_children_recursively(root);
/* TODO(sergey): Pack children nodes to parents which have fewer than 4
* children. */
return root4;
if (root == NULL) {
return NULL;
}
if (root->is_leaf()) {
return const_cast<BVHNode *>(root);
}
BVHNode *root4 = bvh_node_merge_children_recursively(root);
/* TODO(sergey): Pack children nodes to parents which have fewer than 4
* children. */
return root4;
}
void BVH4::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf)
void BVH4::pack_leaf(const BVHStackEntry &e, const LeafNode *leaf)
{
float4 data[BVH_QNODE_LEAF_SIZE];
memset(data, 0, sizeof(data));
if(leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
/* object */
data[0].x = __int_as_float(~(leaf->lo));
data[0].y = __int_as_float(0);
}
else {
/* triangle */
data[0].x = __int_as_float(leaf->lo);
data[0].y = __int_as_float(leaf->hi);
}
data[0].z = __uint_as_float(leaf->visibility);
if(leaf->num_triangles() != 0) {
data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
}
float4 data[BVH_QNODE_LEAF_SIZE];
memset(data, 0, sizeof(data));
if (leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
/* object */
data[0].x = __int_as_float(~(leaf->lo));
data[0].y = __int_as_float(0);
}
else {
/* triangle */
data[0].x = __int_as_float(leaf->lo);
data[0].y = __int_as_float(leaf->hi);
}
data[0].z = __uint_as_float(leaf->visibility);
if (leaf->num_triangles() != 0) {
data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
}
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_QNODE_LEAF_SIZE);
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4) * BVH_QNODE_LEAF_SIZE);
}
void BVH4::pack_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
void BVH4::pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
{
bool has_unaligned = false;
/* Check whether we have to create unaligned node or all nodes are aligned
* and we can cut some corner here.
*/
if(params.use_unaligned_nodes) {
for(int i = 0; i < num; i++) {
if(en[i].node->is_unaligned) {
has_unaligned = true;
break;
}
}
}
if(has_unaligned) {
/* Create unaligned node with orientation transform for each of the children. */
pack_unaligned_inner(e, en, num);
}
else {
/* There are no unaligned children, pack into AABB node. */
pack_aligned_inner(e, en, num);
}
bool has_unaligned = false;
/* Check whether we have to create unaligned node or all nodes are aligned
* and we can cut some corner here.
*/
if (params.use_unaligned_nodes) {
for (int i = 0; i < num; i++) {
if (en[i].node->is_unaligned) {
has_unaligned = true;
break;
}
}
}
if (has_unaligned) {
/* There's no unaligned children, pack into AABB node. */
pack_unaligned_inner(e, en, num);
}
else {
/* Create unaligned node with orientation transform for each of the
* children.
*/
pack_aligned_inner(e, en, num);
}
}
void BVH4::pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
void BVH4::pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
{
BoundBox bounds[4];
int child[4];
for(int i = 0; i < num; ++i) {
bounds[i] = en[i].node->bounds;
child[i] = en[i].encodeIdx();
}
pack_aligned_node(e.idx,
bounds,
child,
e.node->visibility,
e.node->time_from,
e.node->time_to,
num);
BoundBox bounds[4];
int child[4];
for (int i = 0; i < num; ++i) {
bounds[i] = en[i].node->bounds;
child[i] = en[i].encodeIdx();
}
pack_aligned_node(
e.idx, bounds, child, e.node->visibility, e.node->time_from, e.node->time_to, num);
}
void BVH4::pack_aligned_node(int idx,
@@ -176,66 +166,64 @@ void BVH4::pack_aligned_node(int idx,
const float time_to,
const int num)
{
float4 data[BVH_QNODE_SIZE];
memset(data, 0, sizeof(data));
float4 data[BVH_QNODE_SIZE];
memset(data, 0, sizeof(data));
data[0].x = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
data[0].y = time_from;
data[0].z = time_to;
data[0].x = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
data[0].y = time_from;
data[0].z = time_to;
for(int i = 0; i < num; i++) {
float3 bb_min = bounds[i].min;
float3 bb_max = bounds[i].max;
for (int i = 0; i < num; i++) {
float3 bb_min = bounds[i].min;
float3 bb_max = bounds[i].max;
data[1][i] = bb_min.x;
data[2][i] = bb_max.x;
data[3][i] = bb_min.y;
data[4][i] = bb_max.y;
data[5][i] = bb_min.z;
data[6][i] = bb_max.z;
data[1][i] = bb_min.x;
data[2][i] = bb_max.x;
data[3][i] = bb_min.y;
data[4][i] = bb_max.y;
data[5][i] = bb_min.z;
data[6][i] = bb_max.z;
data[7][i] = __int_as_float(child[i]);
}
data[7][i] = __int_as_float(child[i]);
}
for(int i = num; i < 4; i++) {
/* We store BB which would never be recorded as intersection
* so kernel might safely assume there are always 4 child nodes.
*/
data[1][i] = FLT_MAX;
data[2][i] = -FLT_MAX;
for (int i = num; i < 4; i++) {
/* We store BB which would never be recorded as intersection
* so kernel might safely assume there are always 4 child nodes.
*/
data[1][i] = FLT_MAX;
data[2][i] = -FLT_MAX;
data[3][i] = FLT_MAX;
data[4][i] = -FLT_MAX;
data[3][i] = FLT_MAX;
data[4][i] = -FLT_MAX;
data[5][i] = FLT_MAX;
data[6][i] = -FLT_MAX;
data[5][i] = FLT_MAX;
data[6][i] = -FLT_MAX;
data[7][i] = __int_as_float(0);
}
data[7][i] = __int_as_float(0);
}
memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_QNODE_SIZE);
memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_QNODE_SIZE);
}
void BVH4::pack_unaligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
void BVH4::pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
{
Transform aligned_space[4];
BoundBox bounds[4];
int child[4];
for(int i = 0; i < num; ++i) {
aligned_space[i] = en[i].node->get_aligned_space();
bounds[i] = en[i].node->bounds;
child[i] = en[i].encodeIdx();
}
pack_unaligned_node(e.idx,
aligned_space,
bounds,
child,
e.node->visibility,
e.node->time_from,
e.node->time_to,
num);
Transform aligned_space[4];
BoundBox bounds[4];
int child[4];
for (int i = 0; i < num; ++i) {
aligned_space[i] = en[i].node->get_aligned_space();
bounds[i] = en[i].node->bounds;
child[i] = en[i].encodeIdx();
}
pack_unaligned_node(e.idx,
aligned_space,
bounds,
child,
e.node->visibility,
e.node->time_from,
e.node->time_to,
num);
}
void BVH4::pack_unaligned_node(int idx,
@@ -247,235 +235,211 @@ void BVH4::pack_unaligned_node(int idx,
const float time_to,
const int num)
{
float4 data[BVH_UNALIGNED_QNODE_SIZE];
memset(data, 0, sizeof(data));
float4 data[BVH_UNALIGNED_QNODE_SIZE];
memset(data, 0, sizeof(data));
data[0].x = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
data[0].y = time_from;
data[0].z = time_to;
data[0].x = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
data[0].y = time_from;
data[0].z = time_to;
for(int i = 0; i < num; i++) {
Transform space = BVHUnaligned::compute_node_transform(
bounds[i],
aligned_space[i]);
for (int i = 0; i < num; i++) {
Transform space = BVHUnaligned::compute_node_transform(bounds[i], aligned_space[i]);
data[1][i] = space.x.x;
data[2][i] = space.x.y;
data[3][i] = space.x.z;
data[1][i] = space.x.x;
data[2][i] = space.x.y;
data[3][i] = space.x.z;
data[4][i] = space.y.x;
data[5][i] = space.y.y;
data[6][i] = space.y.z;
data[4][i] = space.y.x;
data[5][i] = space.y.y;
data[6][i] = space.y.z;
data[7][i] = space.z.x;
data[8][i] = space.z.y;
data[9][i] = space.z.z;
data[7][i] = space.z.x;
data[8][i] = space.z.y;
data[9][i] = space.z.z;
data[10][i] = space.x.w;
data[11][i] = space.y.w;
data[12][i] = space.z.w;
data[10][i] = space.x.w;
data[11][i] = space.y.w;
data[12][i] = space.z.w;
data[13][i] = __int_as_float(child[i]);
}
data[13][i] = __int_as_float(child[i]);
}
for(int i = num; i < 4; i++) {
/* We store BB which would never be recorded as intersection
* so kernel might safely assume there are always 4 child nodes.
*/
for (int i = num; i < 4; i++) {
/* We store BB which would never be recorded as intersection
* so kernel might safely assume there are always 4 child nodes.
*/
data[1][i] = NAN;
data[2][i] = NAN;
data[3][i] = NAN;
data[1][i] = NAN;
data[2][i] = NAN;
data[3][i] = NAN;
data[4][i] = NAN;
data[5][i] = NAN;
data[6][i] = NAN;
data[4][i] = NAN;
data[5][i] = NAN;
data[6][i] = NAN;
data[7][i] = NAN;
data[8][i] = NAN;
data[9][i] = NAN;
data[7][i] = NAN;
data[8][i] = NAN;
data[9][i] = NAN;
data[10][i] = NAN;
data[11][i] = NAN;
data[12][i] = NAN;
data[10][i] = NAN;
data[11][i] = NAN;
data[12][i] = NAN;
data[13][i] = __int_as_float(0);
}
data[13][i] = __int_as_float(0);
}
memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_QNODE_SIZE);
memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_UNALIGNED_QNODE_SIZE);
}
/* Quad SIMD Nodes */
void BVH4::pack_nodes(const BVHNode *root)
{
/* Calculate size of the arrays required. */
const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
assert(num_leaf_nodes <= num_nodes);
const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
size_t node_size;
if(params.use_unaligned_nodes) {
const size_t num_unaligned_nodes =
root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
node_size = (num_unaligned_nodes * BVH_UNALIGNED_QNODE_SIZE) +
(num_inner_nodes - num_unaligned_nodes) * BVH_QNODE_SIZE;
}
else {
node_size = num_inner_nodes * BVH_QNODE_SIZE;
}
/* Resize arrays. */
pack.nodes.clear();
pack.leaf_nodes.clear();
/* For top level BVH, first merge existing BVH's so we know the offsets. */
if(params.top_level) {
pack_instances(node_size, num_leaf_nodes*BVH_QNODE_LEAF_SIZE);
}
else {
pack.nodes.resize(node_size);
pack.leaf_nodes.resize(num_leaf_nodes*BVH_QNODE_LEAF_SIZE);
}
/* Calculate size of the arrays required. */
const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
assert(num_leaf_nodes <= num_nodes);
const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
size_t node_size;
if (params.use_unaligned_nodes) {
const size_t num_unaligned_nodes = root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
node_size = (num_unaligned_nodes * BVH_UNALIGNED_QNODE_SIZE) +
(num_inner_nodes - num_unaligned_nodes) * BVH_QNODE_SIZE;
}
else {
node_size = num_inner_nodes * BVH_QNODE_SIZE;
}
/* Resize arrays. */
pack.nodes.clear();
pack.leaf_nodes.clear();
/* For top level BVH, first merge existing BVH's so we know the offsets. */
if (params.top_level) {
pack_instances(node_size, num_leaf_nodes * BVH_QNODE_LEAF_SIZE);
}
else {
pack.nodes.resize(node_size);
pack.leaf_nodes.resize(num_leaf_nodes * BVH_QNODE_LEAF_SIZE);
}
int nextNodeIdx = 0, nextLeafNodeIdx = 0;
int nextNodeIdx = 0, nextLeafNodeIdx = 0;
vector<BVHStackEntry> stack;
stack.reserve(BVHParams::MAX_DEPTH*2);
if(root->is_leaf()) {
stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
}
else {
stack.push_back(BVHStackEntry(root, nextNodeIdx));
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE
: BVH_QNODE_SIZE;
}
vector<BVHStackEntry> stack;
stack.reserve(BVHParams::MAX_DEPTH * 2);
if (root->is_leaf()) {
stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
}
else {
stack.push_back(BVHStackEntry(root, nextNodeIdx));
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE : BVH_QNODE_SIZE;
}
while(stack.size()) {
BVHStackEntry e = stack.back();
stack.pop_back();
while (stack.size()) {
BVHStackEntry e = stack.back();
stack.pop_back();
if(e.node->is_leaf()) {
/* leaf node */
const LeafNode *leaf = reinterpret_cast<const LeafNode*>(e.node);
pack_leaf(e, leaf);
}
else {
/* Inner node. */
/* Collect nodes. */
const BVHNode *children[4];
const int num_children = e.node->num_children();
/* Push entries on the stack. */
for(int i = 0; i < num_children; ++i) {
int idx;
children[i] = e.node->get_child(i);
assert(children[i] != NULL);
if(children[i]->is_leaf()) {
idx = nextLeafNodeIdx++;
}
else {
idx = nextNodeIdx;
nextNodeIdx += children[i]->has_unaligned()
? BVH_UNALIGNED_QNODE_SIZE
: BVH_QNODE_SIZE;
}
stack.push_back(BVHStackEntry(children[i], idx));
}
/* Set node. */
pack_inner(e, &stack[stack.size() - num_children], num_children);
}
}
if (e.node->is_leaf()) {
/* leaf node */
const LeafNode *leaf = reinterpret_cast<const LeafNode *>(e.node);
pack_leaf(e, leaf);
}
else {
/* Inner node. */
/* Collect nodes. */
const BVHNode *children[4];
const int num_children = e.node->num_children();
/* Push entries on the stack. */
for (int i = 0; i < num_children; ++i) {
int idx;
children[i] = e.node->get_child(i);
assert(children[i] != NULL);
if (children[i]->is_leaf()) {
idx = nextLeafNodeIdx++;
}
else {
idx = nextNodeIdx;
nextNodeIdx += children[i]->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE : BVH_QNODE_SIZE;
}
stack.push_back(BVHStackEntry(children[i], idx));
}
/* Set node. */
pack_inner(e, &stack[stack.size() - num_children], num_children);
}
}
assert(node_size == nextNodeIdx);
/* Root index to start traversal at, to handle case of single leaf node. */
pack.root_index = (root->is_leaf())? -1: 0;
assert(node_size == nextNodeIdx);
/* Root index to start traversal at, to handle case of single leaf node. */
pack.root_index = (root->is_leaf()) ? -1 : 0;
}
void BVH4::refit_nodes()
{
assert(!params.top_level);
assert(!params.top_level);
BoundBox bbox = BoundBox::empty;
uint visibility = 0;
refit_node(0, (pack.root_index == -1)? true: false, bbox, visibility);
BoundBox bbox = BoundBox::empty;
uint visibility = 0;
refit_node(0, (pack.root_index == -1) ? true : false, bbox, visibility);
}
void BVH4::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
void BVH4::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility)
{
if(leaf) {
/* Refit leaf node. */
int4 *data = &pack.leaf_nodes[idx];
int4 c = data[0];
if (leaf) {
/* Refit leaf node. */
int4 *data = &pack.leaf_nodes[idx];
int4 c = data[0];
BVH::refit_primitives(c.x, c.y, bbox, visibility);
BVH::refit_primitives(c.x, c.y, bbox, visibility);
/* TODO(sergey): This is actually a copy of pack_leaf(),
* but this chunk of code only knows actual data and has
* no idea about BVHNode.
*
* Would be nice to de-duplicate code, but trying to make
* the code more general ends up in much nastier code
* in my opinion so far.
*
* Same applies to the inner nodes case below.
*/
float4 leaf_data[BVH_QNODE_LEAF_SIZE];
leaf_data[0].x = __int_as_float(c.x);
leaf_data[0].y = __int_as_float(c.y);
leaf_data[0].z = __uint_as_float(visibility);
leaf_data[0].w = __uint_as_float(c.w);
memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4)*BVH_QNODE_LEAF_SIZE);
}
else {
int4 *data = &pack.nodes[idx];
bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
int4 c;
if(is_unaligned) {
c = data[13];
}
else {
c = data[7];
}
/* Refit inner node, set bbox from children. */
BoundBox child_bbox[4] = {BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty};
uint child_visibility[4] = {0};
int num_nodes = 0;
/* TODO(sergey): This is actually a copy of pack_leaf(),
* but this chunk of code only knows actual data and has
* no idea about BVHNode.
*
* Would be nice to de-duplicate code, but trying to make
* the code more general ends up in much nastier code
* in my opinion so far.
*
* Same applies to the inner nodes case below.
*/
float4 leaf_data[BVH_QNODE_LEAF_SIZE];
leaf_data[0].x = __int_as_float(c.x);
leaf_data[0].y = __int_as_float(c.y);
leaf_data[0].z = __uint_as_float(visibility);
leaf_data[0].w = __uint_as_float(c.w);
memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4) * BVH_QNODE_LEAF_SIZE);
}
else {
int4 *data = &pack.nodes[idx];
bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
int4 c;
if (is_unaligned) {
c = data[13];
}
else {
c = data[7];
}
/* Refit inner node, set bbox from children. */
BoundBox child_bbox[4] = {BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty};
uint child_visibility[4] = {0};
int num_nodes = 0;
for(int i = 0; i < 4; ++i) {
if(c[i] != 0) {
refit_node((c[i] < 0)? -c[i]-1: c[i], (c[i] < 0),
child_bbox[i], child_visibility[i]);
++num_nodes;
bbox.grow(child_bbox[i]);
visibility |= child_visibility[i];
}
}
for (int i = 0; i < 4; ++i) {
if (c[i] != 0) {
refit_node((c[i] < 0) ? -c[i] - 1 : c[i], (c[i] < 0), child_bbox[i], child_visibility[i]);
++num_nodes;
bbox.grow(child_bbox[i]);
visibility |= child_visibility[i];
}
}
if(is_unaligned) {
Transform aligned_space[4] = {transform_identity(),
transform_identity(),
transform_identity(),
transform_identity()};
pack_unaligned_node(idx,
aligned_space,
child_bbox,
&c[0],
visibility,
0.0f,
1.0f,
num_nodes);
}
else {
pack_aligned_node(idx,
child_bbox,
&c[0],
visibility,
0.0f,
1.0f,
num_nodes);
}
}
if (is_unaligned) {
Transform aligned_space[4] = {
transform_identity(), transform_identity(), transform_identity(), transform_identity()};
pack_unaligned_node(
idx, aligned_space, child_bbox, &c[0], visibility, 0.0f, 1.0f, num_nodes);
}
else {
pack_aligned_node(idx, child_bbox, &c[0], visibility, 0.0f, 1.0f, num_nodes);
}
}
}
CCL_NAMESPACE_END

View File

@@ -34,8 +34,8 @@ class LeafNode;
class Object;
class Progress;
#define BVH_QNODE_SIZE 8
#define BVH_QNODE_LEAF_SIZE 1
#define BVH_QNODE_SIZE 8
#define BVH_QNODE_LEAF_SIZE 1
#define BVH_UNALIGNED_QNODE_SIZE 14
/* BVH4
@@ -43,48 +43,44 @@ class Progress;
* Quad BVH, with each node having four children, to use with SIMD instructions.
*/
class BVH4 : public BVH {
protected:
/* constructor */
friend class BVH;
BVH4(const BVHParams& params, const vector<Object*>& objects);
protected:
/* constructor */
friend class BVH;
BVH4(const BVHParams &params, const vector<Object *> &objects);
/* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
/* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
/* pack */
void pack_nodes(const BVHNode *root) override;
/* pack */
void pack_nodes(const BVHNode *root) override;
void pack_leaf(const BVHStackEntry& e, const LeafNode *leaf);
void pack_inner(const BVHStackEntry& e, const BVHStackEntry *en, int num);
void pack_leaf(const BVHStackEntry &e, const LeafNode *leaf);
void pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num);
void pack_aligned_node(int idx,
const BoundBox *bounds,
const int *child,
const uint visibility,
const float time_from,
const float time_to,
const int num);
void pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_aligned_node(int idx,
const BoundBox *bounds,
const int *child,
const uint visibility,
const float time_from,
const float time_to,
const int num);
void pack_unaligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num);
void pack_unaligned_node(int idx,
const Transform *aligned_space,
const BoundBox *bounds,
const int *child,
const uint visibility,
const float time_from,
const float time_to,
const int num);
void pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_unaligned_node(int idx,
const Transform *aligned_space,
const BoundBox *bounds,
const int *child,
const uint visibility,
const float time_from,
const float time_to,
const int num);
/* refit */
void refit_nodes() override;
void refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility);
/* refit */
void refit_nodes() override;
void refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility);
};
CCL_NAMESPACE_END
#endif /* __BVH4_H__ */
#endif /* __BVH4_H__ */

View File

@@ -36,8 +36,7 @@
CCL_NAMESPACE_BEGIN
BVH8::BVH8(const BVHParams& params_, const vector<Object*>& objects_)
: BVH(params_, objects_)
BVH8::BVH8(const BVHParams &params_, const vector<Object *> &objects_) : BVH(params_, objects_)
{
}
@@ -45,159 +44,148 @@ namespace {
BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
{
if(node->is_leaf()) {
return new LeafNode(*reinterpret_cast<const LeafNode *>(node));
}
/* Collect nodes of two layers deeper, allowing us to have more children in
* an inner layer. */
assert(node->num_children() <= 2);
const BVHNode *children[8];
const BVHNode *child0 = node->get_child(0);
const BVHNode *child1 = node->get_child(1);
int num_children = 0;
if(child0->is_leaf()) {
children[num_children++] = child0;
}
else {
const BVHNode *child00 = child0->get_child(0),
*child01 = child0->get_child(1);
if(child00->is_leaf()) {
children[num_children++] = child00;
}
else {
children[num_children++] = child00->get_child(0);
children[num_children++] = child00->get_child(1);
}
if(child01->is_leaf()) {
children[num_children++] = child01;
}
else {
children[num_children++] = child01->get_child(0);
children[num_children++] = child01->get_child(1);
}
}
if(child1->is_leaf()) {
children[num_children++] = child1;
}
else {
const BVHNode *child10 = child1->get_child(0),
*child11 = child1->get_child(1);
if(child10->is_leaf()) {
children[num_children++] = child10;
}
else {
children[num_children++] = child10->get_child(0);
children[num_children++] = child10->get_child(1);
}
if(child11->is_leaf()) {
children[num_children++] = child11;
}
else {
children[num_children++] = child11->get_child(0);
children[num_children++] = child11->get_child(1);
}
}
/* Merge children in subtrees. */
BVHNode *children4[8];
for(int i = 0; i < num_children; ++i) {
children4[i] = bvh_node_merge_children_recursively(children[i]);
}
/* Allocate new node. */
BVHNode *node8 = new InnerNode(node->bounds, children4, num_children);
/* TODO(sergey): Consider doing this from the InnerNode() constructor.
* But in order to do this nicely need to think of how to pass all the
* parameters there. */
if(node->is_unaligned) {
node8->is_unaligned = true;
node8->aligned_space = new Transform();
*node8->aligned_space = *node->aligned_space;
}
return node8;
if (node->is_leaf()) {
return new LeafNode(*reinterpret_cast<const LeafNode *>(node));
}
/* Collect nodes of two layers deeper, allowing us to have more children in
* an inner layer. */
assert(node->num_children() <= 2);
const BVHNode *children[8];
const BVHNode *child0 = node->get_child(0);
const BVHNode *child1 = node->get_child(1);
int num_children = 0;
if (child0->is_leaf()) {
children[num_children++] = child0;
}
else {
const BVHNode *child00 = child0->get_child(0), *child01 = child0->get_child(1);
if (child00->is_leaf()) {
children[num_children++] = child00;
}
else {
children[num_children++] = child00->get_child(0);
children[num_children++] = child00->get_child(1);
}
if (child01->is_leaf()) {
children[num_children++] = child01;
}
else {
children[num_children++] = child01->get_child(0);
children[num_children++] = child01->get_child(1);
}
}
if (child1->is_leaf()) {
children[num_children++] = child1;
}
else {
const BVHNode *child10 = child1->get_child(0), *child11 = child1->get_child(1);
if (child10->is_leaf()) {
children[num_children++] = child10;
}
else {
children[num_children++] = child10->get_child(0);
children[num_children++] = child10->get_child(1);
}
if (child11->is_leaf()) {
children[num_children++] = child11;
}
else {
children[num_children++] = child11->get_child(0);
children[num_children++] = child11->get_child(1);
}
}
/* Merge children in subtrees. */
BVHNode *children4[8];
for (int i = 0; i < num_children; ++i) {
children4[i] = bvh_node_merge_children_recursively(children[i]);
}
/* Allocate new node. */
BVHNode *node8 = new InnerNode(node->bounds, children4, num_children);
/* TODO(sergey): Consider doing this from the InnerNode() constructor.
* But in order to do this nicely need to think of how to pass all the
* parameters there. */
if (node->is_unaligned) {
node8->is_unaligned = true;
node8->aligned_space = new Transform();
*node8->aligned_space = *node->aligned_space;
}
return node8;
}
} // namespace
BVHNode *BVH8::widen_children_nodes(const BVHNode *root)
{
if(root == NULL) {
return NULL;
}
if(root->is_leaf()) {
return const_cast<BVHNode *>(root);
}
BVHNode *root8 = bvh_node_merge_children_recursively(root);
/* TODO(sergey): Pack children nodes to parents which have less than 8
* children. */
return root8;
if (root == NULL) {
return NULL;
}
if (root->is_leaf()) {
return const_cast<BVHNode *>(root);
}
BVHNode *root8 = bvh_node_merge_children_recursively(root);
/* TODO(sergey): Pack children nodes to parents which have less than 8
* children. */
return root8;
}
void BVH8::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf)
void BVH8::pack_leaf(const BVHStackEntry &e, const LeafNode *leaf)
{
float4 data[BVH_ONODE_LEAF_SIZE];
memset(data, 0, sizeof(data));
if(leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
/* object */
data[0].x = __int_as_float(~(leaf->lo));
data[0].y = __int_as_float(0);
}
else {
/* triangle */
data[0].x = __int_as_float(leaf->lo);
data[0].y = __int_as_float(leaf->hi);
}
data[0].z = __uint_as_float(leaf->visibility);
if(leaf->num_triangles() != 0) {
data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
}
float4 data[BVH_ONODE_LEAF_SIZE];
memset(data, 0, sizeof(data));
if (leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
/* object */
data[0].x = __int_as_float(~(leaf->lo));
data[0].y = __int_as_float(0);
}
else {
/* triangle */
data[0].x = __int_as_float(leaf->lo);
data[0].y = __int_as_float(leaf->hi);
}
data[0].z = __uint_as_float(leaf->visibility);
if (leaf->num_triangles() != 0) {
data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
}
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_ONODE_LEAF_SIZE);
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4) * BVH_ONODE_LEAF_SIZE);
}
/* Pack an inner node: when unaligned nodes are enabled and at least one of
 * the `num` children in `en` is unaligned, an unaligned node is written,
 * otherwise an aligned one. */
void BVH8::pack_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
void BVH8::pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
{
bool has_unaligned = false;
/* Check whether we have to create unaligned node or all nodes are aligned
* and we can cut some corner here.
*/
if(params.use_unaligned_nodes) {
for(int i = 0; i < num; i++) {
if(en[i].node->is_unaligned) {
has_unaligned = true;
break;
}
}
}
if(has_unaligned) {
/* Create unaligned node with orientation transform for each of the
* children. */
pack_unaligned_inner(e, en, num);
}
else {
/* There are no unaligned children, pack into AABB node. */
pack_aligned_inner(e, en, num);
}
bool has_unaligned = false;
/* Check whether we have to create unaligned node or all nodes are aligned
* and we can cut some corner here.
*/
if (params.use_unaligned_nodes) {
for (int i = 0; i < num; i++) {
if (en[i].node->is_unaligned) {
has_unaligned = true;
break;
}
}
}
if (has_unaligned) {
/* Create unaligned node with orientation transform for each of the
* children. */
pack_unaligned_inner(e, en, num);
}
else {
/* There are no unaligned children, pack into AABB node. */
pack_aligned_inner(e, en, num);
}
}
void BVH8::pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
void BVH8::pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
{
BoundBox bounds[8];
int child[8];
for(int i = 0; i < num; ++i) {
bounds[i] = en[i].node->bounds;
child[i] = en[i].encodeIdx();
}
pack_aligned_node(e.idx,
bounds,
child,
e.node->visibility,
e.node->time_from,
e.node->time_to,
num);
BoundBox bounds[8];
int child[8];
for (int i = 0; i < num; ++i) {
bounds[i] = en[i].node->bounds;
child[i] = en[i].encodeIdx();
}
pack_aligned_node(
e.idx, bounds, child, e.node->visibility, e.node->time_from, e.node->time_to, num);
}
void BVH8::pack_aligned_node(int idx,
@@ -208,66 +196,64 @@ void BVH8::pack_aligned_node(int idx,
const float time_to,
const int num)
{
float8 data[8];
memset(data, 0, sizeof(data));
float8 data[8];
memset(data, 0, sizeof(data));
data[0].a = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
data[0].b = time_from;
data[0].c = time_to;
data[0].a = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
data[0].b = time_from;
data[0].c = time_to;
for(int i = 0; i < num; i++) {
float3 bb_min = bounds[i].min;
float3 bb_max = bounds[i].max;
for (int i = 0; i < num; i++) {
float3 bb_min = bounds[i].min;
float3 bb_max = bounds[i].max;
data[1][i] = bb_min.x;
data[2][i] = bb_max.x;
data[3][i] = bb_min.y;
data[4][i] = bb_max.y;
data[5][i] = bb_min.z;
data[6][i] = bb_max.z;
data[1][i] = bb_min.x;
data[2][i] = bb_max.x;
data[3][i] = bb_min.y;
data[4][i] = bb_max.y;
data[5][i] = bb_min.z;
data[6][i] = bb_max.z;
data[7][i] = __int_as_float(child[i]);
}
data[7][i] = __int_as_float(child[i]);
}
for(int i = num; i < 8; i++) {
/* We store BB which would never be recorded as intersection
* so kernel might safely assume there are always 8 child nodes.
*/
data[1][i] = FLT_MAX;
data[2][i] = -FLT_MAX;
for (int i = num; i < 8; i++) {
/* We store BB which would never be recorded as intersection
* so kernel might safely assume there are always 8 child nodes.
*/
data[1][i] = FLT_MAX;
data[2][i] = -FLT_MAX;
data[3][i] = FLT_MAX;
data[4][i] = -FLT_MAX;
data[3][i] = FLT_MAX;
data[4][i] = -FLT_MAX;
data[5][i] = FLT_MAX;
data[6][i] = -FLT_MAX;
data[5][i] = FLT_MAX;
data[6][i] = -FLT_MAX;
data[7][i] = __int_as_float(0);
}
data[7][i] = __int_as_float(0);
}
memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_ONODE_SIZE);
memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_ONODE_SIZE);
}
void BVH8::pack_unaligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
void BVH8::pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
{
Transform aligned_space[8];
BoundBox bounds[8];
int child[8];
for(int i = 0; i < num; ++i) {
aligned_space[i] = en[i].node->get_aligned_space();
bounds[i] = en[i].node->bounds;
child[i] = en[i].encodeIdx();
}
pack_unaligned_node(e.idx,
aligned_space,
bounds,
child,
e.node->visibility,
e.node->time_from,
e.node->time_to,
num);
Transform aligned_space[8];
BoundBox bounds[8];
int child[8];
for (int i = 0; i < num; ++i) {
aligned_space[i] = en[i].node->get_aligned_space();
bounds[i] = en[i].node->bounds;
child[i] = en[i].encodeIdx();
}
pack_unaligned_node(e.idx,
aligned_space,
bounds,
child,
e.node->visibility,
e.node->time_from,
e.node->time_to,
num);
}
void BVH8::pack_unaligned_node(int idx,
@@ -279,283 +265,275 @@ void BVH8::pack_unaligned_node(int idx,
const float time_to,
const int num)
{
float8 data[BVH_UNALIGNED_ONODE_SIZE];
memset(data, 0, sizeof(data));
float8 data[BVH_UNALIGNED_ONODE_SIZE];
memset(data, 0, sizeof(data));
data[0].a = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
data[0].b = time_from;
data[0].c = time_to;
data[0].a = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
data[0].b = time_from;
data[0].c = time_to;
for(int i = 0; i < num; i++) {
Transform space = BVHUnaligned::compute_node_transform(
bounds[i],
aligned_space[i]);
for (int i = 0; i < num; i++) {
Transform space = BVHUnaligned::compute_node_transform(bounds[i], aligned_space[i]);
data[1][i] = space.x.x;
data[2][i] = space.x.y;
data[3][i] = space.x.z;
data[1][i] = space.x.x;
data[2][i] = space.x.y;
data[3][i] = space.x.z;
data[4][i] = space.y.x;
data[5][i] = space.y.y;
data[6][i] = space.y.z;
data[4][i] = space.y.x;
data[5][i] = space.y.y;
data[6][i] = space.y.z;
data[7][i] = space.z.x;
data[8][i] = space.z.y;
data[9][i] = space.z.z;
data[7][i] = space.z.x;
data[8][i] = space.z.y;
data[9][i] = space.z.z;
data[10][i] = space.x.w;
data[11][i] = space.y.w;
data[12][i] = space.z.w;
data[10][i] = space.x.w;
data[11][i] = space.y.w;
data[12][i] = space.z.w;
data[13][i] = __int_as_float(child[i]);
}
data[13][i] = __int_as_float(child[i]);
}
for(int i = num; i < 8; i++) {
/* We store BB which would never be recorded as intersection
* so kernel might safely assume there are always 8 child nodes.
*/
for (int i = num; i < 8; i++) {
/* We store BB which would never be recorded as intersection
* so kernel might safely assume there are always 8 child nodes.
*/
data[1][i] = NAN;
data[2][i] = NAN;
data[3][i] = NAN;
data[1][i] = NAN;
data[2][i] = NAN;
data[3][i] = NAN;
data[4][i] = NAN;
data[5][i] = NAN;
data[6][i] = NAN;
data[4][i] = NAN;
data[5][i] = NAN;
data[6][i] = NAN;
data[7][i] = NAN;
data[8][i] = NAN;
data[9][i] = NAN;
data[7][i] = NAN;
data[8][i] = NAN;
data[9][i] = NAN;
data[10][i] = NAN;
data[11][i] = NAN;
data[12][i] = NAN;
data[10][i] = NAN;
data[11][i] = NAN;
data[12][i] = NAN;
data[13][i] = __int_as_float(0);
}
data[13][i] = __int_as_float(0);
}
memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_ONODE_SIZE);
memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_UNALIGNED_ONODE_SIZE);
}
/* Octo SIMD Nodes */
void BVH8::pack_nodes(const BVHNode *root)
{
/* Calculate size of the arrays required. */
const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
assert(num_leaf_nodes <= num_nodes);
const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
size_t node_size;
if(params.use_unaligned_nodes) {
const size_t num_unaligned_nodes =
root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
node_size = (num_unaligned_nodes * BVH_UNALIGNED_ONODE_SIZE) +
(num_inner_nodes - num_unaligned_nodes) * BVH_ONODE_SIZE;
}
else {
node_size = num_inner_nodes * BVH_ONODE_SIZE;
}
/* Resize arrays. */
pack.nodes.clear();
pack.leaf_nodes.clear();
/* For top level BVH, first merge existing BVH's so we know the offsets. */
if(params.top_level) {
pack_instances(node_size, num_leaf_nodes*BVH_ONODE_LEAF_SIZE);
}
else {
pack.nodes.resize(node_size);
pack.leaf_nodes.resize(num_leaf_nodes*BVH_ONODE_LEAF_SIZE);
}
/* Calculate size of the arrays required. */
const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
assert(num_leaf_nodes <= num_nodes);
const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
size_t node_size;
if (params.use_unaligned_nodes) {
const size_t num_unaligned_nodes = root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
node_size = (num_unaligned_nodes * BVH_UNALIGNED_ONODE_SIZE) +
(num_inner_nodes - num_unaligned_nodes) * BVH_ONODE_SIZE;
}
else {
node_size = num_inner_nodes * BVH_ONODE_SIZE;
}
/* Resize arrays. */
pack.nodes.clear();
pack.leaf_nodes.clear();
/* For top level BVH, first merge existing BVH's so we know the offsets. */
if (params.top_level) {
pack_instances(node_size, num_leaf_nodes * BVH_ONODE_LEAF_SIZE);
}
else {
pack.nodes.resize(node_size);
pack.leaf_nodes.resize(num_leaf_nodes * BVH_ONODE_LEAF_SIZE);
}
int nextNodeIdx = 0, nextLeafNodeIdx = 0;
int nextNodeIdx = 0, nextLeafNodeIdx = 0;
vector<BVHStackEntry> stack;
stack.reserve(BVHParams::MAX_DEPTH*2);
if(root->is_leaf()) {
stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
}
else {
stack.push_back(BVHStackEntry(root, nextNodeIdx));
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_ONODE_SIZE
: BVH_ONODE_SIZE;
}
vector<BVHStackEntry> stack;
stack.reserve(BVHParams::MAX_DEPTH * 2);
if (root->is_leaf()) {
stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
}
else {
stack.push_back(BVHStackEntry(root, nextNodeIdx));
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_ONODE_SIZE : BVH_ONODE_SIZE;
}
while(stack.size()) {
BVHStackEntry e = stack.back();
stack.pop_back();
while (stack.size()) {
BVHStackEntry e = stack.back();
stack.pop_back();
if(e.node->is_leaf()) {
/* leaf node */
const LeafNode *leaf = reinterpret_cast<const LeafNode*>(e.node);
pack_leaf(e, leaf);
}
else {
/* Inner node. */
/* Collect nodes. */
const BVHNode *children[8];
int num_children = e.node->num_children();
/* Push entries on the stack. */
for(int i = 0; i < num_children; ++i) {
int idx;
children[i] = e.node->get_child(i);
if(children[i]->is_leaf()) {
idx = nextLeafNodeIdx++;
}
else {
idx = nextNodeIdx;
nextNodeIdx += children[i]->has_unaligned()
? BVH_UNALIGNED_ONODE_SIZE
: BVH_ONODE_SIZE;
}
stack.push_back(BVHStackEntry(children[i], idx));
}
/* Set node. */
pack_inner(e, &stack[stack.size() - num_children], num_children);
}
}
if (e.node->is_leaf()) {
/* leaf node */
const LeafNode *leaf = reinterpret_cast<const LeafNode *>(e.node);
pack_leaf(e, leaf);
}
else {
/* Inner node. */
/* Collect nodes. */
const BVHNode *children[8];
int num_children = e.node->num_children();
/* Push entries on the stack. */
for (int i = 0; i < num_children; ++i) {
int idx;
children[i] = e.node->get_child(i);
if (children[i]->is_leaf()) {
idx = nextLeafNodeIdx++;
}
else {
idx = nextNodeIdx;
nextNodeIdx += children[i]->has_unaligned() ? BVH_UNALIGNED_ONODE_SIZE : BVH_ONODE_SIZE;
}
stack.push_back(BVHStackEntry(children[i], idx));
}
/* Set node. */
pack_inner(e, &stack[stack.size() - num_children], num_children);
}
}
assert(node_size == nextNodeIdx);
/* Root index to start traversal at, to handle case of single leaf node. */
pack.root_index = (root->is_leaf()) ? -1 : 0;
assert(node_size == nextNodeIdx);
/* Root index to start traversal at, to handle case of single leaf node. */
pack.root_index = (root->is_leaf()) ? -1 : 0;
}
/* Refit the packed tree, starting at index 0.
 *
 * Must not be called on a top level BVH (asserted). A root_index of -1 means
 * the root is a single leaf, so index 0 is then interpreted as a leaf index. */
void BVH8::refit_nodes()
{
  assert(!params.top_level);

  BoundBox bbox = BoundBox::empty;
  uint visibility = 0;
  refit_node(0, pack.root_index == -1, bbox, visibility);
}
void BVH8::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
void BVH8::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility)
{
if(leaf) {
int4 *data = &pack.leaf_nodes[idx];
int4 c = data[0];
/* Refit leaf node. */
for(int prim = c.x; prim < c.y; prim++) {
int pidx = pack.prim_index[prim];
int tob = pack.prim_object[prim];
Object *ob = objects[tob];
if (leaf) {
int4 *data = &pack.leaf_nodes[idx];
int4 c = data[0];
/* Refit leaf node. */
for (int prim = c.x; prim < c.y; prim++) {
int pidx = pack.prim_index[prim];
int tob = pack.prim_object[prim];
Object *ob = objects[tob];
if(pidx == -1) {
/* Object instance. */
bbox.grow(ob->bounds);
}
else {
/* Primitives. */
const Mesh *mesh = ob->mesh;
if (pidx == -1) {
/* Object instance. */
bbox.grow(ob->bounds);
}
else {
/* Primitives. */
const Mesh *mesh = ob->mesh;
if(pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
/* Curves. */
int str_offset = (params.top_level) ? mesh->curve_offset : 0;
Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
/* Curves. */
int str_offset = (params.top_level) ? mesh->curve_offset : 0;
Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);
curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);
visibility |= PATH_RAY_CURVE;
visibility |= PATH_RAY_CURVE;
/* Motion curves. */
if(mesh->use_motion_blur) {
Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
/* Motion curves. */
if (mesh->use_motion_blur) {
Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if(attr) {
size_t mesh_size = mesh->curve_keys.size();
size_t steps = mesh->motion_steps - 1;
float3 *key_steps = attr->data_float3();
if (attr) {
size_t mesh_size = mesh->curve_keys.size();
size_t steps = mesh->motion_steps - 1;
float3 *key_steps = attr->data_float3();
for(size_t i = 0; i < steps; i++) {
curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bbox);
}
}
}
}
else {
/* Triangles. */
int tri_offset = (params.top_level) ? mesh->tri_offset : 0;
Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
const float3 *vpos = &mesh->verts[0];
for (size_t i = 0; i < steps; i++) {
curve.bounds_grow(k, key_steps + i * mesh_size, &mesh->curve_radius[0], bbox);
}
}
}
}
else {
/* Triangles. */
int tri_offset = (params.top_level) ? mesh->tri_offset : 0;
Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
const float3 *vpos = &mesh->verts[0];
triangle.bounds_grow(vpos, bbox);
triangle.bounds_grow(vpos, bbox);
/* Motion triangles. */
if(mesh->use_motion_blur) {
Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
/* Motion triangles. */
if (mesh->use_motion_blur) {
Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if(attr) {
size_t mesh_size = mesh->verts.size();
size_t steps = mesh->motion_steps - 1;
float3 *vert_steps = attr->data_float3();
if (attr) {
size_t mesh_size = mesh->verts.size();
size_t steps = mesh->motion_steps - 1;
float3 *vert_steps = attr->data_float3();
for(size_t i = 0; i < steps; i++) {
triangle.bounds_grow(vert_steps + i*mesh_size, bbox);
}
}
}
}
}
for (size_t i = 0; i < steps; i++) {
triangle.bounds_grow(vert_steps + i * mesh_size, bbox);
}
}
}
}
}
visibility |= ob->visibility;
}
visibility |= ob->visibility;
}
float4 leaf_data[BVH_ONODE_LEAF_SIZE];
leaf_data[0].x = __int_as_float(c.x);
leaf_data[0].y = __int_as_float(c.y);
leaf_data[0].z = __uint_as_float(visibility);
leaf_data[0].w = __uint_as_float(c.w);
memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4)*BVH_ONODE_LEAF_SIZE);
}
else {
float8 *data = (float8*)&pack.nodes[idx];
bool is_unaligned = (__float_as_uint(data[0].a) & PATH_RAY_NODE_UNALIGNED) != 0;
/* Refit inner node, set bbox from children. */
BoundBox child_bbox[8] = { BoundBox::empty, BoundBox::empty,
BoundBox::empty, BoundBox::empty,
BoundBox::empty, BoundBox::empty,
BoundBox::empty, BoundBox::empty };
int child[8];
uint child_visibility[8] = { 0 };
int num_nodes = 0;
float4 leaf_data[BVH_ONODE_LEAF_SIZE];
leaf_data[0].x = __int_as_float(c.x);
leaf_data[0].y = __int_as_float(c.y);
leaf_data[0].z = __uint_as_float(visibility);
leaf_data[0].w = __uint_as_float(c.w);
memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4) * BVH_ONODE_LEAF_SIZE);
}
else {
float8 *data = (float8 *)&pack.nodes[idx];
bool is_unaligned = (__float_as_uint(data[0].a) & PATH_RAY_NODE_UNALIGNED) != 0;
/* Refit inner node, set bbox from children. */
BoundBox child_bbox[8] = {BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty};
int child[8];
uint child_visibility[8] = {0};
int num_nodes = 0;
for(int i = 0; i < 8; ++i) {
child[i] = __float_as_int(data[(is_unaligned) ? 13: 7][i]);
for (int i = 0; i < 8; ++i) {
child[i] = __float_as_int(data[(is_unaligned) ? 13 : 7][i]);
if(child[i] != 0) {
refit_node((child[i] < 0)? -child[i]-1: child[i], (child[i] < 0),
child_bbox[i], child_visibility[i]);
++num_nodes;
bbox.grow(child_bbox[i]);
visibility |= child_visibility[i];
}
}
if (child[i] != 0) {
refit_node((child[i] < 0) ? -child[i] - 1 : child[i],
(child[i] < 0),
child_bbox[i],
child_visibility[i]);
++num_nodes;
bbox.grow(child_bbox[i]);
visibility |= child_visibility[i];
}
}
if(is_unaligned) {
Transform aligned_space[8] = { transform_identity(), transform_identity(),
transform_identity(), transform_identity(),
transform_identity(), transform_identity(),
transform_identity(), transform_identity()};
pack_unaligned_node(idx,
aligned_space,
child_bbox,
child,
visibility,
0.0f,
1.0f,
num_nodes);
}
else {
pack_aligned_node(idx,
child_bbox,
child,
visibility,
0.0f,
1.0f,
num_nodes);
}
}
if (is_unaligned) {
Transform aligned_space[8] = {transform_identity(),
transform_identity(),
transform_identity(),
transform_identity(),
transform_identity(),
transform_identity(),
transform_identity(),
transform_identity()};
pack_unaligned_node(
idx, aligned_space, child_bbox, child, visibility, 0.0f, 1.0f, num_nodes);
}
else {
pack_aligned_node(idx, child_bbox, child, visibility, 0.0f, 1.0f, num_nodes);
}
}
}
CCL_NAMESPACE_END

View File

@@ -45,8 +45,8 @@ class LeafNode;
class Object;
class Progress;
/* Number of array elements occupied by one packed node:
 * aligned inner node, leaf node, and unaligned inner node respectively. */
#define BVH_ONODE_SIZE 16
#define BVH_ONODE_LEAF_SIZE 1
#define BVH_UNALIGNED_ONODE_SIZE 28
/* BVH8
@@ -54,48 +54,44 @@ class Progress;
* Octo BVH, with each node having eight children, to use with SIMD instructions.
*/
class BVH8 : public BVH {
protected:
/* constructor */
friend class BVH;
BVH8(const BVHParams& params, const vector<Object*>& objects);
protected:
/* constructor */
friend class BVH;
BVH8(const BVHParams &params, const vector<Object *> &objects);
/* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
/* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
/* pack */
void pack_nodes(const BVHNode *root) override;
/* pack */
void pack_nodes(const BVHNode *root) override;
void pack_leaf(const BVHStackEntry& e, const LeafNode *leaf);
void pack_inner(const BVHStackEntry& e, const BVHStackEntry *en, int num);
void pack_leaf(const BVHStackEntry &e, const LeafNode *leaf);
void pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num);
void pack_aligned_node(int idx,
const BoundBox *bounds,
const int *child,
const uint visibility,
const float time_from,
const float time_to,
const int num);
void pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_aligned_node(int idx,
const BoundBox *bounds,
const int *child,
const uint visibility,
const float time_from,
const float time_to,
const int num);
void pack_unaligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num);
void pack_unaligned_node(int idx,
const Transform *aligned_space,
const BoundBox *bounds,
const int *child,
const uint visibility,
const float time_from,
const float time_to,
const int num);
void pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_unaligned_node(int idx,
const Transform *aligned_space,
const BoundBox *bounds,
const int *child,
const uint visibility,
const float time_from,
const float time_to,
const int num);
/* refit */
void refit_nodes() override;
void refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility);
/* refit */
void refit_nodes() override;
void refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility);
};
CCL_NAMESPACE_END
#endif /* __BVH8_H__ */
#endif /* __BVH8_H__ */

View File

@@ -29,225 +29,265 @@ CCL_NAMESPACE_BEGIN
/* SSE replacements */
/* Prefetch hints are no-ops in this build; kept so call sites stay unchanged. */
__forceinline void prefetch_L1(const void * /*ptr*/)
{
}
__forceinline void prefetch_L2(const void * /*ptr*/)
{
}
__forceinline void prefetch_L3(const void * /*ptr*/)
{
}
__forceinline void prefetch_NTA(const void * /*ptr*/)
{
}

/* Return component `src` of `b`. */
template<size_t src> __forceinline float extract(const int4 &b)
{
  return b[src];
}

/* Return a copy of `a` with component `dst` replaced by `b`. */
template<size_t dst> __forceinline const float4 insert(const float4 &a, const float b)
{
  float4 r = a;
  r[dst] = b;
  return r;
}

/* Index (0, 1 or 2) of the smallest of the x/y/z components of `bestSAH`.
 * Ties resolve to the lowest index; the w component is ignored. */
__forceinline int get_best_dimension(const float4 &bestSAH)
{
  // return (int)__bsf(movemask(reduce_min(bestSAH) == bestSAH));

  float minSAH = min(bestSAH.x, min(bestSAH.y, bestSAH.z));

  if (bestSAH.x == minSAH) {
    return 0;
  }
  else if (bestSAH.y == minSAH) {
    return 1;
  }
  else {
    return 2;
  }
}
/* BVH Object Binning */
/* Evaluate a binned SAH object split for the references in `job`.
 *
 * job:                 range of references to split (copied into the BVHRange base).
 * prims:               reference array that `job` indexes into.
 * unaligned_heuristic: helper used to compute bounds in `aligned_space`; only
 *                      dereferenced when `aligned_space` is not NULL.
 * aligned_space:       optional space to bin in; NULL bins in the references' own bounds.
 *
 * References are distributed over `num_bins` bins per axis by centroid, bin
 * bounds are swept from both sides, and the result is stored in dim / pos /
 * splitSAH, with leafSAH holding the cost of not splitting at all. */
BVHObjectBinning::BVHObjectBinning(const BVHRange &job,
                                   BVHReference *prims,
                                   const BVHUnaligned *unaligned_heuristic,
                                   const Transform *aligned_space)
    : BVHRange(job),
      splitSAH(FLT_MAX),
      dim(0),
      pos(0),
      unaligned_heuristic_(unaligned_heuristic),
      aligned_space_(aligned_space)
{
  if (aligned_space_ == NULL) {
    bounds_ = bounds();
    cent_bounds_ = cent_bounds();
  }
  else {
    /* TODO(sergey): With some additional storage we can avoid
     * need in re-calculating this.
     */
    bounds_ = unaligned_heuristic->compute_aligned_boundbox(
        *this, prims, *aligned_space, &cent_bounds_);
  }

  /* compute number of bins to use and precompute scaling factor for binning */
  num_bins = min(size_t(MAX_BINS), size_t(4.0f + 0.05f * size()));
  scale = rcp(cent_bounds_.size()) * make_float3((float)num_bins);

  /* initialize binning counter and bounds */
  BoundBox bin_bounds[MAX_BINS][4]; /* bounds for every bin in every dimension */
  int4 bin_count[MAX_BINS];         /* number of primitives mapped to bin */

  for (size_t i = 0; i < num_bins; i++) {
    bin_count[i] = make_int4(0);
    bin_bounds[i][0] = bin_bounds[i][1] = bin_bounds[i][2] = BoundBox::empty;
  }

  /* map geometry to bins, unrolled once */
  {
    ssize_t i;

    for (i = 0; i < ssize_t(size()) - 1; i += 2) {
      prefetch_L2(&prims[start() + i + 8]);

      /* map even and odd primitive to bin */
      const BVHReference &prim0 = prims[start() + i + 0];
      const BVHReference &prim1 = prims[start() + i + 1];

      BoundBox bounds0 = get_prim_bounds(prim0);
      BoundBox bounds1 = get_prim_bounds(prim1);

      int4 bin0 = get_bin(bounds0);
      int4 bin1 = get_bin(bounds1);

      /* increase bounds for bins for even primitive */
      int b00 = (int)extract<0>(bin0);
      bin_count[b00][0]++;
      bin_bounds[b00][0].grow(bounds0);
      int b01 = (int)extract<1>(bin0);
      bin_count[b01][1]++;
      bin_bounds[b01][1].grow(bounds0);
      int b02 = (int)extract<2>(bin0);
      bin_count[b02][2]++;
      bin_bounds[b02][2].grow(bounds0);

      /* increase bounds of bins for odd primitive */
      int b10 = (int)extract<0>(bin1);
      bin_count[b10][0]++;
      bin_bounds[b10][0].grow(bounds1);
      int b11 = (int)extract<1>(bin1);
      bin_count[b11][1]++;
      bin_bounds[b11][1].grow(bounds1);
      int b12 = (int)extract<2>(bin1);
      bin_count[b12][2]++;
      bin_bounds[b12][2].grow(bounds1);
    }

    /* for uneven number of primitives */
    if (i < ssize_t(size())) {
      /* map primitive to bin */
      const BVHReference &prim0 = prims[start() + i];
      BoundBox bounds0 = get_prim_bounds(prim0);
      int4 bin0 = get_bin(bounds0);

      /* increase bounds of bins */
      int b00 = (int)extract<0>(bin0);
      bin_count[b00][0]++;
      bin_bounds[b00][0].grow(bounds0);
      int b01 = (int)extract<1>(bin0);
      bin_count[b01][1]++;
      bin_bounds[b01][1].grow(bounds0);
      int b02 = (int)extract<2>(bin0);
      bin_count[b02][2]++;
      bin_bounds[b02][2].grow(bounds0);
    }
  }

  /* sweep from right to left and compute parallel prefix of merged bounds */
  float4 r_area[MAX_BINS];  /* area of bounds of primitives on the right */
  float4 r_count[MAX_BINS]; /* number of primitives on the right */
  int4 count = make_int4(0);

  BoundBox bx = BoundBox::empty;
  BoundBox by = BoundBox::empty;
  BoundBox bz = BoundBox::empty;

  /* Entry 0 is never written: the left-to-right sweep below only reads entries 1 and up. */
  for (size_t i = num_bins - 1; i > 0; i--) {
    count = count + bin_count[i];
    r_count[i] = blocks(count);

    bx = merge(bx, bin_bounds[i][0]);
    r_area[i][0] = bx.half_area();
    by = merge(by, bin_bounds[i][1]);
    r_area[i][1] = by.half_area();
    bz = merge(bz, bin_bounds[i][2]);
    r_area[i][2] = bz.half_area();
    r_area[i][3] = r_area[i][2];
  }

  /* sweep from left to right and compute SAH */
  int4 ii = make_int4(1);
  float4 bestSAH = make_float4(FLT_MAX);
  int4 bestSplit = make_int4(-1);

  count = make_int4(0);

  bx = BoundBox::empty;
  by = BoundBox::empty;
  bz = BoundBox::empty;

  for (size_t i = 1; i < num_bins; i++, ii += make_int4(1)) {
    count = count + bin_count[i - 1];

    bx = merge(bx, bin_bounds[i - 1][0]);
    float Ax = bx.half_area();
    by = merge(by, bin_bounds[i - 1][1]);
    float Ay = by.half_area();
    bz = merge(bz, bin_bounds[i - 1][2]);
    float Az = bz.half_area();

    float4 lCount = blocks(count);
    float4 lArea = make_float4(Ax, Ay, Az, Az);
    float4 sah = lArea * lCount + r_area[i] * r_count[i];

    bestSplit = select(sah < bestSAH, ii, bestSplit);
    bestSAH = min(sah, bestSAH);
  }

  /* Dimensions with no centroid extent cannot be split: force their cost to FLT_MAX. */
  int4 mask = float3_to_float4(cent_bounds_.size()) <= make_float4(0.0f);
  bestSAH = insert<3>(select(mask, make_float4(FLT_MAX), bestSAH), FLT_MAX);

  /* find best dimension */
  dim = get_best_dimension(bestSAH);
  splitSAH = bestSAH[dim];
  pos = bestSplit[dim];
  leafSAH = bounds_.half_area() * blocks(size());
}
/* Partition the references of this range in place and bin both halves.
 *
 * prims:   reference array this range indexes into; elements are swapped so that
 *          references whose bin along `dim` is below `pos` come first.
 * left_o:  receives the binning of the left part.
 * right_o: receives the binning of the right part.
 *
 * When the partition leaves one side empty, the range is cut in the middle instead. */
void BVHObjectBinning::split(BVHReference *prims,
                             BVHObjectBinning &left_o,
                             BVHObjectBinning &right_o) const
{
  size_t N = size();

  BoundBox lgeom_bounds = BoundBox::empty;
  BoundBox rgeom_bounds = BoundBox::empty;
  BoundBox lcent_bounds = BoundBox::empty;
  BoundBox rcent_bounds = BoundBox::empty;

  ssize_t l = 0, r = N - 1;

  while (l <= r) {
    prefetch_L2(&prims[start() + l + 8]);
    prefetch_L2(&prims[start() + r - 8]);

    BVHReference prim = prims[start() + l];
    /* The bin is chosen from the bounds in binning space, the accumulated
     * bounds from the reference's own bounds. */
    BoundBox unaligned_bounds = get_prim_bounds(prim);
    float3 unaligned_center = unaligned_bounds.center2();
    float3 center = prim.bounds().center2();

    if (get_bin(unaligned_center)[dim] < pos) {
      lgeom_bounds.grow(prim.bounds());
      lcent_bounds.grow(center);
      l++;
    }
    else {
      rgeom_bounds.grow(prim.bounds());
      rcent_bounds.grow(center);
      /* Move to the right part; the element swapped in is examined next. */
      swap(prims[start() + l], prims[start() + r]);
      r--;
    }
  }
  /* finish */
  if (l != 0 && N - 1 - r != 0) {
    right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + l, N - 1 - r),
                               prims);
    left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), l), prims);
    return;
  }

  /* object medium split if we did not make progress, can happen when all
   * primitives have same centroid */
  lgeom_bounds = BoundBox::empty;
  rgeom_bounds = BoundBox::empty;
  lcent_bounds = BoundBox::empty;
  rcent_bounds = BoundBox::empty;

  for (size_t i = 0; i < N / 2; i++) {
    lgeom_bounds.grow(prims[start() + i].bounds());
    lcent_bounds.grow(prims[start() + i].bounds().center2());
  }

  for (size_t i = N / 2; i < N; i++) {
    rgeom_bounds.grow(prims[start() + i].bounds());
    rcent_bounds.grow(prims[start() + i].bounds().center2());
  }

  right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + N / 2, N / 2 + N % 2),
                             prims);
  left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), N / 2), prims);
}
CCL_NAMESPACE_END

View File

@@ -34,81 +34,82 @@ class BVHBuild;
* location to different sets. The SAH is evaluated by computing the number of
* blocks occupied by the primitives in the partitions. */
class BVHObjectBinning : public BVHRange
{
public:
__forceinline BVHObjectBinning() : leafSAH(FLT_MAX) {}
class BVHObjectBinning : public BVHRange {
public:
__forceinline BVHObjectBinning() : leafSAH(FLT_MAX)
{
}
BVHObjectBinning(const BVHRange& job,
BVHReference *prims,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
BVHObjectBinning(const BVHRange &job,
BVHReference *prims,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
void split(BVHReference *prims,
BVHObjectBinning& left_o,
BVHObjectBinning& right_o) const;
void split(BVHReference *prims, BVHObjectBinning &left_o, BVHObjectBinning &right_o) const;
__forceinline const BoundBox& unaligned_bounds() { return bounds_; }
__forceinline const BoundBox &unaligned_bounds()
{
return bounds_;
}
float splitSAH; /* SAH cost of the best split */
float leafSAH; /* SAH cost of creating a leaf */
float splitSAH; /* SAH cost of the best split */
float leafSAH; /* SAH cost of creating a leaf */
protected:
int dim; /* best split dimension */
int pos; /* best split position */
size_t num_bins; /* actual number of bins to use */
float3 scale; /* scaling factor to compute bin */
protected:
int dim; /* best split dimension */
int pos; /* best split position */
size_t num_bins; /* actual number of bins to use */
float3 scale; /* scaling factor to compute bin */
/* Effective bounds and centroid bounds. */
BoundBox bounds_;
BoundBox cent_bounds_;
/* Effective bounds and centroid bounds. */
BoundBox bounds_;
BoundBox cent_bounds_;
const BVHUnaligned *unaligned_heuristic_;
const Transform *aligned_space_;
const BVHUnaligned *unaligned_heuristic_;
const Transform *aligned_space_;
enum { MAX_BINS = 32 };
enum { LOG_BLOCK_SIZE = 2 };
enum { MAX_BINS = 32 };
enum { LOG_BLOCK_SIZE = 2 };
/* computes the bin numbers for each dimension for a box. */
__forceinline int4 get_bin(const BoundBox& box) const
{
int4 a = make_int4((box.center2() - cent_bounds_.min)*scale - make_float3(0.5f));
int4 mn = make_int4(0);
int4 mx = make_int4((int)num_bins-1);
/* computes the bin numbers for each dimension for a box. */
__forceinline int4 get_bin(const BoundBox &box) const
{
int4 a = make_int4((box.center2() - cent_bounds_.min) * scale - make_float3(0.5f));
int4 mn = make_int4(0);
int4 mx = make_int4((int)num_bins - 1);
return clamp(a, mn, mx);
}
return clamp(a, mn, mx);
}
/* computes the bin numbers for each dimension for a point. */
__forceinline int4 get_bin(const float3& c) const
{
return make_int4((c - cent_bounds_.min)*scale - make_float3(0.5f));
}
/* computes the bin numbers for each dimension for a point. */
__forceinline int4 get_bin(const float3 &c) const
{
return make_int4((c - cent_bounds_.min) * scale - make_float3(0.5f));
}
/* compute the number of blocks occupied for each dimension. */
__forceinline float4 blocks(const int4& a) const
{
return make_float4((a + make_int4((1 << LOG_BLOCK_SIZE)-1)) >> LOG_BLOCK_SIZE);
}
/* compute the number of blocks occupied for each dimension. */
__forceinline float4 blocks(const int4 &a) const
{
return make_float4((a + make_int4((1 << LOG_BLOCK_SIZE) - 1)) >> LOG_BLOCK_SIZE);
}
/* compute the number of blocks occupied in one dimension. */
__forceinline int blocks(size_t a) const
{
return (int)((a+((1LL << LOG_BLOCK_SIZE)-1)) >> LOG_BLOCK_SIZE);
}
/* compute the number of blocks occupied in one dimension. */
__forceinline int blocks(size_t a) const
{
return (int)((a + ((1LL << LOG_BLOCK_SIZE) - 1)) >> LOG_BLOCK_SIZE);
}
__forceinline BoundBox get_prim_bounds(const BVHReference& prim) const
{
if(aligned_space_ == NULL) {
return prim.bounds();
}
else {
return unaligned_heuristic_->compute_aligned_prim_boundbox(
prim, *aligned_space_);
}
}
__forceinline BoundBox get_prim_bounds(const BVHReference &prim) const
{
if (aligned_space_ == NULL) {
return prim.bounds();
}
else {
return unaligned_heuristic_->compute_aligned_prim_boundbox(prim, *aligned_space_);
}
}
};
CCL_NAMESPACE_END
#endif /* __BVH_BINNING_H__ */
#endif /* __BVH_BINNING_H__ */

File diff suppressed because it is too large Load Diff

View File

@@ -41,106 +41,101 @@ class Progress;
/* BVH Builder */
class BVHBuild
{
public:
/* Constructor/Destructor */
BVHBuild(const vector<Object*>& objects,
array<int>& prim_type,
array<int>& prim_index,
array<int>& prim_object,
array<float2>& prim_time,
const BVHParams& params,
Progress& progress);
~BVHBuild();
class BVHBuild {
public:
/* Constructor/Destructor */
BVHBuild(const vector<Object *> &objects,
array<int> &prim_type,
array<int> &prim_index,
array<int> &prim_object,
array<float2> &prim_time,
const BVHParams &params,
Progress &progress);
~BVHBuild();
BVHNode *run();
BVHNode *run();
protected:
friend class BVHMixedSplit;
friend class BVHObjectSplit;
friend class BVHSpatialSplit;
friend class BVHBuildTask;
friend class BVHSpatialSplitBuildTask;
friend class BVHObjectBinning;
protected:
friend class BVHMixedSplit;
friend class BVHObjectSplit;
friend class BVHSpatialSplit;
friend class BVHBuildTask;
friend class BVHSpatialSplitBuildTask;
friend class BVHObjectBinning;
/* Adding references. */
void add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
void add_reference_curves(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
void add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
void add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i);
void add_references(BVHRange& root);
/* Adding references. */
void add_reference_triangles(BoundBox &root, BoundBox &center, Mesh *mesh, int i);
void add_reference_curves(BoundBox &root, BoundBox &center, Mesh *mesh, int i);
void add_reference_mesh(BoundBox &root, BoundBox &center, Mesh *mesh, int i);
void add_reference_object(BoundBox &root, BoundBox &center, Object *ob, int i);
void add_references(BVHRange &root);
/* Building. */
BVHNode *build_node(const BVHRange& range,
vector<BVHReference> *references,
int level,
int thread_id);
BVHNode *build_node(const BVHObjectBinning& range, int level);
BVHNode *create_leaf_node(const BVHRange& range,
const vector<BVHReference>& references);
BVHNode *create_object_leaf_nodes(const BVHReference *ref, int start, int num);
/* Building. */
BVHNode *build_node(const BVHRange &range,
vector<BVHReference> *references,
int level,
int thread_id);
BVHNode *build_node(const BVHObjectBinning &range, int level);
BVHNode *create_leaf_node(const BVHRange &range, const vector<BVHReference> &references);
BVHNode *create_object_leaf_nodes(const BVHReference *ref, int start, int num);
bool range_within_max_leaf_size(const BVHRange& range,
const vector<BVHReference>& references) const;
bool range_within_max_leaf_size(const BVHRange &range,
const vector<BVHReference> &references) const;
/* Threads. */
enum { THREAD_TASK_SIZE = 4096 };
void thread_build_node(InnerNode *node,
int child,
BVHObjectBinning *range,
int level);
void thread_build_spatial_split_node(InnerNode *node,
int child,
BVHRange *range,
vector<BVHReference> *references,
int level,
int thread_id);
thread_mutex build_mutex;
/* Threads. */
enum { THREAD_TASK_SIZE = 4096 };
void thread_build_node(InnerNode *node, int child, BVHObjectBinning *range, int level);
void thread_build_spatial_split_node(InnerNode *node,
int child,
BVHRange *range,
vector<BVHReference> *references,
int level,
int thread_id);
thread_mutex build_mutex;
/* Progress. */
void progress_update();
/* Progress. */
void progress_update();
/* Tree rotations. */
void rotate(BVHNode *node, int max_depth);
void rotate(BVHNode *node, int max_depth, int iterations);
/* Tree rotations. */
void rotate(BVHNode *node, int max_depth);
void rotate(BVHNode *node, int max_depth, int iterations);
/* Objects and primitive references. */
vector<Object*> objects;
vector<BVHReference> references;
int num_original_references;
/* Objects and primitive references. */
vector<Object *> objects;
vector<BVHReference> references;
int num_original_references;
/* Output primitive indexes and objects. */
array<int>& prim_type;
array<int>& prim_index;
array<int>& prim_object;
array<float2>& prim_time;
/* Output primitive indexes and objects. */
array<int> &prim_type;
array<int> &prim_index;
array<int> &prim_object;
array<float2> &prim_time;
bool need_prim_time;
bool need_prim_time;
/* Build parameters. */
BVHParams params;
/* Build parameters. */
BVHParams params;
/* Progress reporting. */
Progress& progress;
double progress_start_time;
size_t progress_count;
size_t progress_total;
size_t progress_original_total;
/* Progress reporting. */
Progress &progress;
double progress_start_time;
size_t progress_count;
size_t progress_total;
size_t progress_original_total;
/* Spatial splitting. */
float spatial_min_overlap;
vector<BVHSpatialStorage> spatial_storage;
size_t spatial_free_index;
thread_spin_lock spatial_spin_lock;
/* Spatial splitting. */
float spatial_min_overlap;
vector<BVHSpatialStorage> spatial_storage;
size_t spatial_free_index;
thread_spin_lock spatial_spin_lock;
/* Threads. */
TaskPool task_pool;
/* Threads. */
TaskPool task_pool;
/* Unaligned building. */
BVHUnaligned unaligned_heuristic;
/* Unaligned building. */
BVHUnaligned unaligned_heuristic;
};
CCL_NAMESPACE_END
#endif /* __BVH_BUILD_H__ */
#endif /* __BVH_BUILD_H__ */

File diff suppressed because it is too large Load Diff

View File

@@ -19,65 +19,68 @@
#ifdef WITH_EMBREE
#include <embree3/rtcore.h>
#include <embree3/rtcore_scene.h>
# include <embree3/rtcore.h>
# include <embree3/rtcore_scene.h>
#include "bvh/bvh.h"
#include "bvh/bvh_params.h"
# include "bvh/bvh.h"
# include "bvh/bvh_params.h"
#include "util/util_thread.h"
#include "util/util_types.h"
#include "util/util_vector.h"
# include "util/util_thread.h"
# include "util/util_types.h"
# include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
class Mesh;
/* BVH subclass declared only when WITH_EMBREE is defined; it keeps an Embree
 * RTCScene handle as a public member.
 *
 * The constructor is protected and BVH is declared a friend, so instances are
 * presumably created through BVH rather than directly -- confirm against the
 * BVH factory code, which is not part of this listing. */
class BVHEmbree : public BVH
{
public:
virtual void build(Progress& progress, Stats *stats) override;
virtual ~BVHEmbree();
RTCScene scene;
static void destroy(RTCScene);
class BVHEmbree : public BVH {
public:
virtual void build(Progress &progress, Stats *stats) override;
virtual ~BVHEmbree();
RTCScene scene;
static void destroy(RTCScene);
/* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
/* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
protected:
friend class BVH;
BVHEmbree(const BVHParams& params, const vector<Object*>& objects);
protected:
friend class BVH;
BVHEmbree(const BVHParams &params, const vector<Object *> &objects);
virtual void pack_nodes(const BVHNode*) override;
virtual void refit_nodes() override;
virtual void pack_nodes(const BVHNode *) override;
virtual void refit_nodes() override;
/* Per-object registration helpers; `i` is an index supplied by the caller
 * (its exact meaning is defined in the implementation, not shown here). */
void add_object(Object *ob, int i);
void add_instance(Object *ob, int i);
void add_curves(Object *ob, int i);
void add_triangles(Object *ob, int i);
void add_object(Object *ob, int i);
void add_instance(Object *ob, int i);
void add_curves(Object *ob, int i);
void add_triangles(Object *ob, int i);
ssize_t mem_used;
ssize_t mem_used;
/* Appends `scene` to delayed_delete_scenes; nothing is released here. */
void add_delayed_delete_scene(RTCScene scene) { delayed_delete_scenes.push_back(scene); }
BVHEmbree *top_level;
private:
void delete_rtcScene();
void update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh);
void update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh);
void add_delayed_delete_scene(RTCScene scene)
{
delayed_delete_scenes.push_back(scene);
}
BVHEmbree *top_level;
/* Static state shared by all BVHEmbree instances: a device handle, a user
 * counter and a mutex (presumably guarding the two -- confirm in the .cpp). */
static RTCDevice rtc_shared_device;
static int rtc_shared_users;
static thread_mutex rtc_shared_mutex;
private:
void delete_rtcScene();
void update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh);
void update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh);
Stats *stats;
vector<RTCScene> delayed_delete_scenes;
int curve_subdivisions;
enum RTCBuildQuality build_quality;
bool use_curves, use_ribbons, dynamic_scene;
static RTCDevice rtc_shared_device;
static int rtc_shared_users;
static thread_mutex rtc_shared_mutex;
Stats *stats;
vector<RTCScene> delayed_delete_scenes;
int curve_subdivisions;
enum RTCBuildQuality build_quality;
bool use_curves, use_ribbons, dynamic_scene;
};
CCL_NAMESPACE_END
#endif /* WITH_EMBREE */
#endif /* WITH_EMBREE */
#endif /* __BVH_EMBREE_H__ */
#endif /* __BVH_EMBREE_H__ */

View File

@@ -28,199 +28,197 @@ CCL_NAMESPACE_BEGIN
/* Compute the statistic `stat` over the subtree rooted at this node.
 *
 * The switch computes this node's own contribution. For every statistic
 * except BVH_STAT_DEPTH, the contributions of all children are then added
 * recursively after the switch. BVH_STAT_DEPTH returns from inside the switch
 * with 1 for a leaf, or 1 + the maximum child depth for an inner node.
 * An unrecognized `stat` hits assert(0). */
int BVHNode::getSubtreeSize(BVH_STAT stat) const
{
int cnt = 0;
int cnt = 0;
switch(stat)
{
case BVH_STAT_NODE_COUNT:
cnt = 1;
break;
case BVH_STAT_LEAF_COUNT:
cnt = is_leaf() ? 1 : 0;
break;
case BVH_STAT_INNER_COUNT:
cnt = is_leaf() ? 0 : 1;
break;
case BVH_STAT_TRIANGLE_COUNT:
cnt = is_leaf() ? reinterpret_cast<const LeafNode*>(this)->num_triangles() : 0;
break;
case BVH_STAT_CHILDNODE_COUNT:
cnt = num_children();
break;
case BVH_STAT_ALIGNED_COUNT:
if(!is_unaligned) {
cnt = 1;
}
break;
case BVH_STAT_UNALIGNED_COUNT:
if(is_unaligned) {
cnt = 1;
}
break;
case BVH_STAT_ALIGNED_INNER_COUNT:
if(!is_leaf()) {
bool has_unaligned = false;
for(int j = 0; j < num_children(); j++) {
has_unaligned |= get_child(j)->is_unaligned;
}
cnt += has_unaligned? 0: 1;
}
break;
case BVH_STAT_UNALIGNED_INNER_COUNT:
if(!is_leaf()) {
bool has_unaligned = false;
for(int j = 0; j < num_children(); j++) {
has_unaligned |= get_child(j)->is_unaligned;
}
cnt += has_unaligned? 1: 0;
}
break;
case BVH_STAT_ALIGNED_LEAF_COUNT:
cnt = (is_leaf() && !is_unaligned) ? 1 : 0;
break;
case BVH_STAT_UNALIGNED_LEAF_COUNT:
cnt = (is_leaf() && is_unaligned) ? 1 : 0;
break;
case BVH_STAT_DEPTH:
if(is_leaf()) {
cnt = 1;
}
else {
for(int i = 0; i < num_children(); i++) {
cnt = max(cnt, get_child(i)->getSubtreeSize(stat));
}
cnt += 1;
}
return cnt;
default:
assert(0); /* unknown mode */
}
switch (stat) {
case BVH_STAT_NODE_COUNT:
cnt = 1;
break;
case BVH_STAT_LEAF_COUNT:
cnt = is_leaf() ? 1 : 0;
break;
case BVH_STAT_INNER_COUNT:
cnt = is_leaf() ? 0 : 1;
break;
case BVH_STAT_TRIANGLE_COUNT:
cnt = is_leaf() ? reinterpret_cast<const LeafNode *>(this)->num_triangles() : 0;
break;
case BVH_STAT_CHILDNODE_COUNT:
cnt = num_children();
break;
case BVH_STAT_ALIGNED_COUNT:
if (!is_unaligned) {
cnt = 1;
}
break;
case BVH_STAT_UNALIGNED_COUNT:
if (is_unaligned) {
cnt = 1;
}
break;
case BVH_STAT_ALIGNED_INNER_COUNT:
/* An inner node counts as "aligned" only if none of its children is unaligned. */
if (!is_leaf()) {
bool has_unaligned = false;
for (int j = 0; j < num_children(); j++) {
has_unaligned |= get_child(j)->is_unaligned;
}
cnt += has_unaligned ? 0 : 1;
}
break;
case BVH_STAT_UNALIGNED_INNER_COUNT:
/* Complement of the case above: at least one child is unaligned. */
if (!is_leaf()) {
bool has_unaligned = false;
for (int j = 0; j < num_children(); j++) {
has_unaligned |= get_child(j)->is_unaligned;
}
cnt += has_unaligned ? 1 : 0;
}
break;
case BVH_STAT_ALIGNED_LEAF_COUNT:
cnt = (is_leaf() && !is_unaligned) ? 1 : 0;
break;
case BVH_STAT_UNALIGNED_LEAF_COUNT:
cnt = (is_leaf() && is_unaligned) ? 1 : 0;
break;
case BVH_STAT_DEPTH:
if (is_leaf()) {
cnt = 1;
}
else {
for (int i = 0; i < num_children(); i++) {
cnt = max(cnt, get_child(i)->getSubtreeSize(stat));
}
cnt += 1;
}
/* Depth is a maximum over children, not a sum: skip the accumulation below. */
return cnt;
default:
assert(0); /* unknown mode */
}
if(!is_leaf())
for(int i = 0; i < num_children(); i++)
cnt += get_child(i)->getSubtreeSize(stat);
/* Add the children's totals to this node's own contribution. */
if (!is_leaf())
for (int i = 0; i < num_children(); i++)
cnt += get_child(i)->getSubtreeSize(stat);
return cnt;
return cnt;
}
/* Recursively delete every non-NULL child subtree, then delete this node.
 * The function ends with `delete this`, so the object must not be touched
 * after the call returns. */
void BVHNode::deleteSubtree()
{
for(int i = 0; i < num_children(); i++)
if(get_child(i))
get_child(i)->deleteSubtree();
for (int i = 0; i < num_children(); i++)
if (get_child(i))
get_child(i)->deleteSubtree();
delete this;
delete this;
}
/* Accumulate a cost over the subtree rooted at this node.
 *
 * This node contributes probability * p.cost(num_children(), num_triangles()).
 * Each child is then visited with its probability scaled by the ratio
 * child->bounds.safe_area() / bounds.safe_area(), and the results are summed.
 * Children are dereferenced without a NULL check. */
float BVHNode::computeSubtreeSAHCost(const BVHParams& p, float probability) const
float BVHNode::computeSubtreeSAHCost(const BVHParams &p, float probability) const
{
float SAH = probability * p.cost(num_children(), num_triangles());
float SAH = probability * p.cost(num_children(), num_triangles());
for(int i = 0; i < num_children(); i++) {
BVHNode *child = get_child(i);
SAH += child->computeSubtreeSAHCost(p, probability * child->bounds.safe_area()/bounds.safe_area());
}
for (int i = 0; i < num_children(); i++) {
BVHNode *child = get_child(i);
SAH += child->computeSubtreeSAHCost(
p, probability * child->bounds.safe_area() / bounds.safe_area());
}
return SAH;
return SAH;
}
/* Return this node's visibility mask, computing it first when needed.
 *
 * For an inner node whose visibility is still 0, the mask becomes the bitwise
 * OR of the (recursively updated) masks of children[0] and children[1].
 * Leaves, and inner nodes with a non-zero mask, are returned unchanged.
 *
 * NOTE(review): only the first two children are read, although InnerNode can
 * hold up to kNumMaxChildren; presumably this is only called on binary trees
 * -- confirm with the callers. */
uint BVHNode::update_visibility()
{
if(!is_leaf() && visibility == 0) {
InnerNode *inner = (InnerNode*)this;
BVHNode *child0 = inner->children[0];
BVHNode *child1 = inner->children[1];
if (!is_leaf() && visibility == 0) {
InnerNode *inner = (InnerNode *)this;
BVHNode *child0 = inner->children[0];
BVHNode *child1 = inner->children[1];
visibility = child0->update_visibility()|child1->update_visibility();
}
visibility = child0->update_visibility() | child1->update_visibility();
}
return visibility;
return visibility;
}
/* Recompute [time_from, time_to] of inner nodes bottom-up.
 *
 * For an inner node, both children[0] and children[1] are updated first, then
 * this node's range becomes the union of theirs (min of time_from, max of
 * time_to). Leaves are left untouched.
 *
 * NOTE(review): like update_visibility(), only the first two children are
 * read -- confirm this is only used on binary trees. */
void BVHNode::update_time()
{
if(!is_leaf()) {
InnerNode *inner = (InnerNode*)this;
BVHNode *child0 = inner->children[0];
BVHNode *child1 = inner->children[1];
child0->update_time();
child1->update_time();
time_from = min(child0->time_from, child1->time_from);
time_to = max(child0->time_to, child1->time_to);
}
if (!is_leaf()) {
InnerNode *inner = (InnerNode *)this;
BVHNode *child0 = inner->children[0];
BVHNode *child1 = inner->children[1];
child0->update_time();
child1->update_time();
time_from = min(child0->time_from, child1->time_from);
time_to = max(child0->time_to, child1->time_to);
}
}
/* File-local helpers used by BVHNode::dump_graph(). */
namespace {
/* State threaded through the recursive graph dump. */
struct DumpTraversalContext {
/* Descriptor of the file where writing is happening. */
FILE *stream;
/* Unique identifier of the current node. */
int id;
/* Descriptor of the file where writing is happening. */
FILE *stream;
/* Unique identifier of the current node. */
int id;
};
/* Write `node` and, recursively, all of its children to context->stream.
 *
 * One line is written per node, using the node's address as its name and
 * context->id as its label; leaves and inner nodes get different fill colors.
 * When `parent` is non-NULL an edge line "parent -> node" is written as well.
 * context->id is incremented once per visited node, before the children are
 * visited.
 *
 * NOTE(review): the node pointers are passed to the %p conversion without a
 * cast to void *, unlike InnerNode::print() -- worth confirming/fixing. */
void dump_subtree(DumpTraversalContext *context,
const BVHNode *node,
const BVHNode *parent = NULL)
void dump_subtree(DumpTraversalContext *context, const BVHNode *node, const BVHNode *parent = NULL)
{
if(node->is_leaf()) {
fprintf(context->stream,
" node_%p [label=\"%d\",fillcolor=\"#ccccee\",style=filled]\n",
node,
context->id);
}
else {
fprintf(context->stream,
" node_%p [label=\"%d\",fillcolor=\"#cceecc\",style=filled]\n",
node,
context->id);
}
if(parent != NULL) {
fprintf(context->stream, " node_%p -> node_%p;\n", parent, node);
}
context->id += 1;
for(int i = 0; i < node->num_children(); ++i) {
dump_subtree(context, node->get_child(i), node);
}
if (node->is_leaf()) {
fprintf(context->stream,
" node_%p [label=\"%d\",fillcolor=\"#ccccee\",style=filled]\n",
node,
context->id);
}
else {
fprintf(context->stream,
" node_%p [label=\"%d\",fillcolor=\"#cceecc\",style=filled]\n",
node,
context->id);
}
if (parent != NULL) {
fprintf(context->stream, " node_%p -> node_%p;\n", parent, node);
}
context->id += 1;
for (int i = 0; i < node->num_children(); ++i) {
dump_subtree(context, node->get_child(i), node);
}
}
} // namespace
/* Write the subtree rooted at this node to `filename` as a "digraph BVH"
 * description.
 *
 * The file is opened with mode "w" (truncating any existing content). If it
 * cannot be opened the function returns silently without reporting an error.
 * Node labels start at 0 and are assigned by dump_subtree(). */
void BVHNode::dump_graph(const char *filename)
{
DumpTraversalContext context;
context.stream = fopen(filename, "w");
if(context.stream == NULL) {
return;
}
context.id = 0;
fprintf(context.stream, "digraph BVH {\n");
dump_subtree(&context, this);
fprintf(context.stream, "}\n");
fclose(context.stream);
DumpTraversalContext context;
context.stream = fopen(filename, "w");
if (context.stream == NULL) {
return;
}
context.id = 0;
fprintf(context.stream, "digraph BVH {\n");
dump_subtree(&context, this);
fprintf(context.stream, "}\n");
fclose(context.stream);
}
/* Inner Node */
/* Print this inner node to stdout, preceded by one indentation string per
 * level of `depth`, then print children[0] and children[1] (when non-NULL)
 * one level deeper. Children beyond the first two are not printed. */
void InnerNode::print(int depth) const
{
for(int i = 0; i < depth; i++)
printf(" ");
for (int i = 0; i < depth; i++)
printf(" ");
printf("inner node %p\n", (void*)this);
printf("inner node %p\n", (void *)this);
if(children[0])
children[0]->print(depth+1);
if(children[1])
children[1]->print(depth+1);
if (children[0])
children[0]->print(depth + 1);
if (children[1])
children[1]->print(depth + 1);
}
/* Print this leaf to stdout, preceded by one indentation string per level of
 * `depth`, showing its `lo` and `hi` bounds. */
void LeafNode::print(int depth) const
{
for(int i = 0; i < depth; i++)
printf(" ");
for (int i = 0; i < depth; i++)
printf(" ");
printf("leaf node %d to %d\n", lo, hi);
printf("leaf node %d to %d\n", lo, hi);
}
CCL_NAMESPACE_END

View File

@@ -24,227 +24,232 @@
CCL_NAMESPACE_BEGIN
/* Selector for the statistic computed by BVHNode::getSubtreeSize().
 * All values except BVH_STAT_DEPTH are summed over the subtree;
 * BVH_STAT_DEPTH is the maximum over children plus one. */
enum BVH_STAT {
BVH_STAT_NODE_COUNT,
BVH_STAT_INNER_COUNT,
BVH_STAT_LEAF_COUNT,
BVH_STAT_TRIANGLE_COUNT,
BVH_STAT_CHILDNODE_COUNT,
BVH_STAT_ALIGNED_COUNT,
BVH_STAT_UNALIGNED_COUNT,
BVH_STAT_ALIGNED_INNER_COUNT,
BVH_STAT_UNALIGNED_INNER_COUNT,
BVH_STAT_ALIGNED_LEAF_COUNT,
BVH_STAT_UNALIGNED_LEAF_COUNT,
BVH_STAT_DEPTH,
BVH_STAT_NODE_COUNT,
BVH_STAT_INNER_COUNT,
BVH_STAT_LEAF_COUNT,
BVH_STAT_TRIANGLE_COUNT,
BVH_STAT_CHILDNODE_COUNT,
BVH_STAT_ALIGNED_COUNT,
BVH_STAT_UNALIGNED_COUNT,
BVH_STAT_ALIGNED_INNER_COUNT,
BVH_STAT_UNALIGNED_INNER_COUNT,
BVH_STAT_ALIGNED_LEAF_COUNT,
BVH_STAT_UNALIGNED_LEAF_COUNT,
BVH_STAT_DEPTH,
};
class BVHParams;
/* Abstract base class of BVH tree nodes.
 *
 * Stores the bounds, a visibility mask, a time range and an optional
 * heap-allocated `aligned_space` transform that is owned by the node (it is
 * deleted in the destructor and deep-copied by the copy constructor).
 * Construction is protected, so nodes are created through subclasses.
 *
 * NOTE(review): no copy-assignment operator is declared in this listing even
 * though the class owns `aligned_space`; verify that nodes are never assigned
 * by value. */
class BVHNode
{
public:
virtual ~BVHNode()
{
delete aligned_space;
}
class BVHNode {
public:
virtual ~BVHNode()
{
delete aligned_space;
}
virtual bool is_leaf() const = 0;
virtual int num_children() const = 0;
virtual BVHNode *get_child(int i) const = 0;
virtual int num_triangles() const { return 0; }
virtual void print(int depth = 0) const = 0;
virtual bool is_leaf() const = 0;
virtual int num_children() const = 0;
virtual BVHNode *get_child(int i) const = 0;
virtual int num_triangles() const
{
return 0;
}
virtual void print(int depth = 0) const = 0;
inline void set_aligned_space(const Transform& aligned_space)
{
is_unaligned = true;
if(this->aligned_space == NULL) {
this->aligned_space = new Transform(aligned_space);
}
else {
*this->aligned_space = aligned_space;
}
}
/* Mark the node as unaligned and store a copy of `aligned_space`, allocating
 * the storage on first use and overwriting it afterwards. */
inline void set_aligned_space(const Transform &aligned_space)
{
is_unaligned = true;
if (this->aligned_space == NULL) {
this->aligned_space = new Transform(aligned_space);
}
else {
*this->aligned_space = aligned_space;
}
}
inline Transform get_aligned_space() const
{
if(aligned_space == NULL) {
return transform_identity();
}
return *aligned_space;
}
/* Return the stored transform by value, or transform_identity() when none
 * has been set. */
inline Transform get_aligned_space() const
{
if (aligned_space == NULL) {
return transform_identity();
}
return *aligned_space;
}
inline bool has_unaligned() const
{
if(is_leaf()) {
return false;
}
for(int i = 0; i < num_children(); ++i) {
if(get_child(i)->is_unaligned) {
return true;
}
}
return false;
}
/* True when this is an inner node and at least one direct child has
 * is_unaligned set; always false for leaves. */
inline bool has_unaligned() const
{
if (is_leaf()) {
return false;
}
for (int i = 0; i < num_children(); ++i) {
if (get_child(i)->is_unaligned) {
return true;
}
}
return false;
}
// Subtree functions
int getSubtreeSize(BVH_STAT stat=BVH_STAT_NODE_COUNT) const;
float computeSubtreeSAHCost(const BVHParams& p, float probability = 1.0f) const;
void deleteSubtree();
// Subtree functions
int getSubtreeSize(BVH_STAT stat = BVH_STAT_NODE_COUNT) const;
float computeSubtreeSAHCost(const BVHParams &p, float probability = 1.0f) const;
void deleteSubtree();
uint update_visibility();
void update_time();
uint update_visibility();
void update_time();
/* Dump the content of the tree as a graphviz file. */
void dump_graph(const char *filename);
/* Dump the content of the tree as a graphviz file. */
void dump_graph(const char *filename);
// Properties.
BoundBox bounds;
uint visibility;
// Properties.
BoundBox bounds;
uint visibility;
bool is_unaligned;
bool is_unaligned;
/* TODO(sergey): Can be stored as 3x3 matrix, but better to have some
* utilities and type defines in util_transform first.
*/
Transform *aligned_space;
/* TODO(sergey): Can be stored as 3x3 matrix, but better to have some
* utilities and type defines in util_transform first.
*/
Transform *aligned_space;
float time_from, time_to;
float time_from, time_to;
protected:
explicit BVHNode(const BoundBox& bounds)
: bounds(bounds),
visibility(0),
is_unaligned(false),
aligned_space(NULL),
time_from(0.0f),
time_to(1.0f)
{
}
protected:
/* Default state: visibility 0, aligned, no transform, time range [0, 1]. */
explicit BVHNode(const BoundBox &bounds)
: bounds(bounds),
visibility(0),
is_unaligned(false),
aligned_space(NULL),
time_from(0.0f),
time_to(1.0f)
{
}
explicit BVHNode(const BVHNode& other)
: bounds(other.bounds),
visibility(other.visibility),
is_unaligned(other.is_unaligned),
aligned_space(NULL),
time_from(other.time_from),
time_to(other.time_to)
{
if(other.aligned_space != NULL) {
assert(other.is_unaligned);
aligned_space = new Transform();
*aligned_space = *other.aligned_space;
}
else {
assert(!other.is_unaligned);
}
}
/* Copy constructor: deep-copies `aligned_space` so both nodes own their own
 * transform. The asserts check that is_unaligned is set exactly when a
 * transform is present. */
explicit BVHNode(const BVHNode &other)
: bounds(other.bounds),
visibility(other.visibility),
is_unaligned(other.is_unaligned),
aligned_space(NULL),
time_from(other.time_from),
time_to(other.time_to)
{
if (other.aligned_space != NULL) {
assert(other.is_unaligned);
aligned_space = new Transform();
*aligned_space = *other.aligned_space;
}
else {
assert(!other.is_unaligned);
}
}
};
/* Inner (non-leaf) BVH node holding up to kNumMaxChildren child pointers.
 * Slots at index >= num_children_ are reset to NULL by
 * reset_unused_children(). The node does not delete its children in a
 * destructor; subtree teardown is done by BVHNode::deleteSubtree(). */
class InnerNode : public BVHNode
{
public:
static constexpr int kNumMaxChildren = 8;
class InnerNode : public BVHNode {
public:
static constexpr int kNumMaxChildren = 8;
InnerNode(const BoundBox& bounds,
BVHNode* child0,
BVHNode* child1)
: BVHNode(bounds),
num_children_(2)
{
children[0] = child0;
children[1] = child1;
reset_unused_children();
/* Binary node from two children. Visibility is the OR of both children's
 * masks, or 0 when either child is NULL. */
InnerNode(const BoundBox &bounds, BVHNode *child0, BVHNode *child1)
: BVHNode(bounds), num_children_(2)
{
children[0] = child0;
children[1] = child1;
reset_unused_children();
if(child0 && child1) {
visibility = child0->visibility | child1->visibility;
}
else {
/* Happens on build cancel. */
visibility = 0;
}
}
if (child0 && child1) {
visibility = child0->visibility | child1->visibility;
}
else {
/* Happens on build cancel. */
visibility = 0;
}
}
InnerNode(const BoundBox& bounds,
BVHNode** children,
const int num_children)
: BVHNode(bounds),
num_children_(num_children)
{
visibility = 0;
time_from = FLT_MAX;
time_to = -FLT_MAX;
for(int i = 0; i < num_children; ++i) {
assert(children[i] != NULL);
visibility |= children[i]->visibility;
this->children[i] = children[i];
time_from = min(time_from, children[i]->time_from);
time_to = max(time_to, children[i]->time_to);
}
reset_unused_children();
}
/* N-ary node from an array of non-NULL children. Visibility is the OR of all
 * children's masks and the time range is the union of theirs.
 *
 * NOTE(review): `num_children` is not checked against kNumMaxChildren before
 * writing into this->children[] -- callers must guarantee the bound. */
InnerNode(const BoundBox &bounds, BVHNode **children, const int num_children)
: BVHNode(bounds), num_children_(num_children)
{
visibility = 0;
time_from = FLT_MAX;
time_to = -FLT_MAX;
for (int i = 0; i < num_children; ++i) {
assert(children[i] != NULL);
visibility |= children[i]->visibility;
this->children[i] = children[i];
time_from = min(time_from, children[i]->time_from);
time_to = max(time_to, children[i]->time_to);
}
reset_unused_children();
}
/* NOTE: This function is only used during binary BVH builder, and it
* supposed to be configured to have 2 children which will be filled in in a
* bit. But this is important to have children reset to NULL. */
explicit InnerNode(const BoundBox& bounds)
: BVHNode(bounds),
num_children_(0)
{
reset_unused_children();
visibility = 0;
num_children_ = 2;
}
/* NOTE: This function is only used during binary BVH builder, and it
* supposed to be configured to have 2 children which will be filled in in a
* bit. But this is important to have children reset to NULL. */
explicit InnerNode(const BoundBox &bounds) : BVHNode(bounds), num_children_(0)
{
/* num_children_ is 0 here so that every slot is cleared; it is set to 2 afterwards. */
reset_unused_children();
visibility = 0;
num_children_ = 2;
}
bool is_leaf() const { return false; }
int num_children() const { return num_children_; }
BVHNode *get_child(int i) const
{
assert(i >= 0 && i < num_children_);
return children[i];
}
void print(int depth) const;
bool is_leaf() const
{
return false;
}
int num_children() const
{
return num_children_;
}
BVHNode *get_child(int i) const
{
assert(i >= 0 && i < num_children_);
return children[i];
}
void print(int depth) const;
int num_children_;
BVHNode *children[kNumMaxChildren];
int num_children_;
BVHNode *children[kNumMaxChildren];
protected:
void reset_unused_children()
{
for(int i = num_children_; i < kNumMaxChildren; ++i) {
children[i] = NULL;
}
}
protected:
/* Set every slot from num_children_ up to kNumMaxChildren - 1 to NULL. */
void reset_unused_children()
{
for (int i = num_children_; i < kNumMaxChildren; ++i) {
children[i] = NULL;
}
}
};
/* Leaf BVH node described by two integers `lo` and `hi`; num_triangles()
 * reports hi - lo, which suggests a half-open index range [lo, hi) --
 * confirm against the builder that fills these in. */
class LeafNode : public BVHNode
{
public:
LeafNode(const BoundBox& bounds, uint visibility, int lo, int hi)
: BVHNode(bounds),
lo(lo),
hi(hi)
{
this->bounds = bounds;
this->visibility = visibility;
}
class LeafNode : public BVHNode {
public:
/* `bounds` is already stored by the BVHNode(bounds) base initializer; the
 * assignment in the body stores the same value again. */
LeafNode(const BoundBox &bounds, uint visibility, int lo, int hi)
: BVHNode(bounds), lo(lo), hi(hi)
{
this->bounds = bounds;
this->visibility = visibility;
}
LeafNode(const LeafNode& other)
: BVHNode(other),
lo(other.lo),
hi(other.hi)
{
}
LeafNode(const LeafNode &other) : BVHNode(other), lo(other.lo), hi(other.hi)
{
}
bool is_leaf() const { return true; }
int num_children() const { return 0; }
BVHNode *get_child(int) const { return NULL; }
int num_triangles() const { return hi - lo; }
void print(int depth) const;
bool is_leaf() const
{
return true;
}
int num_children() const
{
return 0;
}
BVHNode *get_child(int) const
{
return NULL;
}
int num_triangles() const
{
return hi - lo;
}
void print(int depth) const;
int lo;
int hi;
int lo;
int hi;
};
CCL_NAMESPACE_END
#endif /* __BVH_NODE_H__ */
#endif /* __BVH_NODE_H__ */

View File

@@ -43,120 +43,121 @@ const char *bvh_layout_name(BVHLayout layout);
/* BVH Parameters */
/* Plain bundle of BVH build settings. All members are public; the default
 * constructor assigns a default to every field except those it does not
 * mention (see the constructor body for the exact list). */
class BVHParams
{
public:
class BVHParams {
public:
/* spatial split area threshold */
bool use_spatial_split;
float spatial_split_alpha;
/* spatial split area threshold */
bool use_spatial_split;
float spatial_split_alpha;
/* Unaligned nodes creation threshold */
float unaligned_split_threshold;
/* Unaligned nodes creation threshold */
float unaligned_split_threshold;
/* SAH costs */
float sah_node_cost;
float sah_primitive_cost;
/* SAH costs */
float sah_node_cost;
float sah_primitive_cost;
/* number of primitives in leaf */
int min_leaf_size;
int max_triangle_leaf_size;
int max_motion_triangle_leaf_size;
int max_curve_leaf_size;
int max_motion_curve_leaf_size;
/* number of primitives in leaf */
int min_leaf_size;
int max_triangle_leaf_size;
int max_motion_triangle_leaf_size;
int max_curve_leaf_size;
int max_motion_curve_leaf_size;
/* object or mesh level bvh */
bool top_level;
/* object or mesh level bvh */
bool top_level;
/* BVH layout to be built. */
BVHLayout bvh_layout;
/* BVH layout to be built. */
BVHLayout bvh_layout;
/* Mask of primitives to be included into the BVH. */
int primitive_mask;
/* Mask of primitives to be included into the BVH. */
int primitive_mask;
/* Use unaligned bounding boxes.
* Only used for curves BVH.
*/
bool use_unaligned_nodes;
/* Use unaligned bounding boxes.
* Only used for curves BVH.
*/
bool use_unaligned_nodes;
/* Split time range to this number of steps and create leaf node for each
* of this time steps.
*
* Speeds up rendering of motion curve primitives in the cost of higher
* memory usage.
*/
int num_motion_curve_steps;
/* Split time range to this number of steps and create leaf node for each
* of this time steps.
*
* Speeds up rendering of motion curve primitives in the cost of higher
* memory usage.
*/
int num_motion_curve_steps;
/* Same as above, but for triangle primitives. */
int num_motion_triangle_steps;
/* Same as above, but for triangle primitives. */
int num_motion_triangle_steps;
/* Same as in SceneParams. */
int bvh_type;
/* Same as in SceneParams. */
int bvh_type;
/* These are needed for Embree. */
int curve_flags;
int curve_subdivisions;
/* These are needed for Embree. */
int curve_flags;
int curve_subdivisions;
/* fixed parameters */
enum { MAX_DEPTH = 64, MAX_SPATIAL_DEPTH = 48, NUM_SPATIAL_BINS = 32 };
/* fixed parameters */
enum {
MAX_DEPTH = 64,
MAX_SPATIAL_DEPTH = 48,
NUM_SPATIAL_BINS = 32
};
BVHParams()
{
use_spatial_split = true;
spatial_split_alpha = 1e-5f;
/* Assign the default value of each setting. */
BVHParams()
{
use_spatial_split = true;
spatial_split_alpha = 1e-5f;
unaligned_split_threshold = 0.7f;
unaligned_split_threshold = 0.7f;
/* todo: see if splitting up primitive cost to be separate for triangles
* and curves can help. so far in tests it doesn't help, but why? */
sah_node_cost = 1.0f;
sah_primitive_cost = 1.0f;
/* todo: see if splitting up primitive cost to be separate for triangles
* and curves can help. so far in tests it doesn't help, but why? */
sah_node_cost = 1.0f;
sah_primitive_cost = 1.0f;
min_leaf_size = 1;
max_triangle_leaf_size = 8;
max_motion_triangle_leaf_size = 8;
max_curve_leaf_size = 1;
max_motion_curve_leaf_size = 4;
min_leaf_size = 1;
max_triangle_leaf_size = 8;
max_motion_triangle_leaf_size = 8;
max_curve_leaf_size = 1;
max_motion_curve_leaf_size = 4;
top_level = false;
bvh_layout = BVH_LAYOUT_BVH2;
use_unaligned_nodes = false;
top_level = false;
bvh_layout = BVH_LAYOUT_BVH2;
use_unaligned_nodes = false;
primitive_mask = PRIMITIVE_ALL;
primitive_mask = PRIMITIVE_ALL;
num_motion_curve_steps = 0;
num_motion_triangle_steps = 0;
num_motion_curve_steps = 0;
num_motion_triangle_steps = 0;
bvh_type = 0;
bvh_type = 0;
curve_flags = 0;
curve_subdivisions = 4;
}
curve_flags = 0;
curve_subdivisions = 4;
}
/* SAH costs */
/* Total cost: node_cost(num_nodes) + primitive_cost(num_primitives). */
__forceinline float cost(int num_nodes, int num_primitives) const
{
return node_cost(num_nodes) + primitive_cost(num_primitives);
}
/* SAH costs */
__forceinline float cost(int num_nodes, int num_primitives) const
{ return node_cost(num_nodes) + primitive_cost(num_primitives); }
/* Linear in n: n * sah_primitive_cost. */
__forceinline float primitive_cost(int n) const
{
return n * sah_primitive_cost;
}
__forceinline float primitive_cost(int n) const
{ return n*sah_primitive_cost; }
/* Linear in n: n * sah_node_cost. */
__forceinline float node_cost(int n) const
{
return n * sah_node_cost;
}
__forceinline float node_cost(int n) const
{ return n*sah_node_cost; }
/* True when `size` is at most min_leaf_size or `level` has reached MAX_DEPTH.
 * NOTE(review): unlike the cost helpers above this member is not declared
 * const although it only reads state. */
__forceinline bool small_enough_for_leaf(int size, int level)
{
return (size <= min_leaf_size || level >= MAX_DEPTH);
}
__forceinline bool small_enough_for_leaf(int size, int level)
{ return (size <= min_leaf_size || level >= MAX_DEPTH); }
/* Gets best matching BVH.
*
* If the requested layout is supported by the device, it will be used.
* Otherwise, widest supported layout below that will be used.
*/
static BVHLayout best_bvh_layout(BVHLayout requested_layout,
BVHLayoutMask supported_layouts);
/* Gets best matching BVH.
*
* If the requested layout is supported by the device, it will be used.
* Otherwise, widest supported layout below that will be used.
*/
static BVHLayout best_bvh_layout(BVHLayout requested_layout, BVHLayoutMask supported_layouts);
};
/* BVH Reference
@@ -164,49 +165,65 @@ public:
* Reference to a primitive. Primitive index and object are sneakily packed
* into BoundBox to reduce memory usage and align nicely */
/* Reference to one primitive: its bounds, type and time range.
 *
 * The primitive index and object index are stored bit-for-bit in the `w`
 * components of rbounds.min / rbounds.max via __int_as_float() and read back
 * with __float_as_int(); they are integers, not meaningful float values. */
class BVHReference
{
public:
__forceinline BVHReference() {}
class BVHReference {
public:
/* Default constructor: the body is empty and no members are initialized
 * explicitly. */
__forceinline BVHReference()
{
}
__forceinline BVHReference(const BoundBox& bounds_,
int prim_index_,
int prim_object_,
int prim_type,
float time_from = 0.0f,
float time_to = 1.0f)
: rbounds(bounds_),
time_from_(time_from),
time_to_(time_to)
{
rbounds.min.w = __int_as_float(prim_index_);
rbounds.max.w = __int_as_float(prim_object_);
type = prim_type;
}
__forceinline BVHReference(const BoundBox &bounds_,
int prim_index_,
int prim_object_,
int prim_type,
float time_from = 0.0f,
float time_to = 1.0f)
: rbounds(bounds_), time_from_(time_from), time_to_(time_to)
{
/* Overwrite the w components copied from bounds_ with the packed indices. */
rbounds.min.w = __int_as_float(prim_index_);
rbounds.max.w = __int_as_float(prim_object_);
type = prim_type;
}
__forceinline const BoundBox& bounds() const { return rbounds; }
__forceinline int prim_index() const { return __float_as_int(rbounds.min.w); }
__forceinline int prim_object() const { return __float_as_int(rbounds.max.w); }
__forceinline int prim_type() const { return type; }
__forceinline float time_from() const { return time_from_; }
__forceinline float time_to() const { return time_to_; }
/* Returned bounds still carry the packed indices in their w components. */
__forceinline const BoundBox &bounds() const
{
return rbounds;
}
__forceinline int prim_index() const
{
return __float_as_int(rbounds.min.w);
}
__forceinline int prim_object() const
{
return __float_as_int(rbounds.max.w);
}
__forceinline int prim_type() const
{
return type;
}
__forceinline float time_from() const
{
return time_from_;
}
__forceinline float time_to() const
{
return time_to_;
}
/* Copy-assignment implemented as a raw memcpy of the whole object, skipped on
 * self-assignment. */
BVHReference &operator=(const BVHReference &arg)
{
if (&arg != this) {
/* TODO(sergey): Check if it is still faster to memcpy() with
* modern compilers.
*/
memcpy((void *)this, &arg, sizeof(BVHReference));
}
return *this;
}
BVHReference& operator=(const BVHReference &arg) {
if(&arg != this) {
/* TODO(sergey): Check if it is still faster to memcpy() with
* modern compilers.
*/
memcpy((void *)this, &arg, sizeof(BVHReference));
}
return *this;
}
protected:
BoundBox rbounds;
uint type;
float time_from_, time_to_;
protected:
BoundBox rbounds;
uint type;
float time_from_, time_to_;
};
/* BVH Range
@@ -215,53 +232,68 @@ protected:
* the reference array of a subset of primitives. Again uses trickery to pack
* integers into BoundBox for alignment purposes. */
/* A (start, size) range plus two bounding boxes.
 *
 * `start` and `size` are stored bit-for-bit in rbounds.min.w and
 * rbounds.max.w via __int_as_float() and read back with __float_as_int().
 * `cbounds` is only set by the four-argument constructor; the other
 * constructors leave it default-constructed. */
class BVHRange
{
public:
__forceinline BVHRange()
{
rbounds.min.w = __int_as_float(0);
rbounds.max.w = __int_as_float(0);
}
class BVHRange {
public:
/* Empty range: start 0, size 0. */
__forceinline BVHRange()
{
rbounds.min.w = __int_as_float(0);
rbounds.max.w = __int_as_float(0);
}
__forceinline BVHRange(const BoundBox& bounds_, int start_, int size_)
: rbounds(bounds_)
{
rbounds.min.w = __int_as_float(start_);
rbounds.max.w = __int_as_float(size_);
}
__forceinline BVHRange(const BoundBox &bounds_, int start_, int size_) : rbounds(bounds_)
{
rbounds.min.w = __int_as_float(start_);
rbounds.max.w = __int_as_float(size_);
}
__forceinline BVHRange(const BoundBox& bounds_, const BoundBox& cbounds_, int start_, int size_)
: rbounds(bounds_), cbounds(cbounds_)
{
rbounds.min.w = __int_as_float(start_);
rbounds.max.w = __int_as_float(size_);
}
__forceinline BVHRange(const BoundBox &bounds_, const BoundBox &cbounds_, int start_, int size_)
: rbounds(bounds_), cbounds(cbounds_)
{
rbounds.min.w = __int_as_float(start_);
rbounds.max.w = __int_as_float(size_);
}
__forceinline void set_start(int start_) { rbounds.min.w = __int_as_float(start_); }
/* Change the start index; the size is left untouched. */
__forceinline void set_start(int start_)
{
rbounds.min.w = __int_as_float(start_);
}
__forceinline const BoundBox& bounds() const { return rbounds; }
__forceinline const BoundBox& cent_bounds() const { return cbounds; }
__forceinline int start() const { return __float_as_int(rbounds.min.w); }
__forceinline int size() const { return __float_as_int(rbounds.max.w); }
__forceinline int end() const { return start() + size(); }
__forceinline const BoundBox &bounds() const
{
return rbounds;
}
__forceinline const BoundBox &cent_bounds() const
{
return cbounds;
}
__forceinline int start() const
{
return __float_as_int(rbounds.min.w);
}
__forceinline int size() const
{
return __float_as_int(rbounds.max.w);
}
/* One past the last index of the range: start() + size(). */
__forceinline int end() const
{
return start() + size();
}
protected:
BoundBox rbounds;
BoundBox cbounds;
protected:
BoundBox rbounds;
BoundBox cbounds;
};
/* BVH Spatial Bin */
/* One bin: a bounding box plus two integer counters `enter` and `exit`.
 * The constructor body is empty, so `enter` and `exit` are not initialized
 * here; code using a bin must set them before reading. */
struct BVHSpatialBin
{
BoundBox bounds;
int enter;
int exit;
struct BVHSpatialBin {
BoundBox bounds;
int enter;
int exit;
__forceinline BVHSpatialBin()
{
}
__forceinline BVHSpatialBin()
{
}
};
/* BVH Spatial Storage
@@ -272,18 +304,18 @@ struct BVHSpatialBin
*/
struct BVHSpatialStorage {
  /* Accumulated bounds when sweeping from right to left.
   * Resized by the split constructors before every use. */
  vector<BoundBox> right_bounds;

  /* Bins used for histogram when selecting best split plane,
   * one row of NUM_SPATIAL_BINS bins per axis. */
  BVHSpatialBin bins[3][BVHParams::NUM_SPATIAL_BINS];

  /* Temporary storage for the new references. Used by spatial split to
   * collect new references before they are inserted into the actual array. */
  vector<BVHReference> new_references;
};
CCL_NAMESPACE_END
#endif /* __BVH_PARAMS_H__ */
#endif /* __BVH_PARAMS_H__ */

View File

@@ -27,79 +27,77 @@ CCL_NAMESPACE_BEGIN
static const int BVH_SORT_THRESHOLD = 4096;
/* Ordering functor for BVHReference along one axis.
 *
 * References are ordered by the sum min[dim] + max[dim] of their bounds
 * (twice the center coordinate), with object, primitive index and primitive
 * type used as tie breakers. */
struct BVHReferenceCompare {
 public:
  /* Axis to compare along. */
  int dim;
  /* Helper used to compute bounds in aligned_space; only dereferenced when
   * aligned_space is not NULL. */
  const BVHUnaligned *unaligned_heuristic;
  /* Optional space to measure the bounds in, NULL to use the stored bounds. */
  const Transform *aligned_space;

  BVHReferenceCompare(int dim,
                      const BVHUnaligned *unaligned_heuristic,
                      const Transform *aligned_space)
      : dim(dim), unaligned_heuristic(unaligned_heuristic), aligned_space(aligned_space)
  {
  }

  /* Bounds of the primitive in the space the comparison is done in. */
  __forceinline BoundBox get_prim_bounds(const BVHReference &prim) const
  {
    if (aligned_space == NULL) {
      return prim.bounds();
    }
    return unaligned_heuristic->compute_aligned_prim_boundbox(prim, *aligned_space);
  }

  /* Three-way comparison of two references.
   *
   * The return value follows the strcmp() convention: negative when ra sorts
   * first, positive when rb sorts first, zero when all keys are equal.
   */
  __forceinline int compare(const BVHReference &ra, const BVHReference &rb) const
  {
    BoundBox bounds_a = get_prim_bounds(ra);
    BoundBox bounds_b = get_prim_bounds(rb);
    float center_a = bounds_a.min[dim] + bounds_a.max[dim];
    float center_b = bounds_b.min[dim] + bounds_b.max[dim];

    /* Primary key: position along the axis. */
    if (center_a < center_b) {
      return -1;
    }
    if (center_a > center_b) {
      return 1;
    }

    /* Tie breakers, in order: object, primitive index, primitive type. */
    if (ra.prim_object() < rb.prim_object()) {
      return -1;
    }
    if (ra.prim_object() > rb.prim_object()) {
      return 1;
    }
    if (ra.prim_index() < rb.prim_index()) {
      return -1;
    }
    if (ra.prim_index() > rb.prim_index()) {
      return 1;
    }
    if (ra.prim_type() < rb.prim_type()) {
      return -1;
    }
    if (ra.prim_type() > rb.prim_type()) {
      return 1;
    }

    return 0;
  }

  /* "Less than" predicate built on compare(), usable with sort(). */
  bool operator()(const BVHReference &ra, const BVHReference &rb)
  {
    const int order = compare(ra, rb);
    return order < 0;
  }
};
/* Forward declaration: BVHSortTask below binds this function as its job. */
static void bvh_reference_sort_threaded(TaskPool *task_pool,
                                        BVHReference *data,
                                        const int job_start,
                                        const int job_end,
                                        const BVHReferenceCompare &compare);
/* Task that runs bvh_reference_sort_threaded() on one sub-range of the
 * reference array. */
class BVHSortTask : public Task {
 public:
  /* Bind all arguments now; the bound call is stored in `run`. */
  BVHSortTask(TaskPool *task_pool,
              BVHReference *data,
              const int job_start,
              const int job_end,
              const BVHReferenceCompare &compare)
  {
    run = function_bind(bvh_reference_sort_threaded,
                        task_pool,
                        data,
                        job_start,
                        job_end,
                        compare);
  }
};
/* Multi-threaded reference sort. */
@@ -107,74 +105,71 @@ static void bvh_reference_sort_threaded(TaskPool *task_pool,
BVHReference *data,
const int job_start,
const int job_end,
const BVHReferenceCompare& compare)
const BVHReferenceCompare &compare)
{
int start = job_start, end = job_end;
bool have_work = (start < end);
while(have_work) {
const int count = job_end - job_start;
if(count < BVH_SORT_THRESHOLD) {
/* Number of reference low enough, faster to finish the job
* in one thread rather than to spawn more threads.
*/
sort(data+job_start, data+job_end+1, compare);
break;
}
/* Single QSort step.
* Use median-of-three method for the pivot point.
*/
int left = start, right = end;
int center = (left + right) >> 1;
if(compare.compare(data[left], data[center]) > 0) {
swap(data[left], data[center]);
}
if(compare.compare(data[left], data[right]) > 0) {
swap(data[left], data[right]);
}
if(compare.compare(data[center], data[right]) > 0) {
swap(data[center], data[right]);
}
swap(data[center], data[right - 1]);
BVHReference median = data[right - 1];
do {
while(compare.compare(data[left], median) < 0) {
++left;
}
while(compare.compare(data[right], median) > 0) {
--right;
}
if(left <= right) {
swap(data[left], data[right]);
++left;
--right;
}
} while(left <= right);
/* We only create one new task here to reduce downside effects of
* latency in TaskScheduler.
* So generally current thread keeps working on the left part of the
* array, and we create new task for the right side.
* However, if there's nothing to be done in the left side of the array
* we don't create any tasks and make it so current thread works on the
* right side.
*/
have_work = false;
if(left < end) {
if(start < right) {
task_pool->push(new BVHSortTask(task_pool,
data,
left, end,
compare), true);
}
else {
start = left;
have_work = true;
}
}
if(start < right) {
end = right;
have_work = true;
}
}
int start = job_start, end = job_end;
bool have_work = (start < end);
while (have_work) {
const int count = job_end - job_start;
if (count < BVH_SORT_THRESHOLD) {
/* Number of reference low enough, faster to finish the job
* in one thread rather than to spawn more threads.
*/
sort(data + job_start, data + job_end + 1, compare);
break;
}
/* Single QSort step.
* Use median-of-three method for the pivot point.
*/
int left = start, right = end;
int center = (left + right) >> 1;
if (compare.compare(data[left], data[center]) > 0) {
swap(data[left], data[center]);
}
if (compare.compare(data[left], data[right]) > 0) {
swap(data[left], data[right]);
}
if (compare.compare(data[center], data[right]) > 0) {
swap(data[center], data[right]);
}
swap(data[center], data[right - 1]);
BVHReference median = data[right - 1];
do {
while (compare.compare(data[left], median) < 0) {
++left;
}
while (compare.compare(data[right], median) > 0) {
--right;
}
if (left <= right) {
swap(data[left], data[right]);
++left;
--right;
}
} while (left <= right);
/* We only create one new task here to reduce downside effects of
* latency in TaskScheduler.
* So generally current thread keeps working on the left part of the
* array, and we create new task for the right side.
* However, if there's nothing to be done in the left side of the array
* we don't create any tasks and make it so current thread works on the
* right side.
*/
have_work = false;
if (left < end) {
if (start < right) {
task_pool->push(new BVHSortTask(task_pool, data, left, end, compare), true);
}
else {
start = left;
have_work = true;
}
}
if (start < right) {
end = right;
have_work = true;
}
}
}
void bvh_reference_sort(int start,
@@ -184,20 +179,20 @@ void bvh_reference_sort(int start,
const BVHUnaligned *unaligned_heuristic,
const Transform *aligned_space)
{
const int count = end - start;
BVHReferenceCompare compare(dim, unaligned_heuristic, aligned_space);
if(count < BVH_SORT_THRESHOLD) {
/* It is important to not use any mutex if array is small enough,
* otherwise we end up in situation when we're going to sleep far
* too often.
*/
sort(data+start, data+end, compare);
}
else {
TaskPool task_pool;
bvh_reference_sort_threaded(&task_pool, data, start, end - 1, compare);
task_pool.wait_work();
}
const int count = end - start;
BVHReferenceCompare compare(dim, unaligned_heuristic, aligned_space);
if (count < BVH_SORT_THRESHOLD) {
/* It is important to not use any mutex if array is small enough,
* otherwise we end up in situation when we're going to sleep far
* too often.
*/
sort(data + start, data + end, compare);
}
else {
TaskPool task_pool;
bvh_reference_sort_threaded(&task_pool, data, start, end - 1, compare);
task_pool.wait_work();
}
}
CCL_NAMESPACE_END

View File

@@ -35,4 +35,4 @@ void bvh_reference_sort(int start,
CCL_NAMESPACE_END
#endif /* __BVH_SORT_H__ */
#endif /* __BVH_SORT_H__ */

View File

@@ -31,322 +31,314 @@ CCL_NAMESPACE_BEGIN
/* Evaluate object splits of `range` along all three axes.
 *
 * For every axis the references of the range are sorted, then every position
 * between two neighbouring references is scored with
 *   nodeSAH + area(left) * cost(num_left) + area(right) * cost(num_right).
 * The cheapest candidate is stored in sah / dim / num_left / left_bounds /
 * right_bounds. If no candidate exists, sah stays at FLT_MAX.
 *
 * Note: the references are left sorted along the last axis tried; split()
 * sorts them again along the selected axis. */
BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
                               BVHSpatialStorage *storage,
                               const BVHRange &range,
                               vector<BVHReference> *references,
                               float nodeSAH,
                               const BVHUnaligned *unaligned_heuristic,
                               const Transform *aligned_space)
    : sah(FLT_MAX),
      dim(0),
      num_left(0),
      left_bounds(BoundBox::empty),
      right_bounds(BoundBox::empty),
      storage_(storage),
      references_(references),
      unaligned_heuristic_(unaligned_heuristic),
      aligned_space_(aligned_space)
{
  /* First reference of the range; sorting happens in place, so the pointer
   * stays valid across the sorts below. */
  const BVHReference *first_ref = &references_->at(range.start());
  float best_cost = FLT_MAX;

  storage_->right_bounds.resize(range.size());

  for (int axis = 0; axis < 3; axis++) {
    /* Sort references. */
    bvh_reference_sort(range.start(),
                       range.end(),
                       &references_->at(0),
                       axis,
                       unaligned_heuristic_,
                       aligned_space_);

    /* Sweep right to left: slot i - 1 receives the bounds of references
     * i .. size - 1. */
    BoundBox sweep_right = BoundBox::empty;
    for (int i = range.size() - 1; i > 0; i--) {
      BoundBox prim_bounds = get_prim_bounds(first_ref[i]);
      sweep_right.grow(prim_bounds);
      storage_->right_bounds[i - 1] = sweep_right;
    }

    /* Sweep left to right and select lowest SAH; i is the number of
     * references on the left side of the candidate. */
    BoundBox sweep_left = BoundBox::empty;

    for (int i = 1; i < range.size(); i++) {
      BoundBox prim_bounds = get_prim_bounds(first_ref[i - 1]);
      sweep_left.grow(prim_bounds);
      sweep_right = storage_->right_bounds[i - 1];

      float cost = nodeSAH + sweep_left.safe_area() * builder->params.primitive_cost(i) +
                   sweep_right.safe_area() * builder->params.primitive_cost(range.size() - i);

      if (cost < best_cost) {
        best_cost = cost;

        this->sah = cost;
        this->dim = axis;
        this->num_left = i;
        this->left_bounds = sweep_left;
        this->right_bounds = sweep_right;
      }
    }
  }
}
/* Apply the split chosen by the constructor.
 *
 * The references of `range` are sorted along this->dim, the first num_left of
 * them become `left` and the remaining ones become `right`. */
void BVHObjectSplit::split(BVHRange &left, BVHRange &right, const BVHRange &range)
{
  assert(references_->size() > 0);

  /* Sort references according to the selected split axis. */
  bvh_reference_sort(range.start(),
                     range.end(),
                     &references_->at(0),
                     this->dim,
                     unaligned_heuristic_,
                     aligned_space_);

  const int num_right = range.size() - this->num_left;
  BoundBox effective_left_bounds, effective_right_bounds;

  if (aligned_space_ != NULL) {
    /* The bounds stored by the constructor came from get_prim_bounds();
     * with an aligned space set, rebuild both sides from the references'
     * own bounds() instead. */
    effective_left_bounds = BoundBox::empty;
    effective_right_bounds = BoundBox::empty;

    const int right_first = range.start() + this->num_left;
    for (int i = 0; i < this->num_left; ++i) {
      BoundBox prim_boundbox = references_->at(range.start() + i).bounds();
      effective_left_bounds.grow(prim_boundbox);
    }
    for (int i = 0; i < num_right; ++i) {
      BoundBox prim_boundbox = references_->at(right_first + i).bounds();
      effective_right_bounds.grow(prim_boundbox);
    }
  }
  else {
    effective_left_bounds = left_bounds;
    effective_right_bounds = right_bounds;
  }

  /* Split node ranges; right starts where left ends. */
  left = BVHRange(effective_left_bounds, range.start(), this->num_left);
  right = BVHRange(effective_right_bounds, left.end(), num_right);
}
/* Spatial Split */
/* Evaluate spatial splits of `range` along all three axes.
 *
 * The bounds of the range are cut into NUM_SPATIAL_BINS equally sized bins
 * per axis. Every reference is chopped at the bin borders it crosses and the
 * pieces grow the bounds of the bins they fall into; enter/exit count in
 * which bin a reference starts and ends. Each border between two bins is then
 * scored with the SAH and the cheapest one is stored in sah / dim / pos. */
BVHSpatialSplit::BVHSpatialSplit(const BVHBuild &builder,
                                 BVHSpatialStorage *storage,
                                 const BVHRange &range,
                                 vector<BVHReference> *references,
                                 float nodeSAH,
                                 const BVHUnaligned *unaligned_heuristic,
                                 const Transform *aligned_space)
    : sah(FLT_MAX),
      dim(0),
      pos(0.0f),
      storage_(storage),
      references_(references),
      unaligned_heuristic_(unaligned_heuristic),
      aligned_space_(aligned_space)
{
  /* Bounds that get divided into bins. */
  BoundBox range_bounds;
  if (aligned_space != NULL) {
    range_bounds = unaligned_heuristic->compute_aligned_boundbox(
        range, &references->at(0), *aligned_space);
  }
  else {
    range_bounds = range.bounds();
  }

  float3 origin = range_bounds.min;
  float3 binSize = (range_bounds.max - origin) * (1.0f / (float)BVHParams::NUM_SPATIAL_BINS);
  float3 invBinSize = 1.0f / binSize;

  /* Initialize bins. */
  for (int axis = 0; axis < 3; axis++) {
    for (int b = 0; b < BVHParams::NUM_SPATIAL_BINS; b++) {
      BVHSpatialBin &bin = storage_->bins[axis][b];

      bin.bounds = BoundBox::empty;
      bin.enter = 0;
      bin.exit = 0;
    }
  }

  /* Chop references into bins. */
  for (unsigned int refIdx = range.start(); refIdx < range.end(); refIdx++) {
    const BVHReference &ref = references_->at(refIdx);
    BoundBox prim_bounds = get_prim_bounds(ref);

    /* First and last bin touched by the reference, per axis. */
    float3 first_binf = (prim_bounds.min - origin) * invBinSize;
    float3 last_binf = (prim_bounds.max - origin) * invBinSize;
    int3 first_bin = make_int3((int)first_binf.x, (int)first_binf.y, (int)first_binf.z);
    int3 last_bin = make_int3((int)last_binf.x, (int)last_binf.y, (int)last_binf.z);

    first_bin = clamp(first_bin, 0, BVHParams::NUM_SPATIAL_BINS - 1);
    last_bin = clamp(last_bin, first_bin, BVHParams::NUM_SPATIAL_BINS - 1);

    for (int axis = 0; axis < 3; axis++) {
      /* Remaining, not yet binned part of the reference. */
      BVHReference remainder(
          get_prim_bounds(ref), ref.prim_index(), ref.prim_object(), ref.prim_type());

      for (int b = first_bin[axis]; b < last_bin[axis]; b++) {
        BVHReference left_part, right_part;

        /* Cut at the border between bin b and bin b + 1. */
        const float border = origin[axis] + binSize[axis] * (float)(b + 1);
        split_reference(builder, left_part, right_part, remainder, axis, border);
        storage_->bins[axis][b].bounds.grow(left_part.bounds());
        remainder = right_part;
      }

      storage_->bins[axis][last_bin[axis]].bounds.grow(remainder.bounds());
      storage_->bins[axis][first_bin[axis]].enter++;
      storage_->bins[axis][last_bin[axis]].exit++;
    }
  }

  /* Select best split plane. */
  storage_->right_bounds.resize(BVHParams::NUM_SPATIAL_BINS);
  for (int axis = 0; axis < 3; axis++) {
    /* Sweep right to left and determine bounds. */
    BoundBox sweep_right = BoundBox::empty;
    for (int b = BVHParams::NUM_SPATIAL_BINS - 1; b > 0; b--) {
      sweep_right.grow(storage_->bins[axis][b].bounds);
      storage_->right_bounds[b - 1] = sweep_right;
    }

    /* Sweep left to right and select lowest SAH. */
    BoundBox sweep_left = BoundBox::empty;
    int num_left = 0;
    int num_right = range.size();

    for (int b = 1; b < BVHParams::NUM_SPATIAL_BINS; b++) {
      const BVHSpatialBin &prev_bin = storage_->bins[axis][b - 1];
      sweep_left.grow(prev_bin.bounds);
      num_left += prev_bin.enter;
      num_right -= prev_bin.exit;

      float cost = nodeSAH + sweep_left.safe_area() * builder.params.primitive_cost(num_left) +
                   storage_->right_bounds[b - 1].safe_area() *
                       builder.params.primitive_cost(num_right);

      if (cost < this->sah) {
        this->sah = cost;
        this->dim = axis;
        this->pos = origin[axis] + binSize[axis] * (float)b;
      }
    }
  }
}
/* Apply the spatial split chosen by the constructor (this->dim, this->pos).
 *
 * References entirely on one side of the plane are moved to that side.
 * References straddling the plane are either kept whole on one side
 * ("unsplit") or duplicated into both sides, whichever gives the lowest SAH.
 * Duplicates are inserted into the reference array, so `right` may extend
 * past range.end(). On return `left` and `right` describe the two child
 * ranges. */
void BVHSpatialSplit::split(BVHBuild *builder,
                            BVHRange &left,
                            BVHRange &right,
                            const BVHRange &range)
{
  /* Categorize references and compute bounds.
   *
   * Left-hand side:      [left_start, left_end[
   * Uncategorized/split: [left_end, right_start[
   * Right-hand side:     [right_start, refs.size()[ */

  vector<BVHReference> &refs = *references_;
  int left_start = range.start();
  int left_end = left_start;
  int right_start = range.end();
  int right_end = range.end();
  BoundBox left_bounds = BoundBox::empty;
  BoundBox right_bounds = BoundBox::empty;

  for (int i = left_end; i < right_start; i++) {
    BoundBox prim_bounds = get_prim_bounds(refs[i]);
    if (prim_bounds.max[this->dim] <= this->pos) {
      /* entirely on the left-hand side */
      left_bounds.grow(prim_bounds);
      swap(refs[i], refs[left_end++]);
    }
    else if (prim_bounds.min[this->dim] >= this->pos) {
      /* entirely on the right-hand side; the element swapped in from the
       * right is still uncategorized, so revisit index i. */
      right_bounds.grow(prim_bounds);
      swap(refs[i--], refs[--right_start]);
    }
  }

  /* Duplicate or unsplit references intersecting both sides.
   *
   * Duplication happens into a temporary pre-allocated vector in order to
   * reduce number of memmove() calls happening in vector.insert().
   */
  vector<BVHReference> &new_refs = storage_->new_references;
  new_refs.clear();
  new_refs.reserve(right_start - left_end);
  while (left_end < right_start) {
    /* split reference. */
    BVHReference curr_ref(get_prim_bounds(refs[left_end]),
                          refs[left_end].prim_index(),
                          refs[left_end].prim_object(),
                          refs[left_end].prim_type());
    BVHReference lref, rref;
    split_reference(*builder, lref, rref, curr_ref, this->dim, this->pos);

    /* compute SAH for duplicate/unsplit candidates. */
    BoundBox lub = left_bounds;  /* Unsplit to left:     new left-hand bounds. */
    BoundBox rub = right_bounds; /* Unsplit to right:    new right-hand bounds. */
    BoundBox ldb = left_bounds;  /* Duplicate:           new left-hand bounds. */
    BoundBox rdb = right_bounds; /* Duplicate:           new right-hand bounds. */

    lub.grow(curr_ref.bounds());
    rub.grow(curr_ref.bounds());
    ldb.grow(lref.bounds());
    rdb.grow(rref.bounds());

    /* Primitive costs of each side without (a) and with (b) the current
     * reference added. */
    float lac = builder->params.primitive_cost(left_end - left_start);
    float rac = builder->params.primitive_cost(right_end - right_start);
    float lbc = builder->params.primitive_cost(left_end - left_start + 1);
    float rbc = builder->params.primitive_cost(right_end - right_start + 1);

    float unsplitLeftSAH = lub.safe_area() * lbc + right_bounds.safe_area() * rac;
    float unsplitRightSAH = left_bounds.safe_area() * lac + rub.safe_area() * rbc;
    float duplicateSAH = ldb.safe_area() * lbc + rdb.safe_area() * rbc;
    float minSAH = min(min(unsplitLeftSAH, unsplitRightSAH), duplicateSAH);

    if (minSAH == unsplitLeftSAH) {
      /* unsplit to left */
      left_bounds = lub;
      left_end++;
    }
    else if (minSAH == unsplitRightSAH) {
      /* unsplit to right */
      right_bounds = rub;
      swap(refs[left_end], refs[--right_start]);
    }
    else {
      /* duplicate */
      left_bounds = ldb;
      right_bounds = rdb;
      refs[left_end++] = lref;
      new_refs.push_back(rref);
      right_end++;
    }
  }

  /* Insert duplicated references into actual array in one go.
   * right_end already counts them, so the insertion point is the end of the
   * original range. */
  if (new_refs.size() != 0) {
    refs.insert(refs.begin() + (right_end - new_refs.size()), new_refs.begin(), new_refs.end());
  }

  if (aligned_space_ != NULL) {
    /* Rebuild both bounds from the references' own bounds(). The ranges are
     * half-open index intervals [left_start, left_end[ and
     * [right_start, right_end[, so the loops run up to the end index and not
     * up to the element count. */
    left_bounds = right_bounds = BoundBox::empty;
    for (int i = left_start; i < left_end; ++i) {
      BoundBox prim_boundbox = references_->at(i).bounds();
      left_bounds.grow(prim_boundbox);
    }
    for (int i = right_start; i < right_end; ++i) {
      BoundBox prim_boundbox = references_->at(i).bounds();
      right_bounds.grow(prim_boundbox);
    }
  }

  left = BVHRange(left_bounds, left_start, left_end - left_start);
  right = BVHRange(right_bounds, right_start, right_end - right_start);
}
void BVHSpatialSplit::split_triangle_primitive(const Mesh *mesh,
@@ -354,36 +346,36 @@ void BVHSpatialSplit::split_triangle_primitive(const Mesh *mesh,
int prim_index,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds)
BoundBox &left_bounds,
BoundBox &right_bounds)
{
Mesh::Triangle t = mesh->get_triangle(prim_index);
const float3 *verts = &mesh->verts[0];
float3 v1 = tfm ? transform_point(tfm, verts[t.v[2]]) : verts[t.v[2]];
v1 = get_unaligned_point(v1);
Mesh::Triangle t = mesh->get_triangle(prim_index);
const float3 *verts = &mesh->verts[0];
float3 v1 = tfm ? transform_point(tfm, verts[t.v[2]]) : verts[t.v[2]];
v1 = get_unaligned_point(v1);
for(int i = 0; i < 3; i++) {
float3 v0 = v1;
int vindex = t.v[i];
v1 = tfm ? transform_point(tfm, verts[vindex]) : verts[vindex];
v1 = get_unaligned_point(v1);
float v0p = v0[dim];
float v1p = v1[dim];
for (int i = 0; i < 3; i++) {
float3 v0 = v1;
int vindex = t.v[i];
v1 = tfm ? transform_point(tfm, verts[vindex]) : verts[vindex];
v1 = get_unaligned_point(v1);
float v0p = v0[dim];
float v1p = v1[dim];
/* insert vertex to the boxes it belongs to. */
if(v0p <= pos)
left_bounds.grow(v0);
/* insert vertex to the boxes it belongs to. */
if (v0p <= pos)
left_bounds.grow(v0);
if(v0p >= pos)
right_bounds.grow(v0);
if (v0p >= pos)
right_bounds.grow(v0);
/* edge intersects the plane => insert intersection to both boxes. */
if((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) {
float3 t = lerp(v0, v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f));
left_bounds.grow(t);
right_bounds.grow(t);
}
}
/* edge intersects the plane => insert intersection to both boxes. */
if ((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) {
float3 t = lerp(v0, v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f));
left_bounds.grow(t);
right_bounds.grow(t);
}
}
}
void BVHSpatialSplit::split_curve_primitive(const Mesh *mesh,
@@ -392,163 +384,125 @@ void BVHSpatialSplit::split_curve_primitive(const Mesh *mesh,
int segment_index,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds)
BoundBox &left_bounds,
BoundBox &right_bounds)
{
/* curve split: NOTE - Currently ignores curve width and needs to be fixed.*/
Mesh::Curve curve = mesh->get_curve(prim_index);
const int k0 = curve.first_key + segment_index;
const int k1 = k0 + 1;
float3 v0 = mesh->curve_keys[k0];
float3 v1 = mesh->curve_keys[k1];
/* curve split: NOTE - Currently ignores curve width and needs to be fixed.*/
Mesh::Curve curve = mesh->get_curve(prim_index);
const int k0 = curve.first_key + segment_index;
const int k1 = k0 + 1;
float3 v0 = mesh->curve_keys[k0];
float3 v1 = mesh->curve_keys[k1];
if(tfm != NULL) {
v0 = transform_point(tfm, v0);
v1 = transform_point(tfm, v1);
}
v0 = get_unaligned_point(v0);
v1 = get_unaligned_point(v1);
if (tfm != NULL) {
v0 = transform_point(tfm, v0);
v1 = transform_point(tfm, v1);
}
v0 = get_unaligned_point(v0);
v1 = get_unaligned_point(v1);
float v0p = v0[dim];
float v1p = v1[dim];
float v0p = v0[dim];
float v1p = v1[dim];
/* insert vertex to the boxes it belongs to. */
if(v0p <= pos)
left_bounds.grow(v0);
/* insert vertex to the boxes it belongs to. */
if (v0p <= pos)
left_bounds.grow(v0);
if(v0p >= pos)
right_bounds.grow(v0);
if (v0p >= pos)
right_bounds.grow(v0);
if(v1p <= pos)
left_bounds.grow(v1);
if (v1p <= pos)
left_bounds.grow(v1);
if(v1p >= pos)
right_bounds.grow(v1);
if (v1p >= pos)
right_bounds.grow(v1);
/* edge intersects the plane => insert intersection to both boxes. */
if((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) {
float3 t = lerp(v0, v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f));
left_bounds.grow(t);
right_bounds.grow(t);
}
/* edge intersects the plane => insert intersection to both boxes. */
if ((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) {
float3 t = lerp(v0, v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f));
left_bounds.grow(t);
right_bounds.grow(t);
}
}
void BVHSpatialSplit::split_triangle_reference(const BVHReference& ref,
void BVHSpatialSplit::split_triangle_reference(const BVHReference &ref,
const Mesh *mesh,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds)
BoundBox &left_bounds,
BoundBox &right_bounds)
{
split_triangle_primitive(mesh,
NULL,
ref.prim_index(),
dim,
pos,
left_bounds,
right_bounds);
split_triangle_primitive(mesh, NULL, ref.prim_index(), dim, pos, left_bounds, right_bounds);
}
/* Split the curve segment referenced by `ref` at `pos` along axis `dim`,
 * growing `left_bounds` and `right_bounds` accordingly.
 *
 * Thin wrapper around split_curve_primitive(): the curve index comes from
 * ref.prim_index(), the segment index is unpacked from ref.prim_type(), and a
 * NULL transform is passed. */
void BVHSpatialSplit::split_curve_reference(const BVHReference& ref,
void BVHSpatialSplit::split_curve_reference(const BVHReference &ref,
const Mesh *mesh,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds)
BoundBox &left_bounds,
BoundBox &right_bounds)
{
split_curve_primitive(mesh,
NULL,
ref.prim_index(),
PRIMITIVE_UNPACK_SEGMENT(ref.prim_type()),
dim,
pos,
left_bounds,
right_bounds);
split_curve_primitive(mesh,
NULL,
ref.prim_index(),
PRIMITIVE_UNPACK_SEGMENT(ref.prim_type()),
dim,
pos,
left_bounds,
right_bounds);
}
void BVHSpatialSplit::split_object_reference(const Object *object,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds)
void BVHSpatialSplit::split_object_reference(
const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds)
{
Mesh *mesh = object->mesh;
for(int tri_idx = 0; tri_idx < mesh->num_triangles(); ++tri_idx) {
split_triangle_primitive(mesh,
&object->tfm,
tri_idx,
dim,
pos,
left_bounds,
right_bounds);
}
for(int curve_idx = 0; curve_idx < mesh->num_curves(); ++curve_idx) {
Mesh::Curve curve = mesh->get_curve(curve_idx);
for(int segment_idx = 0;
segment_idx < curve.num_keys - 1;
++segment_idx)
{
split_curve_primitive(mesh,
&object->tfm,
curve_idx,
segment_idx,
dim,
pos,
left_bounds,
right_bounds);
}
}
Mesh *mesh = object->mesh;
for (int tri_idx = 0; tri_idx < mesh->num_triangles(); ++tri_idx) {
split_triangle_primitive(mesh, &object->tfm, tri_idx, dim, pos, left_bounds, right_bounds);
}
for (int curve_idx = 0; curve_idx < mesh->num_curves(); ++curve_idx) {
Mesh::Curve curve = mesh->get_curve(curve_idx);
for (int segment_idx = 0; segment_idx < curve.num_keys - 1; ++segment_idx) {
split_curve_primitive(
mesh, &object->tfm, curve_idx, segment_idx, dim, pos, left_bounds, right_bounds);
}
}
}
void BVHSpatialSplit::split_reference(const BVHBuild& builder,
BVHReference& left,
BVHReference& right,
const BVHReference& ref,
void BVHSpatialSplit::split_reference(const BVHBuild &builder,
BVHReference &left,
BVHReference &right,
const BVHReference &ref,
int dim,
float pos)
{
/* initialize boundboxes */
BoundBox left_bounds = BoundBox::empty;
BoundBox right_bounds = BoundBox::empty;
/* initialize boundboxes */
BoundBox left_bounds = BoundBox::empty;
BoundBox right_bounds = BoundBox::empty;
/* loop over vertices/edges. */
const Object *ob = builder.objects[ref.prim_object()];
const Mesh *mesh = ob->mesh;
/* loop over vertices/edges. */
const Object *ob = builder.objects[ref.prim_object()];
const Mesh *mesh = ob->mesh;
if(ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) {
split_triangle_reference(ref,
mesh,
dim,
pos,
left_bounds,
right_bounds);
}
else if(ref.prim_type() & PRIMITIVE_ALL_CURVE) {
split_curve_reference(ref,
mesh,
dim,
pos,
left_bounds,
right_bounds);
}
else {
split_object_reference(ob,
dim,
pos,
left_bounds,
right_bounds);
}
if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) {
split_triangle_reference(ref, mesh, dim, pos, left_bounds, right_bounds);
}
else if (ref.prim_type() & PRIMITIVE_ALL_CURVE) {
split_curve_reference(ref, mesh, dim, pos, left_bounds, right_bounds);
}
else {
split_object_reference(ob, dim, pos, left_bounds, right_bounds);
}
/* intersect with original bounds. */
left_bounds.max[dim] = pos;
right_bounds.min[dim] = pos;
/* intersect with original bounds. */
left_bounds.max[dim] = pos;
right_bounds.min[dim] = pos;
left_bounds.intersect(ref.bounds());
right_bounds.intersect(ref.bounds());
left_bounds.intersect(ref.bounds());
right_bounds.intersect(ref.bounds());
/* set references */
left = BVHReference(left_bounds, ref.prim_index(), ref.prim_object(), ref.prim_type());
right = BVHReference(right_bounds, ref.prim_index(), ref.prim_object(), ref.prim_type());
/* set references */
left = BVHReference(left_bounds, ref.prim_index(), ref.prim_object(), ref.prim_type());
right = BVHReference(right_bounds, ref.prim_index(), ref.prim_object(), ref.prim_type());
}
CCL_NAMESPACE_END

View File

@@ -28,235 +28,211 @@ struct Transform;
/* Object Split */
class BVHObjectSplit
{
public:
float sah;
int dim;
int num_left;
BoundBox left_bounds;
BoundBox right_bounds;
class BVHObjectSplit {
public:
float sah;
int dim;
int num_left;
BoundBox left_bounds;
BoundBox right_bounds;
BVHObjectSplit() {}
BVHObjectSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange& range,
vector<BVHReference> *references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
BVHObjectSplit()
{
}
BVHObjectSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange &range,
vector<BVHReference> *references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
void split(BVHRange& left,
BVHRange& right,
const BVHRange& range);
void split(BVHRange &left, BVHRange &right, const BVHRange &range);
protected:
BVHSpatialStorage *storage_;
vector<BVHReference> *references_;
const BVHUnaligned *unaligned_heuristic_;
const Transform *aligned_space_;
protected:
BVHSpatialStorage *storage_;
vector<BVHReference> *references_;
const BVHUnaligned *unaligned_heuristic_;
const Transform *aligned_space_;
__forceinline BoundBox get_prim_bounds(const BVHReference& prim) const
{
if(aligned_space_ == NULL) {
return prim.bounds();
}
else {
return unaligned_heuristic_->compute_aligned_prim_boundbox(
prim, *aligned_space_);
}
}
__forceinline BoundBox get_prim_bounds(const BVHReference &prim) const
{
if (aligned_space_ == NULL) {
return prim.bounds();
}
else {
return unaligned_heuristic_->compute_aligned_prim_boundbox(prim, *aligned_space_);
}
}
};
/* Spatial Split */
class BVHSpatialSplit
{
public:
float sah;
int dim;
float pos;
class BVHSpatialSplit {
public:
float sah;
int dim;
float pos;
BVHSpatialSplit() : sah(FLT_MAX),
dim(0),
pos(0.0f),
storage_(NULL),
references_(NULL) {}
BVHSpatialSplit(const BVHBuild& builder,
BVHSpatialStorage *storage,
const BVHRange& range,
vector<BVHReference> *references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
BVHSpatialSplit() : sah(FLT_MAX), dim(0), pos(0.0f), storage_(NULL), references_(NULL)
{
}
BVHSpatialSplit(const BVHBuild &builder,
BVHSpatialStorage *storage,
const BVHRange &range,
vector<BVHReference> *references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
void split(BVHBuild *builder,
BVHRange& left,
BVHRange& right,
const BVHRange& range);
void split(BVHBuild *builder, BVHRange &left, BVHRange &right, const BVHRange &range);
void split_reference(const BVHBuild& builder,
BVHReference& left,
BVHReference& right,
const BVHReference& ref,
int dim,
float pos);
void split_reference(const BVHBuild &builder,
BVHReference &left,
BVHReference &right,
const BVHReference &ref,
int dim,
float pos);
protected:
BVHSpatialStorage *storage_;
vector<BVHReference> *references_;
const BVHUnaligned *unaligned_heuristic_;
const Transform *aligned_space_;
protected:
BVHSpatialStorage *storage_;
vector<BVHReference> *references_;
const BVHUnaligned *unaligned_heuristic_;
const Transform *aligned_space_;
/* Lower-level functions which calculate the boundaries of the left and right
* nodes needed for a spatial split.
*
* Operate directly on a primitive specified by its index; reused by the
* higher-level splitting functions.
*/
void split_triangle_primitive(const Mesh *mesh,
const Transform *tfm,
int prim_index,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds);
void split_curve_primitive(const Mesh *mesh,
const Transform *tfm,
int prim_index,
int segment_index,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds);
/* Lower-level functions which calculate the boundaries of the left and right
* nodes needed for a spatial split.
*
* Operate directly on a primitive specified by its index; reused by the
* higher-level splitting functions.
*/
void split_triangle_primitive(const Mesh *mesh,
const Transform *tfm,
int prim_index,
int dim,
float pos,
BoundBox &left_bounds,
BoundBox &right_bounds);
void split_curve_primitive(const Mesh *mesh,
const Transform *tfm,
int prim_index,
int segment_index,
int dim,
float pos,
BoundBox &left_bounds,
BoundBox &right_bounds);
/* Lower-level functions which calculate the boundaries of the left and right
* nodes needed for a spatial split.
*
* Operate on a BVHReference; internally use the lower-level API functions.
*/
void split_triangle_reference(const BVHReference& ref,
const Mesh *mesh,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds);
void split_curve_reference(const BVHReference& ref,
const Mesh *mesh,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds);
void split_object_reference(const Object *object,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds);
/* Lower-level functions which calculate the boundaries of the left and right
* nodes needed for a spatial split.
*
* Operate on a BVHReference; internally use the lower-level API functions.
*/
void split_triangle_reference(const BVHReference &ref,
const Mesh *mesh,
int dim,
float pos,
BoundBox &left_bounds,
BoundBox &right_bounds);
void split_curve_reference(const BVHReference &ref,
const Mesh *mesh,
int dim,
float pos,
BoundBox &left_bounds,
BoundBox &right_bounds);
void split_object_reference(
const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds);
__forceinline BoundBox get_prim_bounds(const BVHReference& prim) const
{
if(aligned_space_ == NULL) {
return prim.bounds();
}
else {
return unaligned_heuristic_->compute_aligned_prim_boundbox(
prim, *aligned_space_);
}
}
__forceinline BoundBox get_prim_bounds(const BVHReference &prim) const
{
if (aligned_space_ == NULL) {
return prim.bounds();
}
else {
return unaligned_heuristic_->compute_aligned_prim_boundbox(prim, *aligned_space_);
}
}
__forceinline float3 get_unaligned_point(const float3& point) const
{
if(aligned_space_ == NULL) {
return point;
}
else {
return transform_point(aligned_space_, point);
}
}
__forceinline float3 get_unaligned_point(const float3 &point) const
{
if (aligned_space_ == NULL) {
return point;
}
else {
return transform_point(aligned_space_, point);
}
}
};
/* Mixed Object-Spatial Split */
class BVHMixedSplit
{
public:
BVHObjectSplit object;
BVHSpatialSplit spatial;
class BVHMixedSplit {
public:
BVHObjectSplit object;
BVHSpatialSplit spatial;
float leafSAH;
float nodeSAH;
float minSAH;
float leafSAH;
float nodeSAH;
float minSAH;
bool no_split;
bool no_split;
BoundBox bounds;
BoundBox bounds;
BVHMixedSplit() {}
BVHMixedSplit()
{
}
__forceinline BVHMixedSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange& range,
vector<BVHReference> *references,
int level,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL)
{
if(aligned_space == NULL) {
bounds = range.bounds();
}
else {
bounds = unaligned_heuristic->compute_aligned_boundbox(
range,
&references->at(0),
*aligned_space);
}
/* find split candidates. */
float area = bounds.safe_area();
__forceinline BVHMixedSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange &range,
vector<BVHReference> *references,
int level,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL)
{
if (aligned_space == NULL) {
bounds = range.bounds();
}
else {
bounds = unaligned_heuristic->compute_aligned_boundbox(
range, &references->at(0), *aligned_space);
}
/* find split candidates. */
float area = bounds.safe_area();
leafSAH = area * builder->params.primitive_cost(range.size());
nodeSAH = area * builder->params.node_cost(2);
leafSAH = area * builder->params.primitive_cost(range.size());
nodeSAH = area * builder->params.node_cost(2);
object = BVHObjectSplit(builder,
storage,
range,
references,
nodeSAH,
unaligned_heuristic,
aligned_space);
object = BVHObjectSplit(
builder, storage, range, references, nodeSAH, unaligned_heuristic, aligned_space);
if(builder->params.use_spatial_split && level < BVHParams::MAX_SPATIAL_DEPTH) {
BoundBox overlap = object.left_bounds;
overlap.intersect(object.right_bounds);
if (builder->params.use_spatial_split && level < BVHParams::MAX_SPATIAL_DEPTH) {
BoundBox overlap = object.left_bounds;
overlap.intersect(object.right_bounds);
if(overlap.safe_area() >= builder->spatial_min_overlap) {
spatial = BVHSpatialSplit(*builder,
storage,
range,
references,
nodeSAH,
unaligned_heuristic,
aligned_space);
}
}
if (overlap.safe_area() >= builder->spatial_min_overlap) {
spatial = BVHSpatialSplit(
*builder, storage, range, references, nodeSAH, unaligned_heuristic, aligned_space);
}
}
/* leaf SAH is the lowest => create leaf. */
minSAH = min(min(leafSAH, object.sah), spatial.sah);
no_split = (minSAH == leafSAH &&
builder->range_within_max_leaf_size(range, *references));
}
/* leaf SAH is the lowest => create leaf. */
minSAH = min(min(leafSAH, object.sah), spatial.sah);
no_split = (minSAH == leafSAH && builder->range_within_max_leaf_size(range, *references));
}
__forceinline void split(BVHBuild *builder,
BVHRange& left,
BVHRange& right,
const BVHRange& range)
{
if(builder->params.use_spatial_split && minSAH == spatial.sah)
spatial.split(builder, left, right, range);
if(!left.size() || !right.size())
object.split(left, right, range);
}
__forceinline void split(BVHBuild *builder,
BVHRange &left,
BVHRange &right,
const BVHRange &range)
{
if (builder->params.use_spatial_split && minSAH == spatial.sah)
spatial.split(builder, left, right, range);
if (!left.size() || !right.size())
object.split(left, right, range);
}
};
CCL_NAMESPACE_END
#endif /* __BVH_SPLIT_H__ */
#endif /* __BVH_SPLIT_H__ */

View File

@@ -27,150 +27,137 @@
CCL_NAMESPACE_BEGIN
BVHUnaligned::BVHUnaligned(const vector<Object*>& objects)
: objects_(objects)
BVHUnaligned::BVHUnaligned(const vector<Object *> &objects) : objects_(objects)
{
}
Transform BVHUnaligned::compute_aligned_space(
const BVHObjectBinning& range,
const BVHReference *references) const
Transform BVHUnaligned::compute_aligned_space(const BVHObjectBinning &range,
const BVHReference *references) const
{
for(int i = range.start(); i < range.end(); ++i) {
const BVHReference& ref = references[i];
Transform aligned_space;
/* Use first primitive which defines correct direction to define
* the orientation space.
*/
if(compute_aligned_space(ref, &aligned_space)) {
return aligned_space;
}
}
return transform_identity();
for (int i = range.start(); i < range.end(); ++i) {
const BVHReference &ref = references[i];
Transform aligned_space;
/* Use first primitive which defines correct direction to define
* the orientation space.
*/
if (compute_aligned_space(ref, &aligned_space)) {
return aligned_space;
}
}
return transform_identity();
}
Transform BVHUnaligned::compute_aligned_space(
const BVHRange& range,
const BVHReference *references) const
Transform BVHUnaligned::compute_aligned_space(const BVHRange &range,
const BVHReference *references) const
{
for(int i = range.start(); i < range.end(); ++i) {
const BVHReference& ref = references[i];
Transform aligned_space;
/* Use first primitive which defines correct direction to define
* the orientation space.
*/
if(compute_aligned_space(ref, &aligned_space)) {
return aligned_space;
}
}
return transform_identity();
for (int i = range.start(); i < range.end(); ++i) {
const BVHReference &ref = references[i];
Transform aligned_space;
/* Use first primitive which defines correct direction to define
* the orientation space.
*/
if (compute_aligned_space(ref, &aligned_space)) {
return aligned_space;
}
}
return transform_identity();
}
bool BVHUnaligned::compute_aligned_space(const BVHReference& ref,
Transform *aligned_space) const
bool BVHUnaligned::compute_aligned_space(const BVHReference &ref, Transform *aligned_space) const
{
const Object *object = objects_[ref.prim_object()];
const int packed_type = ref.prim_type();
const int type = (packed_type & PRIMITIVE_ALL);
if(type & PRIMITIVE_CURVE) {
const int curve_index = ref.prim_index();
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
const Mesh *mesh = object->mesh;
const Mesh::Curve& curve = mesh->get_curve(curve_index);
const int key = curve.first_key + segment;
const float3 v1 = mesh->curve_keys[key],
v2 = mesh->curve_keys[key + 1];
float length;
const float3 axis = normalize_len(v2 - v1, &length);
if(length > 1e-6f) {
*aligned_space = make_transform_frame(axis);
return true;
}
}
*aligned_space = transform_identity();
return false;
const Object *object = objects_[ref.prim_object()];
const int packed_type = ref.prim_type();
const int type = (packed_type & PRIMITIVE_ALL);
if (type & PRIMITIVE_CURVE) {
const int curve_index = ref.prim_index();
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
const Mesh *mesh = object->mesh;
const Mesh::Curve &curve = mesh->get_curve(curve_index);
const int key = curve.first_key + segment;
const float3 v1 = mesh->curve_keys[key], v2 = mesh->curve_keys[key + 1];
float length;
const float3 axis = normalize_len(v2 - v1, &length);
if (length > 1e-6f) {
*aligned_space = make_transform_frame(axis);
return true;
}
}
*aligned_space = transform_identity();
return false;
}
BoundBox BVHUnaligned::compute_aligned_prim_boundbox(
const BVHReference& prim,
const Transform& aligned_space) const
BoundBox BVHUnaligned::compute_aligned_prim_boundbox(const BVHReference &prim,
const Transform &aligned_space) const
{
BoundBox bounds = BoundBox::empty;
const Object *object = objects_[prim.prim_object()];
const int packed_type = prim.prim_type();
const int type = (packed_type & PRIMITIVE_ALL);
if(type & PRIMITIVE_CURVE) {
const int curve_index = prim.prim_index();
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
const Mesh *mesh = object->mesh;
const Mesh::Curve& curve = mesh->get_curve(curve_index);
curve.bounds_grow(segment,
&mesh->curve_keys[0],
&mesh->curve_radius[0],
aligned_space,
bounds);
}
else {
bounds = prim.bounds().transformed(&aligned_space);
}
return bounds;
BoundBox bounds = BoundBox::empty;
const Object *object = objects_[prim.prim_object()];
const int packed_type = prim.prim_type();
const int type = (packed_type & PRIMITIVE_ALL);
if (type & PRIMITIVE_CURVE) {
const int curve_index = prim.prim_index();
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
const Mesh *mesh = object->mesh;
const Mesh::Curve &curve = mesh->get_curve(curve_index);
curve.bounds_grow(
segment, &mesh->curve_keys[0], &mesh->curve_radius[0], aligned_space, bounds);
}
else {
bounds = prim.bounds().transformed(&aligned_space);
}
return bounds;
}
BoundBox BVHUnaligned::compute_aligned_boundbox(
const BVHObjectBinning& range,
const BVHReference *references,
const Transform& aligned_space,
BoundBox *cent_bounds) const
BoundBox BVHUnaligned::compute_aligned_boundbox(const BVHObjectBinning &range,
const BVHReference *references,
const Transform &aligned_space,
BoundBox *cent_bounds) const
{
BoundBox bounds = BoundBox::empty;
if(cent_bounds != NULL) {
*cent_bounds = BoundBox::empty;
}
for(int i = range.start(); i < range.end(); ++i) {
const BVHReference& ref = references[i];
BoundBox ref_bounds = compute_aligned_prim_boundbox(ref, aligned_space);
bounds.grow(ref_bounds);
if(cent_bounds != NULL) {
cent_bounds->grow(ref_bounds.center2());
}
}
return bounds;
BoundBox bounds = BoundBox::empty;
if (cent_bounds != NULL) {
*cent_bounds = BoundBox::empty;
}
for (int i = range.start(); i < range.end(); ++i) {
const BVHReference &ref = references[i];
BoundBox ref_bounds = compute_aligned_prim_boundbox(ref, aligned_space);
bounds.grow(ref_bounds);
if (cent_bounds != NULL) {
cent_bounds->grow(ref_bounds.center2());
}
}
return bounds;
}
BoundBox BVHUnaligned::compute_aligned_boundbox(
const BVHRange& range,
const BVHReference *references,
const Transform& aligned_space,
BoundBox *cent_bounds) const
BoundBox BVHUnaligned::compute_aligned_boundbox(const BVHRange &range,
const BVHReference *references,
const Transform &aligned_space,
BoundBox *cent_bounds) const
{
BoundBox bounds = BoundBox::empty;
if(cent_bounds != NULL) {
*cent_bounds = BoundBox::empty;
}
for(int i = range.start(); i < range.end(); ++i) {
const BVHReference& ref = references[i];
BoundBox ref_bounds = compute_aligned_prim_boundbox(ref, aligned_space);
bounds.grow(ref_bounds);
if(cent_bounds != NULL) {
cent_bounds->grow(ref_bounds.center2());
}
}
return bounds;
BoundBox bounds = BoundBox::empty;
if (cent_bounds != NULL) {
*cent_bounds = BoundBox::empty;
}
for (int i = range.start(); i < range.end(); ++i) {
const BVHReference &ref = references[i];
BoundBox ref_bounds = compute_aligned_prim_boundbox(ref, aligned_space);
bounds.grow(ref_bounds);
if (cent_bounds != NULL) {
cent_bounds->grow(ref_bounds.center2());
}
}
return bounds;
}
/* Build the node transform for `bounds` expressed in `aligned_space`.
 *
 * The translation (w) components of a copy of `aligned_space` are shifted by
 * -bounds.min, and the result is scaled per axis by the reciprocal of the box
 * extent. Each extent is clamped to at least 1e-18 so that a box which is flat
 * along an axis does not cause a division by zero. */
Transform BVHUnaligned::compute_node_transform(
const BoundBox& bounds,
const Transform& aligned_space)
Transform BVHUnaligned::compute_node_transform(const BoundBox &bounds,
const Transform &aligned_space)
{
Transform space = aligned_space;
space.x.w -= bounds.min.x;
space.y.w -= bounds.min.y;
space.z.w -= bounds.min.z;
float3 dim = bounds.max - bounds.min;
return transform_scale(1.0f / max(1e-18f, dim.x),
1.0f / max(1e-18f, dim.y),
1.0f / max(1e-18f, dim.z)) * space;
Transform space = aligned_space;
space.x.w -= bounds.min.x;
space.y.w -= bounds.min.y;
space.z.w -= bounds.min.z;
float3 dim = bounds.max - bounds.min;
return transform_scale(
1.0f / max(1e-18f, dim.x), 1.0f / max(1e-18f, dim.y), 1.0f / max(1e-18f, dim.z)) *
space;
}
CCL_NAMESPACE_END

View File

@@ -30,51 +30,44 @@ class Object;
/* Helper class to perform calculations needed for unaligned nodes. */
class BVHUnaligned {
public:
BVHUnaligned(const vector<Object*>& objects);
public:
BVHUnaligned(const vector<Object *> &objects);
/* Calculate alignment for the oriented node for a given range. */
Transform compute_aligned_space(
const BVHObjectBinning& range,
const BVHReference *references) const;
Transform compute_aligned_space(
const BVHRange& range,
const BVHReference *references) const;
/* Calculate alignment for the oriented node for a given range. */
Transform compute_aligned_space(const BVHObjectBinning &range,
const BVHReference *references) const;
Transform compute_aligned_space(const BVHRange &range, const BVHReference *references) const;
/* Calculate alignment for the oriented node for a given reference.
*
* Return true when space was calculated successfully.
*/
bool compute_aligned_space(const BVHReference& ref,
Transform *aligned_space) const;
/* Calculate alignment for the oriented node for a given reference.
*
* Return true when space was calculated successfully.
*/
bool compute_aligned_space(const BVHReference &ref, Transform *aligned_space) const;
/* Calculate primitive's bounding box in given space. */
BoundBox compute_aligned_prim_boundbox(
const BVHReference& prim,
const Transform& aligned_space) const;
/* Calculate primitive's bounding box in given space. */
BoundBox compute_aligned_prim_boundbox(const BVHReference &prim,
const Transform &aligned_space) const;
/* Calculate bounding box in given space. */
BoundBox compute_aligned_boundbox(
const BVHObjectBinning& range,
const BVHReference *references,
const Transform& aligned_space,
BoundBox *cent_bounds = NULL) const;
BoundBox compute_aligned_boundbox(
const BVHRange& range,
const BVHReference *references,
const Transform& aligned_space,
BoundBox *cent_bounds = NULL) const;
/* Calculate bounding box in given space. */
BoundBox compute_aligned_boundbox(const BVHObjectBinning &range,
const BVHReference *references,
const Transform &aligned_space,
BoundBox *cent_bounds = NULL) const;
BoundBox compute_aligned_boundbox(const BVHRange &range,
const BVHReference *references,
const Transform &aligned_space,
BoundBox *cent_bounds = NULL) const;
/* Calculate affine transform for node packing.
* Bounds will be in the range of 0..1.
*/
static Transform compute_node_transform(const BoundBox& bounds,
const Transform& aligned_space);
protected:
/* List of objects BVH is being created for. */
const vector<Object*>& objects_;
/* Calculate affine transform for node packing.
* Bounds will be in the range of 0..1.
*/
static Transform compute_node_transform(const BoundBox &bounds, const Transform &aligned_space);
protected:
/* List of objects BVH is being created for. */
const vector<Object *> &objects_;
};
CCL_NAMESPACE_END
#endif /* __BVH_UNALIGNED_H__ */
#endif /* __BVH_UNALIGNED_H__ */

View File

@@ -2,24 +2,24 @@
# Precompiled libraries tips and hints, for find_package().
if(CYCLES_STANDALONE_REPOSITORY)
if(APPLE OR WIN32)
include(precompiled_libs)
endif()
if(APPLE OR WIN32)
include(precompiled_libs)
endif()
endif()
###########################################################################
# GLUT
if(WITH_CYCLES_STANDALONE AND WITH_CYCLES_STANDALONE_GUI)
set(GLUT_ROOT_PATH ${CYCLES_GLUT})
set(GLUT_ROOT_PATH ${CYCLES_GLUT})
find_package(GLUT)
message(STATUS "GLUT_FOUND=${GLUT_FOUND}")
find_package(GLUT)
message(STATUS "GLUT_FOUND=${GLUT_FOUND}")
include_directories(
SYSTEM
${GLUT_INCLUDE_DIR}
)
include_directories(
SYSTEM
${GLUT_INCLUDE_DIR}
)
endif()
###########################################################################
@@ -27,125 +27,125 @@ endif()
# Workaround for unconventional variable name use in Blender.
if(NOT CYCLES_STANDALONE_REPOSITORY)
set(GLEW_INCLUDE_DIR "${GLEW_INCLUDE_PATH}")
set(GLEW_INCLUDE_DIR "${GLEW_INCLUDE_PATH}")
endif()
if(WITH_CYCLES_STANDALONE)
set(CYCLES_APP_GLEW_LIBRARY ${BLENDER_GLEW_LIBRARIES})
set(CYCLES_APP_GLEW_LIBRARY ${BLENDER_GLEW_LIBRARIES})
endif()
###########################################################################
# CUDA
#
# The toolkit is only looked up when kernels are precompiled
# (WITH_CYCLES_CUDA_BINARIES) or when CUDA is not loaded dynamically
# (NOT WITH_CUDA_DYNLOAD). If it cannot be found, precompiled kernels are
# switched off and, if needed, dynamic loading is switched on instead.
if(WITH_CYCLES_CUDA_BINARIES OR NOT WITH_CUDA_DYNLOAD)
find_package(CUDA) # Try to auto locate CUDA toolkit
if(CUDA_FOUND)
message(STATUS "CUDA nvcc = ${CUDA_NVCC_EXECUTABLE}")
else()
message(STATUS "CUDA compiler not found, disabling WITH_CYCLES_CUDA_BINARIES")
set(WITH_CYCLES_CUDA_BINARIES OFF)
if(NOT WITH_CUDA_DYNLOAD)
message(STATUS "Additionally falling back to dynamic CUDA load")
set(WITH_CUDA_DYNLOAD ON)
endif()
endif()
find_package(CUDA) # Try to auto locate CUDA toolkit
if(CUDA_FOUND)
message(STATUS "CUDA nvcc = ${CUDA_NVCC_EXECUTABLE}")
else()
message(STATUS "CUDA compiler not found, disabling WITH_CYCLES_CUDA_BINARIES")
set(WITH_CYCLES_CUDA_BINARIES OFF)
if(NOT WITH_CUDA_DYNLOAD)
message(STATUS "Additionally falling back to dynamic CUDA load")
set(WITH_CUDA_DYNLOAD ON)
endif()
endif()
endif()
# Packages which are found by Blender when building from inside the Blender
# source code, but which we need to take care of ourselves when building
# Cycles from a standalone repository.
if(CYCLES_STANDALONE_REPOSITORY)
# PThreads
# TODO(sergey): Bloody exception, handled in precompiled_libs.cmake.
if(NOT WIN32)
set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
find_package(Threads REQUIRED)
set(PTHREADS_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
endif()
# PThreads
# TODO(sergey): Bloody exception, handled in precompiled_libs.cmake.
if(NOT WIN32)
set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
find_package(Threads REQUIRED)
set(PTHREADS_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
endif()
####
# OpenGL
####
# OpenGL
# TODO(sergey): We currently re-use the same variable name as we use
# in Blender. Ideally we need to make it CYCLES_GL_LIBRARIES.
find_package(OpenGL REQUIRED)
find_package(GLEW REQUIRED)
list(APPEND BLENDER_GL_LIBRARIES
"${OPENGL_gl_LIBRARY}"
"${OPENGL_glu_LIBRARY}"
"${GLEW_LIBRARY}"
)
# TODO(sergey): We currently re-use the same variable name as we use
# in Blender. Ideally we need to make it CYCLES_GL_LIBRARIES.
find_package(OpenGL REQUIRED)
find_package(GLEW REQUIRED)
list(APPEND BLENDER_GL_LIBRARIES
"${OPENGL_gl_LIBRARY}"
"${OPENGL_glu_LIBRARY}"
"${GLEW_LIBRARY}"
)
####
# OpenImageIO
find_package(OpenImageIO REQUIRED)
if(OPENIMAGEIO_PUGIXML_FOUND)
# OpenImageIO ships its own PugiXML: use the headers in its OpenImageIO/
# include sub-directory. The path suffix must sit outside the braces,
# otherwise CMake looks up a variable literally named
# "OPENIMAGEIO_INCLUDE_DIR/OpenImageIO" and expands it to an empty string.
set(PUGIXML_INCLUDE_DIR "${OPENIMAGEIO_INCLUDE_DIR}/OpenImageIO")
set(PUGIXML_LIBRARIES "")
else()
find_package(PugiXML REQUIRED)
endif()
####
# OpenImageIO
find_package(OpenImageIO REQUIRED)
if(OPENIMAGEIO_PUGIXML_FOUND)
# OpenImageIO ships its own PugiXML: use the headers in its OpenImageIO/
# include sub-directory. The path suffix must sit outside the braces,
# otherwise CMake looks up a variable literally named
# "OPENIMAGEIO_INCLUDE_DIR/OpenImageIO" and expands it to an empty string.
set(PUGIXML_INCLUDE_DIR "${OPENIMAGEIO_INCLUDE_DIR}/OpenImageIO")
set(PUGIXML_LIBRARIES "")
else()
find_package(PugiXML REQUIRED)
endif()
# OIIO usually depends on OpenEXR, so find this library
# but don't make it required.
find_package(OpenEXR)
# OIIO usually depends on OpenEXR, so find this library
# but don't make it required.
find_package(OpenEXR)
####
# OpenShadingLanguage
if(WITH_CYCLES_OSL)
find_package(OpenShadingLanguage REQUIRED)
find_package(LLVM REQUIRED)
endif()
####
# OpenShadingLanguage
if(WITH_CYCLES_OSL)
find_package(OpenShadingLanguage REQUIRED)
find_package(LLVM REQUIRED)
endif()
####
# OpenColorIO
if(WITH_OPENCOLORIO)
find_package(OpenColorIO REQUIRED)
endif()
####
# OpenColorIO
if(WITH_OPENCOLORIO)
find_package(OpenColorIO REQUIRED)
endif()
####
# Boost
set(__boost_packages filesystem regex system thread date_time)
if(WITH_CYCLES_NETWORK)
list(APPEND __boost_packages serialization)
endif()
if(WITH_CYCLES_OSL)
# TODO(sergey): This is because of how our precompiled libraries
# work; it could be different for someone else's libraries.
if(APPLE OR MSVC)
list(APPEND __boost_packages wave)
elseif(NOT (${OSL_LIBRARY_VERSION_MAJOR} EQUAL "1" AND ${OSL_LIBRARY_VERSION_MINOR} LESS "6"))
list(APPEND __boost_packages wave)
endif()
endif()
# The first lookup must not be REQUIRED: a REQUIRED lookup aborts the
# configure step on failure, which would make the fallback below unreachable.
find_package(Boost 1.48 COMPONENTS ${__boost_packages})
if(NOT Boost_FOUND)
# Try to find non-multithreaded if -mt not found, this flag
# doesn't matter for us, it has nothing to do with thread
# safety, but keep it to not disturb build setups.
set(Boost_USE_MULTITHREADED OFF)
find_package(Boost 1.48 COMPONENTS ${__boost_packages} REQUIRED)
endif()
unset(__boost_packages)
set(BOOST_INCLUDE_DIR ${Boost_INCLUDE_DIRS})
set(BOOST_LIBRARIES ${Boost_LIBRARIES})
set(BOOST_LIBPATH ${Boost_LIBRARY_DIRS})
set(BOOST_DEFINITIONS "-DBOOST_ALL_NO_LIB")
####
# Boost
set(__boost_packages filesystem regex system thread date_time)
if(WITH_CYCLES_NETWORK)
list(APPEND __boost_packages serialization)
endif()
if(WITH_CYCLES_OSL)
# TODO(sergey): This is because of how our precompiled libraries
# work; it could be different for someone else's libraries.
if(APPLE OR MSVC)
list(APPEND __boost_packages wave)
elseif(NOT (${OSL_LIBRARY_VERSION_MAJOR} EQUAL "1" AND ${OSL_LIBRARY_VERSION_MINOR} LESS "6"))
list(APPEND __boost_packages wave)
endif()
endif()
# The first lookup must not be REQUIRED: a REQUIRED lookup aborts the
# configure step on failure, which would make the fallback below unreachable.
find_package(Boost 1.48 COMPONENTS ${__boost_packages})
if(NOT Boost_FOUND)
# Try to find non-multithreaded if -mt not found, this flag
# doesn't matter for us, it has nothing to do with thread
# safety, but keep it to not disturb build setups.
set(Boost_USE_MULTITHREADED OFF)
find_package(Boost 1.48 COMPONENTS ${__boost_packages} REQUIRED)
endif()
unset(__boost_packages)
set(BOOST_INCLUDE_DIR ${Boost_INCLUDE_DIRS})
set(BOOST_LIBRARIES ${Boost_LIBRARIES})
set(BOOST_LIBPATH ${Boost_LIBRARY_DIRS})
set(BOOST_DEFINITIONS "-DBOOST_ALL_NO_LIB")
####
# embree
if(WITH_CYCLES_EMBREE)
find_package(embree 3.2.4 REQUIRED)
endif()
####
# embree
if(WITH_CYCLES_EMBREE)
find_package(embree 3.2.4 REQUIRED)
endif()
####
# Logging
if(WITH_CYCLES_LOGGING)
find_package(Glog REQUIRED)
find_package(Gflags REQUIRED)
endif()
####
# Logging
if(WITH_CYCLES_LOGGING)
find_package(Glog REQUIRED)
find_package(Gflags REQUIRED)
endif()
unset(_lib_DIR)
unset(_lib_DIR)
else()
set(LLVM_LIBRARIES ${LLVM_LIBRARY})
set(LLVM_LIBRARIES ${LLVM_LIBRARY})
endif()

View File

@@ -1,15 +1,15 @@
# Place `target` in an IDE folder derived from its location in the source tree.
#
# Only does anything when WINDOWS_USE_VISUAL_STUDIO_FOLDERS is enabled. The
# folder name is the parent of the current source directory with the
# CMAKE_SOURCE_DIR prefix removed.
#
# All expansions are quoted: `folderdir` is empty when the parent directory is
# the source root, and an unquoted empty expansion would drop the argument and
# leave set_target_properties() with an incomplete property/value pair.
function(cycles_set_solution_folder target)
if(WINDOWS_USE_VISUAL_STUDIO_FOLDERS)
get_filename_component(folderdir "${CMAKE_CURRENT_SOURCE_DIR}" DIRECTORY)
string(REPLACE "${CMAKE_SOURCE_DIR}" "" folderdir "${folderdir}")
set_target_properties(${target} PROPERTIES FOLDER "${folderdir}")
endif()
if(WINDOWS_USE_VISUAL_STUDIO_FOLDERS)
get_filename_component(folderdir "${CMAKE_CURRENT_SOURCE_DIR}" DIRECTORY)
string(REPLACE "${CMAKE_SOURCE_DIR}" "" folderdir "${folderdir}")
set_target_properties(${target} PROPERTIES FOLDER "${folderdir}")
endif()
endfunction()
# Create library `target` and register it with the solution-folder helper.
#
# Arguments:
#   target       - name of the library target to create.
#   library_deps - libraries to link against; may be an empty string, in which
#                  case no target_link_libraries() call is made.
#   ARGN         - forwarded unchanged to add_library() (sources, library type).
#
# NOTE: this is a macro, so it executes in the caller's variable scope.
macro(cycles_add_library target library_deps)
add_library(${target} ${ARGN})
if(NOT ("${library_deps}" STREQUAL ""))
target_link_libraries(${target} "${library_deps}")
endif()
cycles_set_solution_folder(${target})
add_library(${target} ${ARGN})
if(NOT ("${library_deps}" STREQUAL ""))
target_link_libraries(${target} "${library_deps}")
endif()
cycles_set_solution_folder(${target})
endmacro()

View File

@@ -1,61 +1,61 @@
# Project include directories.
set(INC
  ..
  ../../glew-mx
)

# System / third-party include directories.
set(INC_SYS
  ${GLEW_INCLUDE_DIR}
  ../../../extern/clew/include
)

# CUDA is either loaded dynamically through the bundled cuew wrapper, or the
# toolkit headers are used directly and the nvcc path is baked in as a define.
if(WITH_CUDA_DYNLOAD)
  list(APPEND INC
    ../../../extern/cuew/include
  )
  add_definitions(-DWITH_CUDA_DYNLOAD)
else()
  list(APPEND INC_SYS
    ${CUDA_TOOLKIT_INCLUDE}
  )
  add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
endif()

set(SRC
  device.cpp
  device_cpu.cpp
  device_cuda.cpp
  device_denoising.cpp
  device_memory.cpp
  device_multi.cpp
  device_opencl.cpp
  device_split_kernel.cpp
  device_task.cpp
)

set(SRC_OPENCL
  opencl/opencl.h
  opencl/memory_manager.h

  opencl/opencl_split.cpp
  opencl/opencl_util.cpp
  opencl/memory_manager.cpp
)

if(WITH_CYCLES_NETWORK)
  list(APPEND SRC
    device_network.cpp
  )
endif()

set(SRC_HEADERS
  device.h
  device_denoising.h
  device_memory.h
  device_intern.h
  device_network.h
  device_split_kernel.h
  device_task.h
)
set(LIB
@@ -63,27 +63,27 @@ set(LIB
)
# Link against the cuew wrapper when CUDA is loaded dynamically, otherwise
# against the CUDA driver library itself.
if(WITH_CUDA_DYNLOAD)
  list(APPEND LIB
    extern_cuew
  )
else()
  list(APPEND LIB
    ${CUDA_CUDA_LIBRARY}
  )
endif()

add_definitions(${GL_DEFINITIONS})

# One preprocessor define per enabled feature; each is added exactly once.
if(WITH_CYCLES_NETWORK)
  add_definitions(-DWITH_NETWORK)
endif()
if(WITH_CYCLES_DEVICE_OPENCL)
  add_definitions(-DWITH_OPENCL)
endif()
if(WITH_CYCLES_DEVICE_CUDA)
  add_definitions(-DWITH_CUDA)
endif()
if(WITH_CYCLES_DEVICE_MULTI)
  add_definitions(-DWITH_MULTI)
endif()

include_directories(${INC})

View File

@@ -44,572 +44,577 @@ uint Device::devices_initialized_mask = 0;
/* Device Requested Features */
std::ostream& operator <<(std::ostream &os,
const DeviceRequestedFeatures& requested_features)
std::ostream &operator<<(std::ostream &os, const DeviceRequestedFeatures &requested_features)
{
os << "Experimental features: "
<< (requested_features.experimental ? "On" : "Off") << std::endl;
os << "Max nodes group: " << requested_features.max_nodes_group << std::endl;
/* TODO(sergey): Decode bitflag into list of names. */
os << "Nodes features: " << requested_features.nodes_features << std::endl;
os << "Use Hair: "
<< string_from_bool(requested_features.use_hair) << std::endl;
os << "Use Object Motion: "
<< string_from_bool(requested_features.use_object_motion) << std::endl;
os << "Use Camera Motion: "
<< string_from_bool(requested_features.use_camera_motion) << std::endl;
os << "Use Baking: "
<< string_from_bool(requested_features.use_baking) << std::endl;
os << "Use Subsurface: "
<< string_from_bool(requested_features.use_subsurface) << std::endl;
os << "Use Volume: "
<< string_from_bool(requested_features.use_volume) << std::endl;
os << "Use Branched Integrator: "
<< string_from_bool(requested_features.use_integrator_branched) << std::endl;
os << "Use Patch Evaluation: "
<< string_from_bool(requested_features.use_patch_evaluation) << std::endl;
os << "Use Transparent Shadows: "
<< string_from_bool(requested_features.use_transparent) << std::endl;
os << "Use Principled BSDF: "
<< string_from_bool(requested_features.use_principled) << std::endl;
os << "Use Denoising: "
<< string_from_bool(requested_features.use_denoising) << std::endl;
os << "Use Displacement: "
<< string_from_bool(requested_features.use_true_displacement) << std::endl;
os << "Use Background Light: "
<< string_from_bool(requested_features.use_background_light) << std::endl;
return os;
os << "Experimental features: " << (requested_features.experimental ? "On" : "Off") << std::endl;
os << "Max nodes group: " << requested_features.max_nodes_group << std::endl;
/* TODO(sergey): Decode bitflag into list of names. */
os << "Nodes features: " << requested_features.nodes_features << std::endl;
os << "Use Hair: " << string_from_bool(requested_features.use_hair) << std::endl;
os << "Use Object Motion: " << string_from_bool(requested_features.use_object_motion)
<< std::endl;
os << "Use Camera Motion: " << string_from_bool(requested_features.use_camera_motion)
<< std::endl;
os << "Use Baking: " << string_from_bool(requested_features.use_baking) << std::endl;
os << "Use Subsurface: " << string_from_bool(requested_features.use_subsurface) << std::endl;
os << "Use Volume: " << string_from_bool(requested_features.use_volume) << std::endl;
os << "Use Branched Integrator: " << string_from_bool(requested_features.use_integrator_branched)
<< std::endl;
os << "Use Patch Evaluation: " << string_from_bool(requested_features.use_patch_evaluation)
<< std::endl;
os << "Use Transparent Shadows: " << string_from_bool(requested_features.use_transparent)
<< std::endl;
os << "Use Principled BSDF: " << string_from_bool(requested_features.use_principled)
<< std::endl;
os << "Use Denoising: " << string_from_bool(requested_features.use_denoising) << std::endl;
os << "Use Displacement: " << string_from_bool(requested_features.use_true_displacement)
<< std::endl;
os << "Use Background Light: " << string_from_bool(requested_features.use_background_light)
<< std::endl;
return os;
}
/* Device */
/* Release the OpenGL objects owned by the device. They are only released
 * when the device is not a background device; a zero handle means the
 * object was never created, so it is skipped. Each object is deleted once. */
Device::~Device()
{
  if (!background) {
    if (vertex_buffer != 0) {
      glDeleteBuffers(1, &vertex_buffer);
    }
    if (fallback_shader_program != 0) {
      glDeleteProgram(fallback_shader_program);
    }
  }
}
/* TODO move shaders to standalone .glsl file. */
/* Vertex shader of the fallback display program: maps `pos`, given in the
 * coordinate range described by the `fullscreen` uniform, to clip space
 * [-1, 1] and forwards the texture coordinate to the fragment stage. */
const char *FALLBACK_VERTEX_SHADER =
    "#version 330\n"
    "uniform vec2 fullscreen;\n"
    "in vec2 texCoord;\n"
    "in vec2 pos;\n"
    "out vec2 texCoord_interp;\n"
    "\n"
    "vec2 normalize_coordinates()\n"
    "{\n"
    "	return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n"
    "}\n"
    "\n"
    "void main()\n"
    "{\n"
    "	gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n"
    "	texCoord_interp = texCoord;\n"
    "}\n\0";

/* Fragment shader of the fallback display program: samples `image_texture`
 * at the interpolated texture coordinate and writes it to `fragColor`. */
const char *FALLBACK_FRAGMENT_SHADER =
    "#version 330\n"
    "uniform sampler2D image_texture;\n"
    "in vec2 texCoord_interp;\n"
    "out vec4 fragColor;\n"
    "\n"
    "void main()\n"
    "{\n"
    "	fragColor = texture(image_texture, texCoord_interp);\n"
    "}\n\0";
/* Log a shader build failure.
 *
 * task: name of the failed step, printed in the header line.
 * log:  info log text, printed last.
 * code: shader source, printed line by line with 1-based line numbers.
 */
static void shader_print_errors(const char *task, const char *log, const char *code)
{
  LOG(ERROR) << "Shader: " << task << " error:";
  LOG(ERROR) << "===== shader string ====";

  stringstream stream(code);
  string partial;

  int line = 1;
  while (getline(stream, partial, '\n')) {
    /* Single digit line numbers get an extra leading space so the source
     * text stays aligned with two digit line numbers. */
    if (line < 10) {
      LOG(ERROR) << " " << line << " " << partial;
    }
    else {
      LOG(ERROR) << line << " " << partial;
    }
    line++;
  }
  LOG(ERROR) << log;
}
static int bind_fallback_shader(void)
{
GLint status;
GLchar log[5000];
GLsizei length = 0;
GLuint program = 0;
GLint status;
GLchar log[5000];
GLsizei length = 0;
GLuint program = 0;
struct Shader {
const char *source;
GLenum type;
} shaders[2] = {
{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER},
{FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}
};
struct Shader {
const char *source;
GLenum type;
} shaders[2] = {{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER},
{FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}};
program = glCreateProgram();
program = glCreateProgram();
for(int i = 0; i < 2; i++) {
GLuint shader = glCreateShader(shaders[i].type);
for (int i = 0; i < 2; i++) {
GLuint shader = glCreateShader(shaders[i].type);
string source_str = shaders[i].source;
const char *c_str = source_str.c_str();
string source_str = shaders[i].source;
const char *c_str = source_str.c_str();
glShaderSource(shader, 1, &c_str, NULL);
glCompileShader(shader);
glShaderSource(shader, 1, &c_str, NULL);
glCompileShader(shader);
glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
if(!status) {
glGetShaderInfoLog(shader, sizeof(log), &length, log);
shader_print_errors("compile", log, c_str);
return 0;
}
if (!status) {
glGetShaderInfoLog(shader, sizeof(log), &length, log);
shader_print_errors("compile", log, c_str);
return 0;
}
glAttachShader(program, shader);
}
glAttachShader(program, shader);
}
/* Link output. */
glBindFragDataLocation(program, 0, "fragColor");
/* Link output. */
glBindFragDataLocation(program, 0, "fragColor");
/* Link and error check. */
glLinkProgram(program);
/* Link and error check. */
glLinkProgram(program);
glGetProgramiv(program, GL_LINK_STATUS, &status);
if(!status) {
glGetShaderInfoLog(program, sizeof(log), &length, log);
shader_print_errors("linking", log, FALLBACK_VERTEX_SHADER);
shader_print_errors("linking", log, FALLBACK_FRAGMENT_SHADER);
return 0;
}
glGetProgramiv(program, GL_LINK_STATUS, &status);
if (!status) {
glGetShaderInfoLog(program, sizeof(log), &length, log);
shader_print_errors("linking", log, FALLBACK_VERTEX_SHADER);
shader_print_errors("linking", log, FALLBACK_FRAGMENT_SHADER);
return 0;
}
return program;
return program;
}
/* Make the fallback display shader current and upload its uniforms.
 *
 * The program is built on first use. `fallback_status` is switched to ERROR
 * before any setup step runs and only becomes SUCCESS once all of them
 * passed, so a failed setup is remembered and later calls return false
 * without retrying.
 *
 * width, height: values uploaded to the `fullscreen` uniform.
 * Returns true when the shader is bound and ready for drawing.
 */
bool Device::bind_fallback_display_space_shader(const float width, const float height)
{
  if (fallback_status == FALLBACK_SHADER_STATUS_ERROR) {
    return false;
  }

  if (fallback_status == FALLBACK_SHADER_STATUS_NONE) {
    fallback_shader_program = bind_fallback_shader();
    fallback_status = FALLBACK_SHADER_STATUS_ERROR;

    if (fallback_shader_program == 0) {
      return false;
    }

    glUseProgram(fallback_shader_program);
    image_texture_location = glGetUniformLocation(fallback_shader_program, "image_texture");
    if (image_texture_location < 0) {
      LOG(ERROR) << "Shader doesn't containt the 'image_texture' uniform.";
      return false;
    }

    fullscreen_location = glGetUniformLocation(fallback_shader_program, "fullscreen");
    if (fullscreen_location < 0) {
      LOG(ERROR) << "Shader doesn't containt the 'fullscreen' uniform.";
      return false;
    }

    fallback_status = FALLBACK_SHADER_STATUS_SUCCESS;
  }

  /* Run this every time. */
  glUseProgram(fallback_shader_program);
  glUniform1i(image_texture_location, 0);
  glUniform2f(fullscreen_location, width, height);
  return true;
}
/* Draw the pixels of `rgba` as a textured quad using OpenGL.
 *
 * The rows starting at `y` (w x h pixels) are copied from the device,
 * uploaded into a temporary texture and drawn as a quad of size
 * width x height at offset (dx, dy). When `draw_params` provides no display
 * space shader callback the built-in fallback shader is used, with (dw, dh)
 * uploaded as its `fullscreen` uniform. With `transparent` set, blending is
 * enabled for the duration of the draw.
 *
 * All temporary GL state created here (texture, vertex array object, blend
 * state) is released again on every exit path.
 */
void Device::draw_pixels(device_memory &rgba,
                         int y,
                         int w,
                         int h,
                         int width,
                         int height,
                         int dx,
                         int dy,
                         int dw,
                         int dh,
                         bool transparent,
                         const DeviceDrawParams &draw_params)
{
  const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);

  assert(rgba.type == MEM_PIXELS);
  mem_copy_from(rgba, y, w, h, rgba.memory_elements_size(1));

  GLuint texid;
  glActiveTexture(GL_TEXTURE0);
  glGenTextures(1, &texid);
  glBindTexture(GL_TEXTURE_2D, texid);

  /* Skip the first `y` rows; every pixel has 4 components. */
  if (rgba.data_type == TYPE_HALF) {
    GLhalf *data_pointer = (GLhalf *)rgba.host_pointer;
    data_pointer += 4 * y * w;
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, w, h, 0, GL_RGBA, GL_HALF_FLOAT, data_pointer);
  }
  else {
    uint8_t *data_pointer = (uint8_t *)rgba.host_pointer;
    data_pointer += 4 * y * w;
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, data_pointer);
  }

  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

  if (transparent) {
    glEnable(GL_BLEND);
    glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
  }

  GLint shader_program;
  if (use_fallback_shader) {
    if (!bind_fallback_display_space_shader(dw, dh)) {
      /* Nothing can be drawn: undo the state set up above instead of
       * leaking the texture and leaving blending enabled. */
      glBindTexture(GL_TEXTURE_2D, 0);
      glDeleteTextures(1, &texid);
      if (transparent) {
        glDisable(GL_BLEND);
      }
      return;
    }
    shader_program = fallback_shader_program;
  }
  else {
    draw_params.bind_display_space_shader_cb();
    glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program);
  }

  if (!vertex_buffer) {
    glGenBuffers(1, &vertex_buffer);
  }

  glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
  /* invalidate old contents - avoids stalling if buffer is still waiting in queue to be rendered */
  glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);

  float *vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);

  if (vpointer) {
    /* texture coordinate - vertex pair */
    vpointer[0] = 0.0f;
    vpointer[1] = 0.0f;
    vpointer[2] = dx;
    vpointer[3] = dy;

    vpointer[4] = 1.0f;
    vpointer[5] = 0.0f;
    vpointer[6] = (float)width + dx;
    vpointer[7] = dy;

    vpointer[8] = 1.0f;
    vpointer[9] = 1.0f;
    vpointer[10] = (float)width + dx;
    vpointer[11] = (float)height + dy;

    vpointer[12] = 0.0f;
    vpointer[13] = 1.0f;
    vpointer[14] = dx;
    vpointer[15] = (float)height + dy;

    if (vertex_buffer) {
      glUnmapBuffer(GL_ARRAY_BUFFER);
    }
  }

  GLuint vertex_array_object;
  GLuint position_attribute, texcoord_attribute;

  glGenVertexArrays(1, &vertex_array_object);
  glBindVertexArray(vertex_array_object);

  texcoord_attribute = glGetAttribLocation(shader_program, "texCoord");
  position_attribute = glGetAttribLocation(shader_program, "pos");

  glEnableVertexAttribArray(texcoord_attribute);
  glEnableVertexAttribArray(position_attribute);

  /* Interleaved layout: 2 floats of texture coordinate followed by 2 floats
   * of position, hence a stride of 4 floats. */
  glVertexAttribPointer(
      texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
  glVertexAttribPointer(position_attribute,
                        2,
                        GL_FLOAT,
                        GL_FALSE,
                        4 * sizeof(float),
                        (const GLvoid *)(sizeof(float) * 2));

  glDrawArrays(GL_TRIANGLE_FAN, 0, 4);

  if (vertex_buffer) {
    glBindBuffer(GL_ARRAY_BUFFER, 0);
  }

  if (use_fallback_shader) {
    glUseProgram(0);
  }
  else {
    draw_params.unbind_display_space_shader_cb();
  }

  glDeleteVertexArrays(1, &vertex_array_object);
  glBindTexture(GL_TEXTURE_2D, 0);
  glDeleteTextures(1, &texid);

  if (transparent) {
    glDisable(GL_BLEND);
  }
}
/* Create the device matching `info.type`.
 *
 * Returns NULL when the type is not handled by this build (its case is
 * compiled out or unknown) or when the CUDA / OpenCL backend fails to
 * initialize.
 */
Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{
  Device *device;

  switch (info.type) {
    case DEVICE_CPU:
      device = device_cpu_create(info, stats, profiler, background);
      break;
#ifdef WITH_CUDA
    case DEVICE_CUDA:
      if (device_cuda_init())
        device = device_cuda_create(info, stats, profiler, background);
      else
        device = NULL;
      break;
#endif
#ifdef WITH_MULTI
    case DEVICE_MULTI:
      device = device_multi_create(info, stats, profiler, background);
      break;
#endif
#ifdef WITH_NETWORK
    case DEVICE_NETWORK:
      device = device_network_create(info, stats, profiler, "127.0.0.1");
      break;
#endif
#ifdef WITH_OPENCL
    case DEVICE_OPENCL:
      if (device_opencl_init())
        device = device_opencl_create(info, stats, profiler, background);
      else
        device = NULL;
      break;
#endif
    default:
      return NULL;
  }

  return device;
}
/* Convert a device type name ("CPU", "CUDA", "OPENCL", "NETWORK", "MULTI")
 * to its DeviceType. The comparison is case sensitive; any other name yields
 * DEVICE_NONE. */
DeviceType Device::type_from_string(const char *name)
{
  if (strcmp(name, "CPU") == 0)
    return DEVICE_CPU;
  else if (strcmp(name, "CUDA") == 0)
    return DEVICE_CUDA;
  else if (strcmp(name, "OPENCL") == 0)
    return DEVICE_OPENCL;
  else if (strcmp(name, "NETWORK") == 0)
    return DEVICE_NETWORK;
  else if (strcmp(name, "MULTI") == 0)
    return DEVICE_MULTI;

  return DEVICE_NONE;
}
/* Convert a DeviceType to its name, the inverse of type_from_string().
 * Types without a name (such as DEVICE_NONE) yield an empty string. */
string Device::string_from_type(DeviceType type)
{
  if (type == DEVICE_CPU)
    return "CPU";
  else if (type == DEVICE_CUDA)
    return "CUDA";
  else if (type == DEVICE_OPENCL)
    return "OPENCL";
  else if (type == DEVICE_NETWORK)
    return "NETWORK";
  else if (type == DEVICE_MULTI)
    return "MULTI";

  return "";
}
/* List the device types compiled into this build. The CPU type is always
 * present; CUDA, OpenCL and network are added when their WITH_* define is
 * set. Each type is listed once. */
vector<DeviceType> Device::available_types()
{
  vector<DeviceType> types;
  types.push_back(DEVICE_CPU);
#ifdef WITH_CUDA
  types.push_back(DEVICE_CUDA);
#endif
#ifdef WITH_OPENCL
  types.push_back(DEVICE_OPENCL);
#endif
#ifdef WITH_NETWORK
  types.push_back(DEVICE_NETWORK);
#endif
  return types;
}
/* Collect the devices of all types selected by `mask` (a combination of
 * DEVICE_MASK_* bits).
 *
 * Device lists are queried on first request per type and cached; the types
 * already queried are tracked in `devices_initialized_mask`. The whole
 * function runs under `device_mutex`. Each device is returned once, in the
 * order OpenCL, CUDA, CPU, network.
 */
vector<DeviceInfo> Device::available_devices(uint mask)
{
  /* Lazy initialize devices. On some platforms OpenCL or CUDA drivers can
   * be broken and cause crashes when only trying to get device info, so
   * we don't want to do any initialization until the user chooses to. */
  thread_scoped_lock lock(device_mutex);
  vector<DeviceInfo> devices;

#ifdef WITH_OPENCL
  if (mask & DEVICE_MASK_OPENCL) {
    if (!(devices_initialized_mask & DEVICE_MASK_OPENCL)) {
      if (device_opencl_init()) {
        device_opencl_info(opencl_devices);
      }
      /* Marked as initialized even when init failed, so a broken driver is
       * not probed again on every call. */
      devices_initialized_mask |= DEVICE_MASK_OPENCL;
    }
    foreach (DeviceInfo &info, opencl_devices) {
      devices.push_back(info);
    }
  }
#endif

#ifdef WITH_CUDA
  if (mask & DEVICE_MASK_CUDA) {
    if (!(devices_initialized_mask & DEVICE_MASK_CUDA)) {
      if (device_cuda_init()) {
        device_cuda_info(cuda_devices);
      }
      devices_initialized_mask |= DEVICE_MASK_CUDA;
    }
    foreach (DeviceInfo &info, cuda_devices) {
      devices.push_back(info);
    }
  }
#endif

  if (mask & DEVICE_MASK_CPU) {
    if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
      device_cpu_info(cpu_devices);
      devices_initialized_mask |= DEVICE_MASK_CPU;
    }
    foreach (DeviceInfo &info, cpu_devices) {
      devices.push_back(info);
    }
  }

#ifdef WITH_NETWORK
  if (mask & DEVICE_MASK_NETWORK) {
    if (!(devices_initialized_mask & DEVICE_MASK_NETWORK)) {
      device_network_info(network_devices);
      devices_initialized_mask |= DEVICE_MASK_NETWORK;
    }
    foreach (DeviceInfo &info, network_devices) {
      devices.push_back(info);
    }
  }
#endif

  return devices;
}
/* Build a text report of the capabilities of the device types selected by
 * `mask`. Sections are appended in the order CPU, OpenCL, CUDA; the OpenCL
 * and CUDA sections are only added when the backend is compiled in and
 * initializes successfully. Runs under `device_mutex`. */
string Device::device_capabilities(uint mask)
{
  thread_scoped_lock lock(device_mutex);
  string capabilities = "";

  if (mask & DEVICE_MASK_CPU) {
    capabilities += "\nCPU device capabilities: ";
    capabilities += device_cpu_capabilities() + "\n";
  }

#ifdef WITH_OPENCL
  if (mask & DEVICE_MASK_OPENCL) {
    if (device_opencl_init()) {
      capabilities += "\nOpenCL device capabilities:\n";
      capabilities += device_opencl_capabilities();
    }
  }
#endif

#ifdef WITH_CUDA
  if (mask & DEVICE_MASK_CUDA) {
    if (device_cuda_init()) {
      capabilities += "\nCUDA device capabilities:\n";
      capabilities += device_cuda_capabilities();
    }
  }
#endif

  return capabilities;
}
/* Combine `subdevices` into a single DEVICE_MULTI description.
 *
 * A single subdevice is returned unchanged. Otherwise a CPU subdevice is
 * only kept for background renders, with its thread count reduced by one per
 * other subdevice; it is dropped when no thread remains or when rendering
 * interactively. `threads` is the requested CPU thread count, 0 meaning the
 * system thread count. The capability flags of the result are the logical
 * AND of the flags of the subdevices that were kept.
 */
DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
                                    int threads,
                                    bool background)
{
  assert(subdevices.size() > 0);

  if (subdevices.size() == 1) {
    /* No multi device needed. */
    return subdevices.front();
  }

  DeviceInfo info;
  info.type = DEVICE_MULTI;
  info.id = "MULTI";
  info.description = "Multi Device";
  info.num = 0;

  info.has_half_images = true;
  info.has_volume_decoupled = true;
  info.has_osl = true;
  info.has_profiling = true;

  foreach (const DeviceInfo &device, subdevices) {
    /* Ensure CPU device does not slow down GPU. */
    if (device.type == DEVICE_CPU && subdevices.size() > 1) {
      if (background) {
        int orig_cpu_threads = (threads) ? threads : system_cpu_thread_count();
        /* Convert the device count to int before subtracting: mixing int
         * and size_t would make the subtraction unsigned, and it would wrap
         * around instead of going negative when there are more devices than
         * threads. */
        int cpu_threads = max(orig_cpu_threads - (int)(subdevices.size() - 1), 0);

        VLOG(1) << "CPU render threads reduced from " << orig_cpu_threads << " to " << cpu_threads
                << ", to dedicate to GPU.";

        if (cpu_threads >= 1) {
          DeviceInfo cpu_device = device;
          cpu_device.cpu_threads = cpu_threads;
          info.multi_devices.push_back(cpu_device);
        }
        else {
          continue;
        }
      }
      else {
        VLOG(1) << "CPU render threads disabled for interactive render.";
        continue;
      }
    }
    else {
      info.multi_devices.push_back(device);
    }

    /* Accumulate device info. */
    info.has_half_images &= device.has_half_images;
    info.has_volume_decoupled &= device.has_volume_decoupled;
    info.has_osl &= device.has_osl;
    info.has_profiling &= device.has_profiling;
  }

  return info;
}
/* Drop the cached device lists by delegating to free_memory(). */
void Device::tag_update()
{
  free_memory();
}
/* Reset the device cache: clears `devices_initialized_mask` and releases the
 * memory of all cached per-type device lists. */
void Device::free_memory()
{
  devices_initialized_mask = 0;
  cuda_devices.free_memory();
  opencl_devices.free_memory();
  cpu_devices.free_memory();
  network_devices.free_memory();
}
CCL_NAMESPACE_END

View File

@@ -40,384 +40,428 @@ class RenderTile;
/* Device Types */
/* Kind of render device. The numeric values are used as bit positions by
 * DeviceTypeMask and DEVICE_MASK(). */
enum DeviceType {
  DEVICE_NONE = 0,
  DEVICE_CPU,
  DEVICE_OPENCL,
  DEVICE_CUDA,
  DEVICE_NETWORK,
  DEVICE_MULTI
};
/* Bit flags selecting one or more device types: each mask is the bit at the
 * position of the matching DeviceType value. DEVICE_MASK_ALL has every bit
 * set. */
enum DeviceTypeMask {
  DEVICE_MASK_CPU = (1 << DEVICE_CPU),
  DEVICE_MASK_OPENCL = (1 << DEVICE_OPENCL),
  DEVICE_MASK_CUDA = (1 << DEVICE_CUDA),
  DEVICE_MASK_NETWORK = (1 << DEVICE_NETWORK),
  DEVICE_MASK_ALL = ~0
};
/* Status of a device's feature kernel; see the enumerator names for the
 * individual states. */
enum DeviceKernelStatus {
  DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL = 0,
  DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE,
  DEVICE_KERNEL_USING_FEATURE_KERNEL,
  DEVICE_KERNEL_FEATURE_KERNEL_INVALID,
  DEVICE_KERNEL_UNKNOWN,
};
/* Mask bit for a DeviceType value. The argument and the whole expansion are
 * parenthesized so the macro stays correct when it is given an expression
 * or used inside a larger expression. */
#define DEVICE_MASK(type) ((DeviceTypeMask)(1 << (type)))
/* Description of a render device, or of a group of devices when
 * `multi_devices` is filled in. */
class DeviceInfo {
 public:
  DeviceType type;
  string description;
  string id; /* used for user preferences, should stay fixed with changing hardware config */
  int num;
  bool display_device;       /* GPU is used as a display device. */
  bool has_half_images;      /* Support half-float textures. */
  bool has_volume_decoupled; /* Decoupled volume shading. */
  bool has_osl;              /* Support Open Shading Language. */
  bool use_split_kernel;     /* Use split or mega kernel. */
  bool has_profiling;        /* Supports runtime collection of profiling info. */
  int cpu_threads;
  vector<DeviceInfo> multi_devices;

  /* Defaults to a CPU device with id "CPU" and every capability flag
   * switched off. */
  DeviceInfo()
  {
    type = DEVICE_CPU;
    id = "CPU";
    num = 0;
    cpu_threads = 0;
    display_device = false;
    has_half_images = false;
    has_volume_decoupled = false;
    has_osl = false;
    use_split_kernel = false;
    has_profiling = false;
  }

  /* Two infos are equal when their ids match. Declared const since it does
   * not modify the object, which also allows comparing const instances. */
  bool operator==(const DeviceInfo &info) const
  {
    /* Multiple Devices with the same ID would be very bad. */
    assert(id != info.id ||
           (type == info.type && num == info.num && description == info.description));
    return id == info.id;
  }
};
class DeviceRequestedFeatures {
public:
/* Use experimental feature set. */
bool experimental;
public:
/* Use experimental feature set. */
bool experimental;
/* Selective nodes compilation. */
/* Selective nodes compilation. */
/* Identifier of a node group up to which all the nodes need to be
* compiled in. Nodes from higher group indices will be ignored.
*/
int max_nodes_group;
/* Identifier of a node group up to which all the nodes need to be
* compiled in. Nodes from higher group indices will be ignored.
*/
int max_nodes_group;
/* Features bitfield indicating which features from the requested group
* will be compiled in. Nodes which correspond to features which are not
* in this bitfield will be ignored even if they're in the requested group.
*/
int nodes_features;
/* Features bitfield indicating which features from the requested group
* will be compiled in. Nodes which correspond to features which are not
* in this bitfield will be ignored even if they're in the requested group.
*/
int nodes_features;
/* BVH/sampling kernel features. */
bool use_hair;
bool use_object_motion;
bool use_camera_motion;
/* BVH/sampling kernel features. */
bool use_hair;
bool use_object_motion;
bool use_camera_motion;
/* Denotes whether baking functionality is needed. */
bool use_baking;
/* Denotes whether baking functionality is needed. */
bool use_baking;
/* Use subsurface scattering materials. */
bool use_subsurface;
/* Use subsurface scattering materials. */
bool use_subsurface;
/* Use volume materials. */
bool use_volume;
/* Use volume materials. */
bool use_volume;
/* Use branched integrator. */
bool use_integrator_branched;
/* Use branched integrator. */
bool use_integrator_branched;
/* Use OpenSubdiv patch evaluation */
bool use_patch_evaluation;
/* Use OpenSubdiv patch evaluation */
bool use_patch_evaluation;
/* Use Transparent shadows */
bool use_transparent;
/* Use Transparent shadows */
bool use_transparent;
/* Use various shadow tricks, such as shadow catcher. */
bool use_shadow_tricks;
/* Use various shadow tricks, such as shadow catcher. */
bool use_shadow_tricks;
/* Per-uber shader usage flags. */
bool use_principled;
/* Per-uber shader usage flags. */
bool use_principled;
/* Denoising features. */
bool use_denoising;
/* Denoising features. */
bool use_denoising;
/* Use raytracing in shaders. */
bool use_shader_raytrace;
/* Use raytracing in shaders. */
bool use_shader_raytrace;
/* Use true displacement */
bool use_true_displacement;
/* Use true displacement */
bool use_true_displacement;
/* Use background lights */
bool use_background_light;
/* Use background lights */
bool use_background_light;
DeviceRequestedFeatures()
{
/* TODO(sergey): Find more meaningful defaults. */
experimental = false;
max_nodes_group = 0;
nodes_features = 0;
use_hair = false;
use_object_motion = false;
use_camera_motion = false;
use_baking = false;
use_subsurface = false;
use_volume = false;
use_integrator_branched = false;
use_patch_evaluation = false;
use_transparent = false;
use_shadow_tricks = false;
use_principled = false;
use_denoising = false;
use_shader_raytrace = false;
use_true_displacement = false;
use_background_light = false;
}
DeviceRequestedFeatures()
{
/* TODO(sergey): Find more meaningful defaults. */
experimental = false;
max_nodes_group = 0;
nodes_features = 0;
use_hair = false;
use_object_motion = false;
use_camera_motion = false;
use_baking = false;
use_subsurface = false;
use_volume = false;
use_integrator_branched = false;
use_patch_evaluation = false;
use_transparent = false;
use_shadow_tricks = false;
use_principled = false;
use_denoising = false;
use_shader_raytrace = false;
use_true_displacement = false;
use_background_light = false;
}
bool modified(const DeviceRequestedFeatures& requested_features)
{
return !(experimental == requested_features.experimental &&
max_nodes_group == requested_features.max_nodes_group &&
nodes_features == requested_features.nodes_features &&
use_hair == requested_features.use_hair &&
use_object_motion == requested_features.use_object_motion &&
use_camera_motion == requested_features.use_camera_motion &&
use_baking == requested_features.use_baking &&
use_subsurface == requested_features.use_subsurface &&
use_volume == requested_features.use_volume &&
use_integrator_branched == requested_features.use_integrator_branched &&
use_patch_evaluation == requested_features.use_patch_evaluation &&
use_transparent == requested_features.use_transparent &&
use_shadow_tricks == requested_features.use_shadow_tricks &&
use_principled == requested_features.use_principled &&
use_denoising == requested_features.use_denoising &&
use_shader_raytrace == requested_features.use_shader_raytrace &&
use_true_displacement == requested_features.use_true_displacement &&
use_background_light == requested_features.use_background_light);
}
bool modified(const DeviceRequestedFeatures &requested_features)
{
return !(experimental == requested_features.experimental &&
max_nodes_group == requested_features.max_nodes_group &&
nodes_features == requested_features.nodes_features &&
use_hair == requested_features.use_hair &&
use_object_motion == requested_features.use_object_motion &&
use_camera_motion == requested_features.use_camera_motion &&
use_baking == requested_features.use_baking &&
use_subsurface == requested_features.use_subsurface &&
use_volume == requested_features.use_volume &&
use_integrator_branched == requested_features.use_integrator_branched &&
use_patch_evaluation == requested_features.use_patch_evaluation &&
use_transparent == requested_features.use_transparent &&
use_shadow_tricks == requested_features.use_shadow_tricks &&
use_principled == requested_features.use_principled &&
use_denoising == requested_features.use_denoising &&
use_shader_raytrace == requested_features.use_shader_raytrace &&
use_true_displacement == requested_features.use_true_displacement &&
use_background_light == requested_features.use_background_light);
}
/* Convert the requested features structure to a build options,
* which could then be passed to compilers.
*/
string get_build_options() const
{
string build_options = "";
if(experimental) {
build_options += "-D__KERNEL_EXPERIMENTAL__ ";
}
build_options += "-D__NODES_MAX_GROUP__=" +
string_printf("%d", max_nodes_group);
build_options += " -D__NODES_FEATURES__=" +
string_printf("%d", nodes_features);
if(!use_hair) {
build_options += " -D__NO_HAIR__";
}
if(!use_object_motion) {
build_options += " -D__NO_OBJECT_MOTION__";
}
if(!use_camera_motion) {
build_options += " -D__NO_CAMERA_MOTION__";
}
if(!use_baking) {
build_options += " -D__NO_BAKING__";
}
if(!use_volume) {
build_options += " -D__NO_VOLUME__";
}
if(!use_subsurface) {
build_options += " -D__NO_SUBSURFACE__";
}
if(!use_integrator_branched) {
build_options += " -D__NO_BRANCHED_PATH__";
}
if(!use_patch_evaluation) {
build_options += " -D__NO_PATCH_EVAL__";
}
if(!use_transparent && !use_volume) {
build_options += " -D__NO_TRANSPARENT__";
}
if(!use_shadow_tricks) {
build_options += " -D__NO_SHADOW_TRICKS__";
}
if(!use_principled) {
build_options += " -D__NO_PRINCIPLED__";
}
if(!use_denoising) {
build_options += " -D__NO_DENOISING__";
}
if(!use_shader_raytrace) {
build_options += " -D__NO_SHADER_RAYTRACE__";
}
return build_options;
}
/* Build the string of preprocessor defines that corresponds to this feature
 * set, in a form that can be passed on to a compiler.
 *
 * The string starts with the optional experimental define, followed by the
 * node group / node feature values, followed by one "__NO_..." define for
 * each feature that is switched off.
 */
string get_build_options() const
{
  string build_options = experimental ? "-D__KERNEL_EXPERIMENTAL__ " : "";
  build_options += "-D__NODES_MAX_GROUP__=" + string_printf("%d", max_nodes_group);
  build_options += " -D__NODES_FEATURES__=" + string_printf("%d", nodes_features);

  /* One entry per optional feature: when `unused` is true the matching define
   * is appended. Table order is the order of the defines in the result. */
  const struct {
    bool unused;
    const char *define;
  } feature_defines[] = {
      {!use_hair, " -D__NO_HAIR__"},
      {!use_object_motion, " -D__NO_OBJECT_MOTION__"},
      {!use_camera_motion, " -D__NO_CAMERA_MOTION__"},
      {!use_baking, " -D__NO_BAKING__"},
      {!use_volume, " -D__NO_VOLUME__"},
      {!use_subsurface, " -D__NO_SUBSURFACE__"},
      {!use_integrator_branched, " -D__NO_BRANCHED_PATH__"},
      {!use_patch_evaluation, " -D__NO_PATCH_EVAL__"},
      /* Transparency is only compiled out when volumes are unused as well. */
      {!use_transparent && !use_volume, " -D__NO_TRANSPARENT__"},
      {!use_shadow_tricks, " -D__NO_SHADOW_TRICKS__"},
      {!use_principled, " -D__NO_PRINCIPLED__"},
      {!use_denoising, " -D__NO_DENOISING__"},
      {!use_shader_raytrace, " -D__NO_SHADER_RAYTRACE__"},
  };
  const int num_feature_defines = sizeof(feature_defines) / sizeof(feature_defines[0]);

  for (int i = 0; i < num_feature_defines; i++) {
    if (feature_defines[i].unused) {
      build_options += feature_defines[i].define;
    }
  }
  return build_options;
}
};
std::ostream& operator <<(std::ostream &os,
const DeviceRequestedFeatures& requested_features);
/* Stream insertion operator for DeviceRequestedFeatures. Only declared here;
 * the definition is not part of this header excerpt. */
std::ostream &operator<<(std::ostream &os, const DeviceRequestedFeatures &requested_features);
/* Device */
struct DeviceDrawParams {
function<void()> bind_display_space_shader_cb;
function<void()> unbind_display_space_shader_cb;
function<void()> bind_display_space_shader_cb;
function<void()> unbind_display_space_shader_cb;
};
class Device {
friend class device_sub_ptr;
protected:
enum {
FALLBACK_SHADER_STATUS_NONE = 0,
FALLBACK_SHADER_STATUS_ERROR,
FALLBACK_SHADER_STATUS_SUCCESS,
};
friend class device_sub_ptr;
Device(DeviceInfo& info_, Stats &stats_, Profiler &profiler_, bool background) : background(background),
vertex_buffer(0),
fallback_status(FALLBACK_SHADER_STATUS_NONE), fallback_shader_program(0),
info(info_), stats(stats_), profiler(profiler_) {}
protected:
enum {
FALLBACK_SHADER_STATUS_NONE = 0,
FALLBACK_SHADER_STATUS_ERROR,
FALLBACK_SHADER_STATUS_SUCCESS,
};
bool background;
string error_msg;
/* Store the info/stats/profiler handed in by the creating code and put the
 * fallback display shader state into a defined "nothing set up yet" state.
 *
 * info_      Description of the device; copied into `info`.
 * stats_     Statistics object, kept by reference.
 * profiler_  Profiler object, kept by reference.
 * background Stored in the `background` member.
 */
Device(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background)
    : background(background),
      vertex_buffer(0),
      fallback_status(FALLBACK_SHADER_STATUS_NONE),
      fallback_shader_program(0),
      /* These two were previously left uninitialized. NOTE(review): they look
       * like shader uniform locations, for which OpenGL uses -1 as the
       * "no such location" value - confirm against the fallback shader code. */
      image_texture_location(-1),
      fullscreen_location(-1),
      info(info_),
      stats(stats_),
      profiler(profiler_)
{
}
/* used for real time display */
unsigned int vertex_buffer;
int fallback_status, fallback_shader_program;
int image_texture_location, fullscreen_location;
bool background;
string error_msg;
bool bind_fallback_display_space_shader(const float width, const float height);
/* used for real time display */
unsigned int vertex_buffer;
int fallback_status, fallback_shader_program;
int image_texture_location, fullscreen_location;
virtual device_ptr mem_alloc_sub_ptr(device_memory& /*mem*/, int /*offset*/, int /*size*/)
{
/* Only required for devices that implement denoising. */
assert(false);
return (device_ptr) 0;
}
virtual void mem_free_sub_ptr(device_ptr /*ptr*/) {};
bool bind_fallback_display_space_shader(const float width, const float height);
public:
virtual ~Device();
/* Default sub-pointer allocation: not supported by the base class. All three
 * parameters are ignored; the call trips an assert (when asserts are enabled)
 * and returns a zero device pointer. */
virtual device_ptr mem_alloc_sub_ptr(device_memory & /*mem*/, int /*offset*/, int /*size*/)
{
/* Only required for devices that implement denoising. */
assert(false);
return (device_ptr)0;
}
/* Default sub-pointer release: does nothing and ignores its argument. */
virtual void mem_free_sub_ptr(device_ptr /*ptr*/)
{
}
/* info */
DeviceInfo info;
virtual const string& error_message() { return error_msg; }
bool have_error() { return !error_message().empty(); }
virtual void set_error(const string& error)
{
if(!have_error()) {
error_msg = error;
}
fprintf(stderr, "%s\n", error.c_str());
fflush(stderr);
}
virtual bool show_samples() const { return false; }
virtual BVHLayoutMask get_bvh_layout_mask() const = 0;
public:
virtual ~Device();
/* statistics */
Stats &stats;
Profiler &profiler;
/* info */
DeviceInfo info;
virtual const string &error_message()
{
return error_msg;
}
bool have_error()
{
return !error_message().empty();
}
/* Report an error on this device.
 *
 * The message is stored in `error_msg` only when no error is stored yet, so
 * the first reported error is the one that is kept. Every message, stored or
 * not, is printed to stderr followed by a newline, and stderr is flushed.
 */
virtual void set_error(const string &error)
{
  const bool already_failed = have_error();
  if (!already_failed) {
    error_msg = error;
  }

  fprintf(stderr, "%s\n", error.c_str());
  fflush(stderr);
}
virtual bool show_samples() const
{
return false;
}
virtual BVHLayoutMask get_bvh_layout_mask() const = 0;
/* memory alignment */
virtual int mem_sub_ptr_alignment() { return MIN_ALIGNMENT_CPU_DATA_TYPES; }
/* statistics */
Stats &stats;
Profiler &profiler;
/* constant memory */
virtual void const_copy_to(const char *name, void *host, size_t size) = 0;
/* memory alignment */
virtual int mem_sub_ptr_alignment()
{
return MIN_ALIGNMENT_CPU_DATA_TYPES;
}
/* open shading language, only for CPU device */
virtual void *osl_memory() { return NULL; }
/* constant memory */
virtual void const_copy_to(const char *name, void *host, size_t size) = 0;
/* load/compile kernels, must be called before adding tasks */
virtual bool load_kernels(
const DeviceRequestedFeatures& /*requested_features*/)
{ return true; }
/* open shading language, only for CPU device */
virtual void *osl_memory()
{
return NULL;
}
/* Wait for device to become available to upload data and receive tasks
* This method is used by the OpenCL device to load the
* optimized kernels or when not (yet) available load the
* generic kernels (only during foreground rendering) */
virtual bool wait_for_availability(
const DeviceRequestedFeatures& /*requested_features*/)
{ return true; }
/* Check if there are 'better' kernels available to be used
* We can switch over to these kernels
* This method is used to determine if we can switch the preview kernels
* to regular kernels */
virtual DeviceKernelStatus get_active_kernel_switch_state()
{ return DEVICE_KERNEL_USING_FEATURE_KERNEL; }
/* load/compile kernels, must be called before adding tasks */
virtual bool load_kernels(const DeviceRequestedFeatures & /*requested_features*/)
{
return true;
}
/* tasks */
virtual int get_split_task_count(DeviceTask& task) = 0;
virtual void task_add(DeviceTask& task) = 0;
virtual void task_wait() = 0;
virtual void task_cancel() = 0;
/* Wait for device to become available to upload data and receive tasks
* This method is used by the OpenCL device to load the
* optimized kernels or when not (yet) available load the
* generic kernels (only during foreground rendering) */
virtual bool wait_for_availability(const DeviceRequestedFeatures & /*requested_features*/)
{
return true;
}
/* Check if there are 'better' kernels available to be used
* We can switch over to these kernels
* This method is used to determine if we can switch the preview kernels
* to regular kernels */
virtual DeviceKernelStatus get_active_kernel_switch_state()
{
return DEVICE_KERNEL_USING_FEATURE_KERNEL;
}
/* opengl drawing */
virtual void draw_pixels(device_memory& mem, int y,
int w, int h, int width, int height,
int dx, int dy, int dw, int dh,
bool transparent, const DeviceDrawParams &draw_params);
/* tasks */
virtual int get_split_task_count(DeviceTask &task) = 0;
virtual void task_add(DeviceTask &task) = 0;
virtual void task_wait() = 0;
virtual void task_cancel() = 0;
/* opengl drawing */
virtual void draw_pixels(device_memory &mem,
int y,
int w,
int h,
int width,
int height,
int dx,
int dy,
int dw,
int dh,
bool transparent,
const DeviceDrawParams &draw_params);
#ifdef WITH_NETWORK
/* networking */
void server_run();
/* networking */
void server_run();
#endif
/* multi device */
virtual void map_tile(Device * /*sub_device*/, RenderTile& /*tile*/) {}
virtual int device_number(Device * /*sub_device*/) { return 0; }
virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/) {}
virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/) {}
/* multi device */
virtual void map_tile(Device * /*sub_device*/, RenderTile & /*tile*/)
{
}
virtual int device_number(Device * /*sub_device*/)
{
return 0;
}
virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/)
{
}
virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/)
{
}
/* static */
static Device *create(DeviceInfo& info, Stats &stats, Profiler& profiler, bool background = true);
/* static */
static Device *create(DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool background = true);
static DeviceType type_from_string(const char *name);
static string string_from_type(DeviceType type);
static vector<DeviceType> available_types();
static vector<DeviceInfo> available_devices(uint device_type_mask = DEVICE_MASK_ALL);
static string device_capabilities(uint device_type_mask = DEVICE_MASK_ALL);
static DeviceInfo get_multi_device(const vector<DeviceInfo>& subdevices,
int threads,
bool background);
static DeviceType type_from_string(const char *name);
static string string_from_type(DeviceType type);
static vector<DeviceType> available_types();
static vector<DeviceInfo> available_devices(uint device_type_mask = DEVICE_MASK_ALL);
static string device_capabilities(uint device_type_mask = DEVICE_MASK_ALL);
static DeviceInfo get_multi_device(const vector<DeviceInfo> &subdevices,
int threads,
bool background);
/* Tag devices lists for update. */
static void tag_update();
/* Tag devices lists for update. */
static void tag_update();
static void free_memory();
static void free_memory();
protected:
/* Memory allocation, only accessed through device_memory. */
friend class MultiDevice;
friend class DeviceServer;
friend class device_memory;
protected:
/* Memory allocation, only accessed through device_memory. */
friend class MultiDevice;
friend class DeviceServer;
friend class device_memory;
virtual void mem_alloc(device_memory& mem) = 0;
virtual void mem_copy_to(device_memory& mem) = 0;
virtual void mem_copy_from(device_memory& mem,
int y, int w, int h, int elem) = 0;
virtual void mem_zero(device_memory& mem) = 0;
virtual void mem_free(device_memory& mem) = 0;
virtual void mem_alloc(device_memory &mem) = 0;
virtual void mem_copy_to(device_memory &mem) = 0;
virtual void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) = 0;
virtual void mem_zero(device_memory &mem) = 0;
virtual void mem_free(device_memory &mem) = 0;
private:
/* Indicates whether device types and devices lists were initialized. */
static bool need_types_update, need_devices_update;
static thread_mutex device_mutex;
static vector<DeviceInfo> cuda_devices;
static vector<DeviceInfo> opencl_devices;
static vector<DeviceInfo> cpu_devices;
static vector<DeviceInfo> network_devices;
static uint devices_initialized_mask;
private:
/* Indicates whether device types and devices lists were initialized. */
static bool need_types_update, need_devices_update;
static thread_mutex device_mutex;
static vector<DeviceInfo> cuda_devices;
static vector<DeviceInfo> opencl_devices;
static vector<DeviceInfo> cpu_devices;
static vector<DeviceInfo> network_devices;
static uint devices_initialized_mask;
};
CCL_NAMESPACE_END
#endif /* __DEVICE_H__ */
#endif /* __DEVICE_H__ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -21,314 +21,329 @@
CCL_NAMESPACE_BEGIN
/* Set up a denoising task from the settings carried by `task`.
 *
 * Copies the filter radius, derives `nlm_k_2` and `pca_threshold` from the
 * strength settings, records the render/target buffer layout, binds the
 * neighbor tile map/unmap callbacks to `device`, and allocates and fills the
 * TileInfo block with the list of frames.
 *
 * NOTE(review): this listing is a rendered diff of a formatting-only commit,
 * so every changed statement appears twice (old formatting, then new).
 */
DenoisingTask::DenoisingTask(Device *device, const DeviceTask &task)
: tile_info_mem(device, "denoising tile info mem", MEM_READ_WRITE),
profiler(NULL),
storage(device),
buffer(device),
device(device)
: tile_info_mem(device, "denoising tile info mem", MEM_READ_WRITE),
profiler(NULL),
storage(device),
buffer(device),
device(device)
{
radius = task.denoising.radius;
nlm_k_2 = powf(2.0f, lerp(-5.0f, 3.0f, task.denoising.strength));
/* With relative PCA the threshold is stored as a negative value and uses a
 * different exponent range than the absolute case below. */
if(task.denoising.relative_pca) {
pca_threshold = -powf(10.0f, lerp(-8.0f, 0.0f, task.denoising.feature_strength));
}
else {
pca_threshold = powf(10.0f, lerp(-5.0f, 3.0f, task.denoising.feature_strength));
}
radius = task.denoising.radius;
nlm_k_2 = powf(2.0f, lerp(-5.0f, 3.0f, task.denoising.strength));
if (task.denoising.relative_pca) {
pca_threshold = -powf(10.0f, lerp(-8.0f, 0.0f, task.denoising.feature_strength));
}
else {
pca_threshold = powf(10.0f, lerp(-5.0f, 3.0f, task.denoising.feature_strength));
}
render_buffer.frame_stride = task.frame_stride;
render_buffer.pass_stride = task.pass_stride;
render_buffer.offset = task.pass_denoising_data;
render_buffer.frame_stride = task.frame_stride;
render_buffer.pass_stride = task.pass_stride;
render_buffer.offset = task.pass_denoising_data;
target_buffer.pass_stride = task.target_pass_stride;
target_buffer.denoising_clean_offset = task.pass_denoising_clean;
target_buffer.offset = 0;
target_buffer.pass_stride = task.target_pass_stride;
target_buffer.denoising_clean_offset = task.pass_denoising_clean;
target_buffer.offset = 0;
functions.map_neighbor_tiles = function_bind(task.map_neighbor_tiles, _1, device);
functions.unmap_neighbor_tiles = function_bind(task.unmap_neighbor_tiles, _1, device);
functions.map_neighbor_tiles = function_bind(task.map_neighbor_tiles, _1, device);
functions.unmap_neighbor_tiles = function_bind(task.unmap_neighbor_tiles, _1, device);
/* The allocation size is given as sizeof(TileInfo) divided by sizeof(int). */
tile_info = (TileInfo*) tile_info_mem.alloc(sizeof(TileInfo)/sizeof(int));
tile_info->from_render = task.denoising_from_render? 1 : 0;
tile_info = (TileInfo *)tile_info_mem.alloc(sizeof(TileInfo) / sizeof(int));
tile_info->from_render = task.denoising_from_render ? 1 : 0;
/* Entry 0 is always frame 0; the remaining entries are copied from
 * task.denoising_frames, with the total capped at DENOISE_MAX_FRAMES. */
tile_info->frames[0] = 0;
tile_info->num_frames = min(task.denoising_frames.size() + 1, DENOISE_MAX_FRAMES);
for(int i = 1; i < tile_info->num_frames; i++) {
tile_info->frames[i] = task.denoising_frames[i-1];
}
tile_info->frames[0] = 0;
tile_info->num_frames = min(task.denoising_frames.size() + 1, DENOISE_MAX_FRAMES);
for (int i = 1; i < tile_info->num_frames; i++) {
tile_info->frames[i] = task.denoising_frames[i - 1];
}
write_passes = task.denoising_write_passes;
do_filter = task.denoising_do_filter;
write_passes = task.denoising_write_passes;
do_filter = task.denoising_do_filter;
}
/* Free all memory held by the task: the four `storage` members (XtWX, XtWY,
 * transform, rank), both `buffer` allocations and the tile info block.
 *
 * NOTE(review): each call is listed twice because this listing interleaves
 * the pre- and post-format lines of a formatting-only diff.
 */
DenoisingTask::~DenoisingTask()
{
storage.XtWX.free();
storage.XtWY.free();
storage.transform.free();
storage.rank.free();
buffer.mem.free();
buffer.temporary_mem.free();
tile_info_mem.free();
storage.XtWX.free();
storage.XtWY.free();
storage.transform.free();
storage.rank.free();
buffer.mem.free();
buffer.temporary_mem.free();
tile_info_mem.free();
}
/* Describe the input tiles and the output tile to the denoiser.
 *
 * `rtiles` must hold at least ten entries: indices 0-8 are read as input
 * tiles and index 9 as the target. Presumably 0-8 form a 3x3 neighborhood
 * with the center tile at index 4 - TODO confirm against the caller.
 * The filled TileInfo is copied to the device at the end.
 *
 * NOTE(review): statements appear twice because this listing interleaves the
 * pre- and post-format lines of a formatting-only diff.
 */
void DenoisingTask::set_render_buffer(RenderTile *rtiles)
{
for(int i = 0; i < 9; i++) {
tile_info->offsets[i] = rtiles[i].offset;
tile_info->strides[i] = rtiles[i].stride;
tile_info->buffers[i] = rtiles[i].buffer;
}
/* Four x boundaries from tiles 3, 4, 5 (the last one is the far edge of
 * tile 5), and four y boundaries from tiles 1, 4, 7 in the same way. */
tile_info->x[0] = rtiles[3].x;
tile_info->x[1] = rtiles[4].x;
tile_info->x[2] = rtiles[5].x;
tile_info->x[3] = rtiles[5].x + rtiles[5].w;
tile_info->y[0] = rtiles[1].y;
tile_info->y[1] = rtiles[4].y;
tile_info->y[2] = rtiles[7].y;
tile_info->y[3] = rtiles[7].y + rtiles[7].h;
for (int i = 0; i < 9; i++) {
tile_info->offsets[i] = rtiles[i].offset;
tile_info->strides[i] = rtiles[i].stride;
tile_info->buffers[i] = rtiles[i].buffer;
}
tile_info->x[0] = rtiles[3].x;
tile_info->x[1] = rtiles[4].x;
tile_info->x[2] = rtiles[5].x;
tile_info->x[3] = rtiles[5].x + rtiles[5].w;
tile_info->y[0] = rtiles[1].y;
tile_info->y[1] = rtiles[4].y;
tile_info->y[2] = rtiles[7].y;
tile_info->y[3] = rtiles[7].y + rtiles[7].h;
/* The tenth entry describes where the result is written. */
target_buffer.offset = rtiles[9].offset;
target_buffer.stride = rtiles[9].stride;
target_buffer.ptr = rtiles[9].buffer;
target_buffer.offset = rtiles[9].offset;
target_buffer.stride = rtiles[9].stride;
target_buffer.ptr = rtiles[9].buffer;
/* The prefiltered output offset is only looked up when passes are written
 * and the target tile has a `buffers` object; otherwise it is 0. */
if(write_passes && rtiles[9].buffers) {
target_buffer.denoising_output_offset = rtiles[9].buffers->params.get_denoising_prefiltered_offset();
}
else {
target_buffer.denoising_output_offset = 0;
}
if (write_passes && rtiles[9].buffers) {
target_buffer.denoising_output_offset =
rtiles[9].buffers->params.get_denoising_prefiltered_offset();
}
else {
target_buffer.denoising_output_offset = 0;
}
tile_info_mem.copy_to_device();
tile_info_mem.copy_to_device();
}
/* Compute the working rectangle and allocate the denoising buffers.
 *
 * Fills in `rect`, the layout fields of `buffer` (passes, width, stride, h,
 * pass_stride, frame_stride, use_intensity, use_time) and allocates
 * `buffer.mem` and `buffer.temporary_mem` on the device.
 *
 * NOTE(review): statements appear twice because this listing interleaves the
 * pre- and post-format lines of a formatting-only diff.
 */
void DenoisingTask::setup_denoising_buffer()
{
/* Expand filter_area by radius pixels and clamp the result to the extent of the neighboring tiles */
rect = rect_from_shape(filter_area.x, filter_area.y, filter_area.z, filter_area.w);
rect = rect_expand(rect, radius);
rect = rect_clip(rect, make_int4(tile_info->x[0], tile_info->y[0], tile_info->x[3], tile_info->y[3]));
/* Expand filter_area by radius pixels and clamp the result to the extent of the neighboring tiles */
rect = rect_from_shape(filter_area.x, filter_area.y, filter_area.z, filter_area.w);
rect = rect_expand(rect, radius);
rect = rect_clip(rect,
make_int4(tile_info->x[0], tile_info->y[0], tile_info->x[3], tile_info->y[3]));
/* One extra pass (15 instead of 14) is reserved when intensity is used,
 * which is the case when passes are written or more than one frame is given. */
buffer.use_intensity = write_passes || (tile_info->num_frames > 1);
buffer.passes = buffer.use_intensity? 15 : 14;
buffer.width = rect.z - rect.x;
buffer.stride = align_up(buffer.width, 4);
buffer.h = rect.w - rect.y;
/* The device alignment is converted from bytes to a number of floats. */
int alignment_floats = divide_up(device->mem_sub_ptr_alignment(), sizeof(float));
buffer.pass_stride = align_up(buffer.stride * buffer.h, alignment_floats);
buffer.frame_stride = buffer.pass_stride * buffer.passes;
/* Pad the total size by four floats since the SIMD kernels might go a bit over the end. */
int mem_size = align_up(tile_info->num_frames * buffer.frame_stride + 4, alignment_floats);
buffer.mem.alloc_to_device(mem_size, false);
buffer.use_time = (tile_info->num_frames > 1);
buffer.use_intensity = write_passes || (tile_info->num_frames > 1);
buffer.passes = buffer.use_intensity ? 15 : 14;
buffer.width = rect.z - rect.x;
buffer.stride = align_up(buffer.width, 4);
buffer.h = rect.w - rect.y;
int alignment_floats = divide_up(device->mem_sub_ptr_alignment(), sizeof(float));
buffer.pass_stride = align_up(buffer.stride * buffer.h, alignment_floats);
buffer.frame_stride = buffer.pass_stride * buffer.passes;
/* Pad the total size by four floats since the SIMD kernels might go a bit over the end. */
int mem_size = align_up(tile_info->num_frames * buffer.frame_stride + 4, alignment_floats);
buffer.mem.alloc_to_device(mem_size, false);
buffer.use_time = (tile_info->num_frames > 1);
/* CPUs process shifts sequentially while GPUs process them in parallel. */
int num_layers;
if(buffer.gpu_temporary_mem) {
/* Shadowing prefiltering uses a radius of 6, so allocate at least that much. */
int max_radius = max(radius, 6);
int num_shifts = (2*max_radius + 1) * (2*max_radius + 1);
num_layers = 2*num_shifts + 1;
}
else {
num_layers = 3;
}
/* Allocate two layers per shift as well as one for the weight accumulation. */
buffer.temporary_mem.alloc_to_device(num_layers * buffer.pass_stride);
/* CPUs process shifts sequentially while GPUs process them in parallel. */
int num_layers;
if (buffer.gpu_temporary_mem) {
/* Shadowing prefiltering uses a radius of 6, so allocate at least that much. */
int max_radius = max(radius, 6);
int num_shifts = (2 * max_radius + 1) * (2 * max_radius + 1);
num_layers = 2 * num_shifts + 1;
}
else {
num_layers = 3;
}
/* Allocate two layers per shift as well as one for the weight accumulation. */
buffer.temporary_mem.alloc_to_device(num_layers * buffer.pass_stride);
}
/* Produce the filtered shadow feature pass.
 *
 * Works on sub-ranges of `buffer.mem`, each one pass_stride long: passes
 * 0-3, 5 and 6 are used as scratch space and the final result is written to
 * pass 4. The steps are: split the shadow data into two halves plus variance
 * estimates, smooth the buffer variance, filter both halves against each
 * other, estimate the residual variance, filter a second time and combine.
 *
 * NOTE(review): statements appear twice because this listing interleaves the
 * pre- and post-format lines of a formatting-only diff.
 */
void DenoisingTask::prefilter_shadowing()
{
device_ptr null_ptr = (device_ptr) 0;
device_ptr null_ptr = (device_ptr)0;
device_sub_ptr unfiltered_a (buffer.mem, 0, buffer.pass_stride);
device_sub_ptr unfiltered_b (buffer.mem, 1*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr sample_var (buffer.mem, 2*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr sample_var_var (buffer.mem, 3*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr buffer_var (buffer.mem, 5*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr filtered_var (buffer.mem, 6*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr unfiltered_a(buffer.mem, 0, buffer.pass_stride);
device_sub_ptr unfiltered_b(buffer.mem, 1 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr sample_var(buffer.mem, 2 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr sample_var_var(buffer.mem, 3 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr buffer_var(buffer.mem, 5 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr filtered_var(buffer.mem, 6 * buffer.pass_stride, buffer.pass_stride);
/* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the sample variance and the buffer variance. */
functions.divide_shadow(*unfiltered_a, *unfiltered_b, *sample_var, *sample_var_var, *buffer_var);
/* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the sample variance and the buffer variance. */
functions.divide_shadow(*unfiltered_a, *unfiltered_b, *sample_var, *sample_var_var, *buffer_var);
/* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */
nlm_state.set_parameters(6, 3, 4.0f, 1.0f, false);
functions.non_local_means(*buffer_var, *sample_var, *sample_var_var, *filtered_var);
/* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */
nlm_state.set_parameters(6, 3, 4.0f, 1.0f, false);
functions.non_local_means(*buffer_var, *sample_var, *sample_var_var, *filtered_var);
/* Reuse memory, the previous data isn't needed anymore. */
device_ptr filtered_a = *buffer_var,
filtered_b = *sample_var;
/* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */
nlm_state.set_parameters(5, 3, 1.0f, 0.25f, false);
functions.non_local_means(*unfiltered_a, *unfiltered_b, *filtered_var, filtered_a);
functions.non_local_means(*unfiltered_b, *unfiltered_a, *filtered_var, filtered_b);
/* Reuse memory, the previous data isn't needed anymore. */
device_ptr filtered_a = *buffer_var, filtered_b = *sample_var;
/* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */
nlm_state.set_parameters(5, 3, 1.0f, 0.25f, false);
functions.non_local_means(*unfiltered_a, *unfiltered_b, *filtered_var, filtered_a);
functions.non_local_means(*unfiltered_b, *unfiltered_a, *filtered_var, filtered_b);
device_ptr residual_var = *sample_var_var;
/* Estimate the residual variance between the two filtered halves. */
functions.combine_halves(filtered_a, filtered_b, null_ptr, residual_var, 2, rect);
device_ptr residual_var = *sample_var_var;
/* Estimate the residual variance between the two filtered halves. */
functions.combine_halves(filtered_a, filtered_b, null_ptr, residual_var, 2, rect);
device_ptr final_a = *unfiltered_a,
final_b = *unfiltered_b;
/* Use the residual variance for a second filter pass. */
nlm_state.set_parameters(4, 2, 1.0f, 0.5f, false);
functions.non_local_means(filtered_a, filtered_b, residual_var, final_a);
functions.non_local_means(filtered_b, filtered_a, residual_var, final_b);
device_ptr final_a = *unfiltered_a, final_b = *unfiltered_b;
/* Use the residual variance for a second filter pass. */
nlm_state.set_parameters(4, 2, 1.0f, 0.5f, false);
functions.non_local_means(filtered_a, filtered_b, residual_var, final_a);
functions.non_local_means(filtered_b, filtered_a, residual_var, final_b);
/* Combine the two double-filtered halves to a final shadow feature. */
device_sub_ptr shadow_pass(buffer.mem, 4*buffer.pass_stride, buffer.pass_stride);
functions.combine_halves(final_a, final_b, *shadow_pass, null_ptr, 0, rect);
/* Combine the two double-filtered halves to a final shadow feature. */
device_sub_ptr shadow_pass(buffer.mem, 4 * buffer.pass_stride, buffer.pass_stride);
functions.combine_halves(final_a, final_b, *shadow_pass, null_ptr, 0, rect);
}
/* Fetch and smooth seven feature passes.
 *
 * For each of the seven entries, the mean/variance pair selected by
 * `mean_from` / `variance_from` is read into scratch passes 8 and 9 of
 * `buffer.mem`, filtered with non-local means, and the result stored in the
 * pass of `buffer.mem` selected by `pass_to`.
 *
 * NOTE(review): statements appear twice because this listing interleaves the
 * pre- and post-format lines of a formatting-only diff.
 */
void DenoisingTask::prefilter_features()
{
device_sub_ptr unfiltered (buffer.mem, 8*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr variance (buffer.mem, 9*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr unfiltered(buffer.mem, 8 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr variance(buffer.mem, 9 * buffer.pass_stride, buffer.pass_stride);
/* The three tables are parallel: entry i of each describes the same pass. */
int mean_from[] = { 0, 1, 2, 12, 6, 7, 8 };
int variance_from[] = { 3, 4, 5, 13, 9, 10, 11};
int pass_to[] = { 1, 2, 3, 0, 5, 6, 7};
for(int pass = 0; pass < 7; pass++) {
device_sub_ptr feature_pass(buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride);
/* Get the unfiltered pass and its variance from the RenderBuffers. */
functions.get_feature(mean_from[pass], variance_from[pass], *unfiltered, *variance, 1.0f / render_buffer.samples);
/* Smooth the pass and store the result in the denoising buffers. */
nlm_state.set_parameters(2, 2, 1.0f, 0.25f, false);
functions.non_local_means(*unfiltered, *unfiltered, *variance, *feature_pass);
}
int mean_from[] = {0, 1, 2, 12, 6, 7, 8};
int variance_from[] = {3, 4, 5, 13, 9, 10, 11};
int pass_to[] = {1, 2, 3, 0, 5, 6, 7};
for (int pass = 0; pass < 7; pass++) {
device_sub_ptr feature_pass(
buffer.mem, pass_to[pass] * buffer.pass_stride, buffer.pass_stride);
/* Get the unfiltered pass and its variance from the RenderBuffers. */
functions.get_feature(mean_from[pass],
variance_from[pass],
*unfiltered,
*variance,
1.0f / render_buffer.samples);
/* Smooth the pass and store the result in the denoising buffers. */
nlm_state.set_parameters(2, 2, 1.0f, 0.25f, false);
functions.non_local_means(*unfiltered, *unfiltered, *variance, *feature_pass);
}
}
/* Fetches the three color passes into temporary device memory, runs outlier
 * detection on them and, when buffer.use_intensity is set, filters the result
 * into an additional intensity pass. */
void DenoisingTask::prefilter_color()
{
  int mean_from[] = {20, 21, 22};
  int variance_from[] = {23, 24, 25};
  int mean_to[] = {8, 9, 10};
  int variance_to[] = {11, 12, 13};
  int num_color_passes = 3;

  /* Temporary storage for the unprocessed color; freed when it goes out of scope. */
  device_only_memory<float> temporary_color(device, "denoising temporary color");
  temporary_color.alloc_to_device(3 * buffer.pass_stride, false);

  for (int pass = 0; pass < num_color_passes; pass++) {
    device_sub_ptr color_pass(temporary_color, pass * buffer.pass_stride, buffer.pass_stride);
    device_sub_ptr color_var_pass(
        buffer.mem, variance_to[pass] * buffer.pass_stride, buffer.pass_stride);
    functions.get_feature(mean_from[pass],
                          variance_from[pass],
                          *color_pass,
                          *color_var_pass,
                          1.0f / render_buffer.samples);
  }

  /* From here on the three color (and variance) passes are addressed as one range. */
  device_sub_ptr depth_pass(buffer.mem, 0, buffer.pass_stride);
  device_sub_ptr color_var_pass(
      buffer.mem, variance_to[0] * buffer.pass_stride, 3 * buffer.pass_stride);
  device_sub_ptr output_pass(buffer.mem, mean_to[0] * buffer.pass_stride, 3 * buffer.pass_stride);
  functions.detect_outliers(
      temporary_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);

  if (buffer.use_intensity) {
    device_sub_ptr intensity_pass(buffer.mem, 14 * buffer.pass_stride, buffer.pass_stride);
    nlm_state.set_parameters(radius, 4, 2.0f, nlm_k_2 * 4.0f, true);
    functions.non_local_means(*output_pass, *output_pass, *color_var_pass, *intensity_pass);
  }
}
/* Fills the denoising buffer for every frame by fetching each pass with
 * get_feature() and no variance output. render_buffer.offset is advanced per
 * frame while loading and restored before returning. */
void DenoisingTask::load_buffer()
{
  device_ptr null_ptr = (device_ptr)0;

  int original_offset = render_buffer.offset;

  int num_passes = buffer.use_intensity ? 15 : 14;
  for (int i = 0; i < tile_info->num_frames; i++) {
    for (int pass = 0; pass < num_passes; pass++) {
      device_sub_ptr to_pass(
          buffer.mem, i * buffer.frame_stride + pass * buffer.pass_stride, buffer.pass_stride);
      /* Passes 11..13 are scaled by 1/samples, all other passes are copied unscaled. */
      bool is_variance = (pass >= 11) && (pass <= 13);
      functions.get_feature(
          pass, -1, *to_pass, null_ptr, is_variance ? (1.0f / render_buffer.samples) : 1.0f);
    }
    render_buffer.offset += render_buffer.frame_stride;
  }

  render_buffer.offset = original_offset;
}
/* Writes every pass of the denoising buffer to the target buffer, starting at
 * target_buffer.denoising_output_offset. */
void DenoisingTask::write_buffer()
{
  reconstruction_state.buffer_params = make_int4(target_buffer.offset,
                                                 target_buffer.stride,
                                                 target_buffer.pass_stride,
                                                 target_buffer.denoising_clean_offset);
  int num_passes = buffer.use_intensity ? 15 : 14;
  for (int pass = 0; pass < num_passes; pass++) {
    device_sub_ptr from_pass(buffer.mem, pass * buffer.pass_stride, buffer.pass_stride);
    int out_offset = pass + target_buffer.denoising_output_offset;
    functions.write_feature(out_offset, *from_pass, target_buffer.ptr);
  }
}
/* Sizes the transform and rank storage to the filter area and invokes the
 * device's construct_transform callback. */
void DenoisingTask::construct_transform()
{
  /* filter_area.z / .w are used as the width / height of the filtered region. */
  storage.w = filter_area.z;
  storage.h = filter_area.w;

  storage.transform.alloc_to_device(storage.w * storage.h * TRANSFORM_SIZE, false);
  storage.rank.alloc_to_device(storage.w * storage.h, false);

  functions.construct_transform();
}
void DenoisingTask::reconstruct()
{
storage.XtWX.alloc_to_device(storage.w*storage.h*XTWX_SIZE, false);
storage.XtWY.alloc_to_device(storage.w*storage.h*XTWY_SIZE, false);
storage.XtWX.zero_to_device();
storage.XtWY.zero_to_device();
storage.XtWX.alloc_to_device(storage.w * storage.h * XTWX_SIZE, false);
storage.XtWY.alloc_to_device(storage.w * storage.h * XTWY_SIZE, false);
storage.XtWX.zero_to_device();
storage.XtWY.zero_to_device();
reconstruction_state.filter_window = rect_from_shape(filter_area.x-rect.x, filter_area.y-rect.y, storage.w, storage.h);
int tile_coordinate_offset = filter_area.y*target_buffer.stride + filter_area.x;
reconstruction_state.buffer_params = make_int4(target_buffer.offset + tile_coordinate_offset,
target_buffer.stride,
target_buffer.pass_stride,
target_buffer.denoising_clean_offset);
reconstruction_state.source_w = rect.z-rect.x;
reconstruction_state.source_h = rect.w-rect.y;
reconstruction_state.filter_window = rect_from_shape(
filter_area.x - rect.x, filter_area.y - rect.y, storage.w, storage.h);
int tile_coordinate_offset = filter_area.y * target_buffer.stride + filter_area.x;
reconstruction_state.buffer_params = make_int4(target_buffer.offset + tile_coordinate_offset,
target_buffer.stride,
target_buffer.pass_stride,
target_buffer.denoising_clean_offset);
reconstruction_state.source_w = rect.z - rect.x;
reconstruction_state.source_h = rect.w - rect.y;
device_sub_ptr color_ptr (buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride);
device_sub_ptr color_var_ptr(buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride);
for(int f = 0; f < tile_info->num_frames; f++) {
device_ptr scale_ptr = 0;
device_sub_ptr *scale_sub_ptr = NULL;
if(tile_info->frames[f] != 0 && (tile_info->num_frames > 1)) {
scale_sub_ptr = new device_sub_ptr(buffer.mem, 14*buffer.pass_stride, buffer.pass_stride);
scale_ptr = **scale_sub_ptr;
}
device_sub_ptr color_ptr(buffer.mem, 8 * buffer.pass_stride, 3 * buffer.pass_stride);
device_sub_ptr color_var_ptr(buffer.mem, 11 * buffer.pass_stride, 3 * buffer.pass_stride);
for (int f = 0; f < tile_info->num_frames; f++) {
device_ptr scale_ptr = 0;
device_sub_ptr *scale_sub_ptr = NULL;
if (tile_info->frames[f] != 0 && (tile_info->num_frames > 1)) {
scale_sub_ptr = new device_sub_ptr(buffer.mem, 14 * buffer.pass_stride, buffer.pass_stride);
scale_ptr = **scale_sub_ptr;
}
functions.accumulate(*color_ptr, *color_var_ptr, scale_ptr, f);
delete scale_sub_ptr;
}
functions.solve(target_buffer.ptr);
functions.accumulate(*color_ptr, *color_var_ptr, scale_ptr, f);
delete scale_sub_ptr;
}
functions.solve(target_buffer.ptr);
}
/* Entry point for denoising one tile: maps the neighboring tiles, fills the
 * denoising buffer (prefiltering render data or loading stored passes), then
 * optionally filters and writes the passes before unmapping the tiles again. */
void DenoisingTask::run_denoising(RenderTile *tile)
{
  /* The tile to denoise is stored at index 4; the other entries are filled in
   * by map_neighbor_tiles(). */
  RenderTile rtiles[10];
  rtiles[4] = *tile;
  functions.map_neighbor_tiles(rtiles);
  set_render_buffer(rtiles);

  setup_denoising_buffer();

  if (tile_info->from_render) {
    prefilter_shadowing();
    prefilter_features();
    prefilter_color();
  }
  else {
    load_buffer();
  }

  if (do_filter) {
    construct_transform();
    reconstruct();
  }

  if (write_passes) {
    write_buffer();
  }

  functions.unmap_neighbor_tiles(rtiles);
}
CCL_NAMESPACE_END

View File

@@ -28,165 +28,169 @@
CCL_NAMESPACE_BEGIN
class DenoisingTask {
public:
/* Parameters of the denoising algorithm. */
int radius;
float nlm_k_2;
float pca_threshold;
public:
/* Parameters of the denoising algorithm. */
int radius;
float nlm_k_2;
float pca_threshold;
/* Parameters of the RenderBuffers. */
struct RenderBuffers {
int offset;
int pass_stride;
int frame_stride;
int samples;
} render_buffer;
/* Parameters of the RenderBuffers. */
struct RenderBuffers {
int offset;
int pass_stride;
int frame_stride;
int samples;
} render_buffer;
/* Pointer and parameters of the target buffer. */
struct TargetBuffer {
int offset;
int stride;
int pass_stride;
int denoising_clean_offset;
int denoising_output_offset;
device_ptr ptr;
} target_buffer;
/* Pointer and parameters of the target buffer. */
struct TargetBuffer {
int offset;
int stride;
int pass_stride;
int denoising_clean_offset;
int denoising_output_offset;
device_ptr ptr;
} target_buffer;
TileInfo *tile_info;
device_vector<int> tile_info_mem;
TileInfo *tile_info;
device_vector<int> tile_info_mem;
ProfilingState *profiler;
ProfilingState *profiler;
int4 rect;
int4 filter_area;
int4 rect;
int4 filter_area;
bool write_passes;
bool do_filter;
bool write_passes;
bool do_filter;
struct DeviceFunctions {
function<bool(device_ptr image_ptr, /* Contains the values that are smoothed. */
device_ptr guide_ptr, /* Contains the values that are used to calculate weights. */
device_ptr variance_ptr, /* Contains the variance of the guide image. */
device_ptr out_ptr /* The filtered output is written into this image. */
)> non_local_means;
function<bool(device_ptr color_ptr,
device_ptr color_variance_ptr,
device_ptr scale_ptr,
int frame
)> accumulate;
function<bool(device_ptr output_ptr)> solve;
function<bool()> construct_transform;
struct DeviceFunctions {
function<bool(
device_ptr image_ptr, /* Contains the values that are smoothed. */
device_ptr guide_ptr, /* Contains the values that are used to calculate weights. */
device_ptr variance_ptr, /* Contains the variance of the guide image. */
device_ptr out_ptr /* The filtered output is written into this image. */
)>
non_local_means;
function<bool(
device_ptr color_ptr, device_ptr color_variance_ptr, device_ptr scale_ptr, int frame)>
accumulate;
function<bool(device_ptr output_ptr)> solve;
function<bool()> construct_transform;
function<bool(device_ptr a_ptr,
device_ptr b_ptr,
device_ptr mean_ptr,
device_ptr variance_ptr,
int r,
int4 rect
)> combine_halves;
function<bool(device_ptr a_ptr,
device_ptr b_ptr,
device_ptr sample_variance_ptr,
device_ptr sv_variance_ptr,
device_ptr buffer_variance_ptr
)> divide_shadow;
function<bool(int mean_offset,
int variance_offset,
device_ptr mean_ptr,
device_ptr variance_ptr,
float scale
)> get_feature;
function<bool(device_ptr image_ptr,
device_ptr variance_ptr,
device_ptr depth_ptr,
device_ptr output_ptr
)> detect_outliers;
function<bool(int out_offset,
device_ptr frop_ptr,
device_ptr buffer_ptr
)> write_feature;
function<void(RenderTile *rtiles)> map_neighbor_tiles;
function<void(RenderTile *rtiles)> unmap_neighbor_tiles;
} functions;
function<bool(device_ptr a_ptr,
device_ptr b_ptr,
device_ptr mean_ptr,
device_ptr variance_ptr,
int r,
int4 rect)>
combine_halves;
function<bool(device_ptr a_ptr,
device_ptr b_ptr,
device_ptr sample_variance_ptr,
device_ptr sv_variance_ptr,
device_ptr buffer_variance_ptr)>
divide_shadow;
function<bool(int mean_offset,
int variance_offset,
device_ptr mean_ptr,
device_ptr variance_ptr,
float scale)>
get_feature;
function<bool(device_ptr image_ptr,
device_ptr variance_ptr,
device_ptr depth_ptr,
device_ptr output_ptr)>
detect_outliers;
function<bool(int out_offset, device_ptr frop_ptr, device_ptr buffer_ptr)> write_feature;
function<void(RenderTile *rtiles)> map_neighbor_tiles;
function<void(RenderTile *rtiles)> unmap_neighbor_tiles;
} functions;
/* Stores state of the current Reconstruction operation,
* which is accessed by the device in order to perform the operation. */
struct ReconstructionState {
int4 filter_window;
int4 buffer_params;
/* Stores state of the current Reconstruction operation,
* which is accessed by the device in order to perform the operation. */
struct ReconstructionState {
int4 filter_window;
int4 buffer_params;
int source_w;
int source_h;
} reconstruction_state;
int source_w;
int source_h;
} reconstruction_state;
/* Stores state of the current NLM operation,
* which is accessed by the device in order to perform the operation. */
struct NLMState {
int r; /* Search radius of the filter. */
int f; /* Patch size of the filter. */
float a; /* Variance compensation factor in the MSE estimation. */
float k_2; /* Squared value of the k parameter of the filter. */
bool is_color;
/* Stores state of the current NLM operation,
* which is accessed by the device in order to perform the operation. */
struct NLMState {
int r; /* Search radius of the filter. */
int f; /* Patch size of the filter. */
float a; /* Variance compensation factor in the MSE estimation. */
float k_2; /* Squared value of the k parameter of the filter. */
bool is_color;
void set_parameters(int r_, int f_, float a_, float k_2_, bool is_color_) { r = r_; f = f_; a = a_, k_2 = k_2_; is_color = is_color_; }
} nlm_state;
void set_parameters(int r_, int f_, float a_, float k_2_, bool is_color_)
{
r = r_;
f = f_;
a = a_, k_2 = k_2_;
is_color = is_color_;
}
} nlm_state;
struct Storage {
device_only_memory<float> transform;
device_only_memory<int> rank;
device_only_memory<float> XtWX;
device_only_memory<float3> XtWY;
int w;
int h;
struct Storage {
device_only_memory<float> transform;
device_only_memory<int> rank;
device_only_memory<float> XtWX;
device_only_memory<float3> XtWY;
int w;
int h;
Storage(Device *device)
: transform(device, "denoising transform"),
rank(device, "denoising rank"),
XtWX(device, "denoising XtWX"),
XtWY(device, "denoising XtWY")
{}
} storage;
Storage(Device *device)
: transform(device, "denoising transform"),
rank(device, "denoising rank"),
XtWX(device, "denoising XtWX"),
XtWY(device, "denoising XtWY")
{
}
} storage;
DenoisingTask(Device *device, const DeviceTask &task);
~DenoisingTask();
DenoisingTask(Device *device, const DeviceTask &task);
~DenoisingTask();
void run_denoising(RenderTile *tile);
void run_denoising(RenderTile *tile);
struct DenoiseBuffers {
int pass_stride;
int passes;
int stride;
int h;
int width;
int frame_stride;
device_only_memory<float> mem;
device_only_memory<float> temporary_mem;
bool use_time;
bool use_intensity;
struct DenoiseBuffers {
int pass_stride;
int passes;
int stride;
int h;
int width;
int frame_stride;
device_only_memory<float> mem;
device_only_memory<float> temporary_mem;
bool use_time;
bool use_intensity;
bool gpu_temporary_mem;
bool gpu_temporary_mem;
DenoiseBuffers(Device *device)
: mem(device, "denoising pixel buffer"),
temporary_mem(device, "denoising temporary mem")
{}
} buffer;
DenoiseBuffers(Device *device)
: mem(device, "denoising pixel buffer"), temporary_mem(device, "denoising temporary mem")
{
}
} buffer;
protected:
Device *device;
protected:
Device *device;
void set_render_buffer(RenderTile *rtiles);
void setup_denoising_buffer();
void prefilter_shadowing();
void prefilter_features();
void prefilter_color();
void construct_transform();
void reconstruct();
void set_render_buffer(RenderTile *rtiles);
void setup_denoising_buffer();
void prefilter_shadowing();
void prefilter_features();
void prefilter_color();
void construct_transform();
void reconstruct();
void load_buffer();
void write_buffer();
void load_buffer();
void write_buffer();
};
CCL_NAMESPACE_END
#endif /* __DEVICE_DENOISING_H__ */
#endif /* __DEVICE_DENOISING_H__ */

View File

@@ -21,19 +21,22 @@ CCL_NAMESPACE_BEGIN
class Device;
Device *device_cpu_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
Device *device_cpu_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
bool device_opencl_init();
Device *device_opencl_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
bool device_opencl_compile_kernel(const vector<string>& parameters);
Device *device_opencl_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
bool device_opencl_compile_kernel(const vector<string> &parameters);
bool device_cuda_init();
Device *device_cuda_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
Device *device_network_create(DeviceInfo& info, Stats &stats, Profiler &profiler, const char *address);
Device *device_multi_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
Device *device_network_create(DeviceInfo &info,
Stats &stats,
Profiler &profiler,
const char *address);
Device *device_multi_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
void device_cpu_info(vector<DeviceInfo>& devices);
void device_opencl_info(vector<DeviceInfo>& devices);
void device_cuda_info(vector<DeviceInfo>& devices);
void device_network_info(vector<DeviceInfo>& devices);
void device_cpu_info(vector<DeviceInfo> &devices);
void device_opencl_info(vector<DeviceInfo> &devices);
void device_cuda_info(vector<DeviceInfo> &devices);
void device_network_info(vector<DeviceInfo> &devices);
string device_cpu_capabilities();
string device_opencl_capabilities();
@@ -41,4 +44,4 @@ string device_cuda_capabilities();
CCL_NAMESPACE_END
#endif /* __DEVICE_INTERN_H__ */
#endif /* __DEVICE_INTERN_H__ */

View File

@@ -22,21 +22,21 @@ CCL_NAMESPACE_BEGIN
/* Device Memory */
/* Creates an empty memory descriptor: uchar element type, zero sizes, null
 * pointers. The constructor is protected in the class, so instances are
 * created through subclasses. */
device_memory::device_memory(Device *device, const char *name, MemoryType type)
    : data_type(device_type_traits<uchar>::data_type),
      data_elements(device_type_traits<uchar>::num_elements),
      data_size(0),
      device_size(0),
      data_width(0),
      data_height(0),
      data_depth(0),
      type(type),
      name(name),
      interpolation(INTERPOLATION_NONE),
      extension(EXTENSION_REPEAT),
      device(device),
      device_pointer(0),
      host_pointer(0),
      shared_pointer(0),
      /* Backup slots written by swap_device(); zeroed so that restore_device()
       * never reads indeterminate values. */
      original_device_ptr(0),
      original_device_size(0),
      original_device(0)
{
}
@@ -46,95 +46,94 @@ device_memory::~device_memory()
/* Allocates `size` bytes of aligned host memory and registers them with the
 * guarded allocation statistics. Returns 0 when size is 0 and throws
 * std::bad_alloc when the allocation fails. */
void *device_memory::host_alloc(size_t size)
{
  if (!size) {
    return 0;
  }

  void *ptr = util_aligned_malloc(size, MIN_ALIGNMENT_CPU_DATA_TYPES);

  if (ptr) {
    util_guarded_mem_alloc(size);
  }
  else {
    throw std::bad_alloc();
  }

  return ptr;
}
/* Releases the host allocation, if any, and clears host_pointer. */
void device_memory::host_free()
{
  if (host_pointer) {
    /* Un-register before freeing; memory_size() is computed from the current data_size. */
    util_guarded_mem_free(memory_size());
    util_aligned_free((void *)host_pointer);
    host_pointer = 0;
  }
}
/* Asks the device to allocate this memory. Asserts that nothing is allocated
 * yet and that the memory is not a texture. */
void device_memory::device_alloc()
{
  assert(!device_pointer && type != MEM_TEXTURE);
  device->mem_alloc(*this);
}
/* Frees the device allocation through the device; no-op when device_pointer is null. */
void device_memory::device_free()
{
  if (device_pointer) {
    device->mem_free(*this);
  }
}
/* Copies the host data to the device; no-op when there is no host allocation. */
void device_memory::device_copy_to()
{
  if (host_pointer) {
    device->mem_copy_to(*this);
  }
}
/* Forwards to Device::mem_copy_from() with the given y, w, h and elem
 * arguments. Asserts that the memory is neither a texture nor read-only. */
void device_memory::device_copy_from(int y, int w, int h, int elem)
{
  assert(type != MEM_TEXTURE && type != MEM_READ_ONLY);
  device->mem_copy_from(*this, y, w, h, elem);
}
/* Asks the device to zero this memory; no-op when data_size is 0. */
void device_memory::device_zero()
{
  if (data_size) {
    device->mem_zero(*this);
  }
}
/* Points this memory at another device, device size and device pointer. The
 * previous three values are saved so restore_device() can put them back. */
void device_memory::swap_device(Device *new_device,
                                size_t new_device_size,
                                device_ptr new_device_ptr)
{
  original_device = device;
  original_device_size = device_size;
  original_device_ptr = device_pointer;

  device = new_device;
  device_size = new_device_size;
  device_pointer = new_device_ptr;
}
/* Reinstates the device, device size and device pointer saved by swap_device(). */
void device_memory::restore_device()
{
  device = original_device;
  device_size = original_device_size;
  device_pointer = original_device_ptr;
}
/* Device Sub Ptr */
/* Requests a sub pointer of `mem` from the device that owns it, using the
 * given offset and size. */
device_sub_ptr::device_sub_ptr(device_memory &mem, int offset, int size) : device(mem.device)
{
  ptr = device->mem_alloc_sub_ptr(mem, offset, size);
}
/* Hands the sub pointer back to the device exactly once. */
device_sub_ptr::~device_sub_ptr()
{
  device->mem_free_sub_ptr(ptr);
}
CCL_NAMESPACE_END

View File

@@ -31,152 +31,155 @@ CCL_NAMESPACE_BEGIN
class Device;
/* Kind of allocation, passed to the device_memory constructor. */
enum MemoryType { MEM_READ_ONLY, MEM_READ_WRITE, MEM_DEVICE_ONLY, MEM_TEXTURE, MEM_PIXELS };
/* Supported Data Types */
/* Scalar element types; datatype_size() returns the byte size of each. */
enum DataType {
  TYPE_UNKNOWN,
  TYPE_UCHAR,
  TYPE_UINT16,
  TYPE_UINT,
  TYPE_INT,
  TYPE_FLOAT,
  TYPE_HALF,
  TYPE_UINT64,
};
/* Returns the size in bytes of one scalar of the given data type.
 * TYPE_UNKNOWN counts as 1 byte; a value outside the enum yields 0. */
static inline size_t datatype_size(DataType datatype)
{
  switch (datatype) {
    case TYPE_UNKNOWN:
      return 1;
    case TYPE_UCHAR:
      return sizeof(uchar);
    case TYPE_FLOAT:
      return sizeof(float);
    case TYPE_UINT:
      return sizeof(uint);
    case TYPE_UINT16:
      return sizeof(uint16_t);
    case TYPE_INT:
      return sizeof(int);
    case TYPE_HALF:
      return sizeof(half);
    case TYPE_UINT64:
      return sizeof(uint64_t);
    default:
      return 0;
  }
}
/* Traits for data types */
template<typename T> struct device_type_traits {
static const DataType data_type = TYPE_UNKNOWN;
static const int num_elements = sizeof(T);
static const DataType data_type = TYPE_UNKNOWN;
static const int num_elements = sizeof(T);
};
template<> struct device_type_traits<uchar> {
static const DataType data_type = TYPE_UCHAR;
static const int num_elements = 1;
static const DataType data_type = TYPE_UCHAR;
static const int num_elements = 1;
};
template<> struct device_type_traits<uchar2> {
static const DataType data_type = TYPE_UCHAR;
static const int num_elements = 2;
static const DataType data_type = TYPE_UCHAR;
static const int num_elements = 2;
};
template<> struct device_type_traits<uchar3> {
static const DataType data_type = TYPE_UCHAR;
static const int num_elements = 3;
static const DataType data_type = TYPE_UCHAR;
static const int num_elements = 3;
};
template<> struct device_type_traits<uchar4> {
static const DataType data_type = TYPE_UCHAR;
static const int num_elements = 4;
static const DataType data_type = TYPE_UCHAR;
static const int num_elements = 4;
};
template<> struct device_type_traits<uint> {
static const DataType data_type = TYPE_UINT;
static const int num_elements = 1;
static const DataType data_type = TYPE_UINT;
static const int num_elements = 1;
};
template<> struct device_type_traits<uint2> {
static const DataType data_type = TYPE_UINT;
static const int num_elements = 2;
static const DataType data_type = TYPE_UINT;
static const int num_elements = 2;
};
template<> struct device_type_traits<uint3> {
static const DataType data_type = TYPE_UINT;
static const int num_elements = 3;
static const DataType data_type = TYPE_UINT;
static const int num_elements = 3;
};
template<> struct device_type_traits<uint4> {
static const DataType data_type = TYPE_UINT;
static const int num_elements = 4;
static const DataType data_type = TYPE_UINT;
static const int num_elements = 4;
};
template<> struct device_type_traits<int> {
static const DataType data_type = TYPE_INT;
static const int num_elements = 1;
static const DataType data_type = TYPE_INT;
static const int num_elements = 1;
};
template<> struct device_type_traits<int2> {
static const DataType data_type = TYPE_INT;
static const int num_elements = 2;
static const DataType data_type = TYPE_INT;
static const int num_elements = 2;
};
template<> struct device_type_traits<int3> {
static const DataType data_type = TYPE_INT;
static const int num_elements = 3;
static const DataType data_type = TYPE_INT;
static const int num_elements = 3;
};
template<> struct device_type_traits<int4> {
static const DataType data_type = TYPE_INT;
static const int num_elements = 4;
static const DataType data_type = TYPE_INT;
static const int num_elements = 4;
};
template<> struct device_type_traits<float> {
static const DataType data_type = TYPE_FLOAT;
static const int num_elements = 1;
static const DataType data_type = TYPE_FLOAT;
static const int num_elements = 1;
};
template<> struct device_type_traits<float2> {
static const DataType data_type = TYPE_FLOAT;
static const int num_elements = 2;
static const DataType data_type = TYPE_FLOAT;
static const int num_elements = 2;
};
template<> struct device_type_traits<float3> {
static const DataType data_type = TYPE_FLOAT;
static const int num_elements = 4;
static const DataType data_type = TYPE_FLOAT;
static const int num_elements = 4;
};
template<> struct device_type_traits<float4> {
static const DataType data_type = TYPE_FLOAT;
static const int num_elements = 4;
static const DataType data_type = TYPE_FLOAT;
static const int num_elements = 4;
};
template<> struct device_type_traits<half> {
static const DataType data_type = TYPE_HALF;
static const int num_elements = 1;
static const DataType data_type = TYPE_HALF;
static const int num_elements = 1;
};
template<> struct device_type_traits<ushort4> {
static const DataType data_type = TYPE_UINT16;
static const int num_elements = 4;
static const DataType data_type = TYPE_UINT16;
static const int num_elements = 4;
};
template<> struct device_type_traits<uint16_t> {
static const DataType data_type = TYPE_UINT16;
static const int num_elements = 1;
static const DataType data_type = TYPE_UINT16;
static const int num_elements = 1;
};
template<> struct device_type_traits<half4> {
static const DataType data_type = TYPE_HALF;
static const int num_elements = 4;
static const DataType data_type = TYPE_HALF;
static const int num_elements = 4;
};
template<> struct device_type_traits<uint64_t> {
static const DataType data_type = TYPE_UINT64;
static const int num_elements = 1;
static const DataType data_type = TYPE_UINT64;
static const int num_elements = 1;
};
/* Device Memory
@@ -184,64 +187,67 @@ template<> struct device_type_traits<uint64_t> {
* Base class for all device memory. This should not be allocated directly,
* instead the appropriate subclass can be used. */
class device_memory
{
public:
size_t memory_size() { return data_size*data_elements*datatype_size(data_type); }
size_t memory_elements_size(int elements) {
return elements*data_elements*datatype_size(data_type);
}
class device_memory {
public:
size_t memory_size()
{
return data_size * data_elements * datatype_size(data_type);
}
size_t memory_elements_size(int elements)
{
return elements * data_elements * datatype_size(data_type);
}
/* Data information. */
DataType data_type;
int data_elements;
size_t data_size;
size_t device_size;
size_t data_width;
size_t data_height;
size_t data_depth;
MemoryType type;
const char *name;
InterpolationType interpolation;
ExtensionType extension;
/* Data information. */
DataType data_type;
int data_elements;
size_t data_size;
size_t device_size;
size_t data_width;
size_t data_height;
size_t data_depth;
MemoryType type;
const char *name;
InterpolationType interpolation;
ExtensionType extension;
/* Pointers. */
Device *device;
device_ptr device_pointer;
void *host_pointer;
void *shared_pointer;
/* Pointers. */
Device *device;
device_ptr device_pointer;
void *host_pointer;
void *shared_pointer;
virtual ~device_memory();
virtual ~device_memory();
void swap_device(Device *new_device, size_t new_device_size, device_ptr new_device_ptr);
void restore_device();
void swap_device(Device *new_device, size_t new_device_size, device_ptr new_device_ptr);
void restore_device();
protected:
friend class CUDADevice;
protected:
friend class CUDADevice;
/* Only create through subclasses. */
device_memory(Device *device, const char *name, MemoryType type);
/* Only create through subclasses. */
device_memory(Device *device, const char *name, MemoryType type);
/* No copying allowed. */
device_memory(const device_memory&);
device_memory& operator = (const device_memory&);
/* No copying allowed. */
device_memory(const device_memory &);
device_memory &operator=(const device_memory &);
/* Host allocation on the device. All host_pointer memory should be
* allocated with these functions, for devices that support using
* the same pointer for host and device. */
void *host_alloc(size_t size);
void host_free();
/* Host allocation on the device. All host_pointer memory should be
* allocated with these functions, for devices that support using
* the same pointer for host and device. */
void *host_alloc(size_t size);
void host_free();
/* Device memory allocation and copying. */
void device_alloc();
void device_free();
void device_copy_to();
void device_copy_from(int y, int w, int h, int elem);
void device_zero();
/* Device memory allocation and copying. */
void device_alloc();
void device_free();
void device_copy_to();
void device_copy_from(int y, int w, int h, int elem);
void device_zero();
device_ptr original_device_ptr;
size_t original_device_size;
Device *original_device;
device_ptr original_device_ptr;
size_t original_device_size;
Device *original_device;
};
/* Device Only Memory
@@ -249,51 +255,49 @@ protected:
* Working memory only needed by the device, with no corresponding allocation
* on the host. Only used internally in the device implementations. */
template<typename T>
class device_only_memory : public device_memory
{
public:
device_only_memory(Device *device, const char *name)
: device_memory(device, name, MEM_DEVICE_ONLY)
{
data_type = device_type_traits<T>::data_type;
data_elements = max(device_type_traits<T>::num_elements, 1);
}
template<typename T> class device_only_memory : public device_memory {
public:
device_only_memory(Device *device, const char *name)
: device_memory(device, name, MEM_DEVICE_ONLY)
{
data_type = device_type_traits<T>::data_type;
data_elements = max(device_type_traits<T>::num_elements, 1);
}
virtual ~device_only_memory()
{
free();
}
virtual ~device_only_memory()
{
free();
}
void alloc_to_device(size_t num, bool shrink_to_fit = true)
{
size_t new_size = num;
bool reallocate;
void alloc_to_device(size_t num, bool shrink_to_fit = true)
{
size_t new_size = num;
bool reallocate;
if(shrink_to_fit) {
reallocate = (data_size != new_size);
}
else {
reallocate = (data_size < new_size);
}
if (shrink_to_fit) {
reallocate = (data_size != new_size);
}
else {
reallocate = (data_size < new_size);
}
if(reallocate) {
device_free();
data_size = new_size;
device_alloc();
}
}
if (reallocate) {
device_free();
data_size = new_size;
device_alloc();
}
}
void free()
{
device_free();
data_size = 0;
}
void free()
{
device_free();
data_size = 0;
}
void zero_to_device()
{
device_zero();
}
void zero_to_device()
{
device_zero();
}
};
/* Device Vector
@@ -307,135 +311,134 @@ public:
* automatically attached to kernel globals, using the provided name
* matching an entry in kernel_textures.h. */
template<typename T> class device_vector : public device_memory
{
public:
device_vector(Device *device, const char *name, MemoryType type)
: device_memory(device, name, type)
{
data_type = device_type_traits<T>::data_type;
data_elements = device_type_traits<T>::num_elements;
template<typename T> class device_vector : public device_memory {
public:
device_vector(Device *device, const char *name, MemoryType type)
: device_memory(device, name, type)
{
data_type = device_type_traits<T>::data_type;
data_elements = device_type_traits<T>::num_elements;
assert(data_elements > 0);
}
assert(data_elements > 0);
}
virtual ~device_vector()
{
free();
}
virtual ~device_vector()
{
free();
}
/* Host memory allocation. */
T *alloc(size_t width, size_t height = 0, size_t depth = 0)
{
size_t new_size = size(width, height, depth);
/* Host memory allocation. */
T *alloc(size_t width, size_t height = 0, size_t depth = 0)
{
size_t new_size = size(width, height, depth);
if(new_size != data_size) {
device_free();
host_free();
host_pointer = host_alloc(sizeof(T)*new_size);
assert(device_pointer == 0);
}
if (new_size != data_size) {
device_free();
host_free();
host_pointer = host_alloc(sizeof(T) * new_size);
assert(device_pointer == 0);
}
data_size = new_size;
data_width = width;
data_height = height;
data_depth = depth;
data_size = new_size;
data_width = width;
data_height = height;
data_depth = depth;
return data();
}
return data();
}
/* Host memory resize. Only use this if the original data needs to be
* preserved, it is faster to call alloc() if it can be discarded. */
T *resize(size_t width, size_t height = 0, size_t depth = 0)
{
size_t new_size = size(width, height, depth);
/* Host memory resize. Only use this if the original data needs to be
* preserved, it is faster to call alloc() if it can be discarded. */
T *resize(size_t width, size_t height = 0, size_t depth = 0)
{
size_t new_size = size(width, height, depth);
if(new_size != data_size) {
void *new_ptr = host_alloc(sizeof(T)*new_size);
if (new_size != data_size) {
void *new_ptr = host_alloc(sizeof(T) * new_size);
if(new_size && data_size) {
size_t min_size = ((new_size < data_size)? new_size: data_size);
memcpy((T*)new_ptr, (T*)host_pointer, sizeof(T)*min_size);
}
if (new_size && data_size) {
size_t min_size = ((new_size < data_size) ? new_size : data_size);
memcpy((T *)new_ptr, (T *)host_pointer, sizeof(T) * min_size);
}
device_free();
host_free();
host_pointer = new_ptr;
assert(device_pointer == 0);
}
device_free();
host_free();
host_pointer = new_ptr;
assert(device_pointer == 0);
}
data_size = new_size;
data_width = width;
data_height = height;
data_depth = depth;
data_size = new_size;
data_width = width;
data_height = height;
data_depth = depth;
return data();
}
return data();
}
/* Take over data from an existing array. */
void steal_data(array<T>& from)
{
device_free();
host_free();
/* Take over data from an existing array. */
void steal_data(array<T> &from)
{
device_free();
host_free();
data_size = from.size();
data_width = 0;
data_height = 0;
data_depth = 0;
host_pointer = from.steal_pointer();
assert(device_pointer == 0);
}
data_size = from.size();
data_width = 0;
data_height = 0;
data_depth = 0;
host_pointer = from.steal_pointer();
assert(device_pointer == 0);
}
/* Free device and host memory. */
void free()
{
device_free();
host_free();
/* Free device and host memory. */
void free()
{
device_free();
host_free();
data_size = 0;
data_width = 0;
data_height = 0;
data_depth = 0;
host_pointer = 0;
assert(device_pointer == 0);
}
data_size = 0;
data_width = 0;
data_height = 0;
data_depth = 0;
host_pointer = 0;
assert(device_pointer == 0);
}
size_t size()
{
return data_size;
}
size_t size()
{
return data_size;
}
T* data()
{
return (T*)host_pointer;
}
T *data()
{
return (T *)host_pointer;
}
T& operator[](size_t i)
{
assert(i < data_size);
return data()[i];
}
T &operator[](size_t i)
{
assert(i < data_size);
return data()[i];
}
void copy_to_device()
{
device_copy_to();
}
void copy_to_device()
{
device_copy_to();
}
void copy_from_device(int y, int w, int h)
{
device_copy_from(y, w, h, sizeof(T));
}
void copy_from_device(int y, int w, int h)
{
device_copy_from(y, w, h, sizeof(T));
}
void zero_to_device()
{
device_zero();
}
void zero_to_device()
{
device_zero();
}
protected:
size_t size(size_t width, size_t height, size_t depth)
{
return width * ((height == 0)? 1: height) * ((depth == 0)? 1: depth);
}
protected:
/* Number of elements spanned by the given dimensions. A height or depth of
 * zero is treated as 1, so only the dimensions actually in use contribute. */
size_t size(size_t width, size_t height, size_t depth)
{
  const size_t rows = (height != 0) ? height : 1;
  const size_t slices = (depth != 0) ? depth : 1;
  return width * rows * slices;
}
};
/* Pixel Memory
@@ -443,28 +446,26 @@ protected:
* Device memory to efficiently draw as pixels to the screen in interactive
* rendering. Only copying pixels from the device is supported, not copying to. */
template<typename T> class device_pixels : public device_vector<T>
{
public:
device_pixels(Device *device, const char *name)
: device_vector<T>(device, name, MEM_PIXELS)
{
}
template<typename T> class device_pixels : public device_vector<T> {
public:
device_pixels(Device *device, const char *name) : device_vector<T>(device, name, MEM_PIXELS)
{
}
void alloc_to_device(size_t width, size_t height, size_t depth = 0)
{
device_vector<T>::alloc(width, height, depth);
void alloc_to_device(size_t width, size_t height, size_t depth = 0)
{
device_vector<T>::alloc(width, height, depth);
if(!device_memory::device_pointer) {
device_memory::device_alloc();
}
}
if (!device_memory::device_pointer) {
device_memory::device_alloc();
}
}
T *copy_from_device(int y, int w, int h)
{
device_memory::device_copy_from(y, w, h, sizeof(T));
return device_vector<T>::data();
}
T *copy_from_device(int y, int w, int h)
{
device_memory::device_copy_from(y, w, h, sizeof(T));
return device_vector<T>::data();
}
};
/* Device Sub Memory
@@ -476,25 +477,24 @@ public:
* Note: some devices require offset and size of the sub_ptr to be properly
* aligned to device->mem_address_alignment(). */
class device_sub_ptr
{
public:
device_sub_ptr(device_memory& mem, int offset, int size);
~device_sub_ptr();
class device_sub_ptr {
public:
device_sub_ptr(device_memory &mem, int offset, int size);
~device_sub_ptr();
device_ptr operator*() const
{
return ptr;
}
device_ptr operator*() const
{
return ptr;
}
protected:
/* No copying. */
device_sub_ptr& operator = (const device_sub_ptr&);
protected:
/* No copying. */
device_sub_ptr &operator=(const device_sub_ptr &);
Device *device;
device_ptr ptr;
Device *device;
device_ptr ptr;
};
CCL_NAMESPACE_END
#endif /* __DEVICE_MEMORY_H__ */
#endif /* __DEVICE_MEMORY_H__ */

View File

@@ -31,391 +31,406 @@
CCL_NAMESPACE_BEGIN
class MultiDevice : public Device
{
public:
struct SubDevice {
explicit SubDevice(Device *device_)
: device(device_) {}
class MultiDevice : public Device {
public:
struct SubDevice {
explicit SubDevice(Device *device_) : device(device_)
{
}
Device *device;
map<device_ptr, device_ptr> ptr_map;
};
Device *device;
map<device_ptr, device_ptr> ptr_map;
};
list<SubDevice> devices;
device_ptr unique_key;
list<SubDevice> devices;
device_ptr unique_key;
MultiDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_)
: Device(info, stats, profiler, background_), unique_key(1)
{
foreach(DeviceInfo& subinfo, info.multi_devices) {
Device *device = Device::create(subinfo, sub_stats_, profiler, background);
MultiDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
: Device(info, stats, profiler, background_), unique_key(1)
{
foreach (DeviceInfo &subinfo, info.multi_devices) {
Device *device = Device::create(subinfo, sub_stats_, profiler, background);
/* Always add CPU devices at the back since GPU devices can change
* host memory pointers, which CPU uses as device pointer. */
if(subinfo.type == DEVICE_CPU) {
devices.push_back(SubDevice(device));
}
else {
devices.push_front(SubDevice(device));
}
}
/* Always add CPU devices at the back since GPU devices can change
* host memory pointers, which CPU uses as device pointer. */
if (subinfo.type == DEVICE_CPU) {
devices.push_back(SubDevice(device));
}
else {
devices.push_front(SubDevice(device));
}
}
#ifdef WITH_NETWORK
/* try to add network devices */
ServerDiscovery discovery(true);
time_sleep(1.0);
/* try to add network devices */
ServerDiscovery discovery(true);
time_sleep(1.0);
vector<string> servers = discovery.get_server_list();
vector<string> servers = discovery.get_server_list();
foreach(string& server, servers) {
Device *device = device_network_create(info, stats, profiler, server.c_str());
if(device)
devices.push_back(SubDevice(device));
}
foreach (string &server, servers) {
Device *device = device_network_create(info, stats, profiler, server.c_str());
if (device)
devices.push_back(SubDevice(device));
}
#endif
}
}
~MultiDevice()
{
foreach(SubDevice& sub, devices)
delete sub.device;
}
/* Destroy every sub-device held in the `devices` list. */
~MultiDevice()
{
  foreach (SubDevice &entry, devices) {
    delete entry.device;
  }
}
const string& error_message()
{
foreach(SubDevice& sub, devices) {
if(sub.device->error_message() != "") {
if(error_msg == "")
error_msg = sub.device->error_message();
break;
}
}
const string &error_message()
{
foreach (SubDevice &sub, devices) {
if (sub.device->error_message() != "") {
if (error_msg == "")
error_msg = sub.device->error_message();
break;
}
}
return error_msg;
}
return error_msg;
}
virtual bool show_samples() const
{
if(devices.size() > 1) {
return false;
}
return devices.front().device->show_samples();
}
/* Returns false whenever more than one sub-device is present; otherwise the
 * answer is forwarded from the front sub-device. */
virtual bool show_samples() const
{
  const bool has_multiple_devices = devices.size() > 1;
  return has_multiple_devices ? false : devices.front().device->show_samples();
}
virtual BVHLayoutMask get_bvh_layout_mask() const {
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
foreach(const SubDevice& sub_device, devices) {
bvh_layout_mask &= sub_device.device->get_bvh_layout_mask();
}
return bvh_layout_mask;
}
/* Intersection of the BVH layout masks reported by all sub-devices: start
 * from BVH_LAYOUT_ALL and AND in each sub-device's own mask. */
virtual BVHLayoutMask get_bvh_layout_mask() const
{
  BVHLayoutMask common_layouts = BVH_LAYOUT_ALL;
  foreach (const SubDevice &entry, devices) {
    common_layouts &= entry.device->get_bvh_layout_mask();
  }
  return common_layouts;
}
bool load_kernels(const DeviceRequestedFeatures& requested_features)
{
foreach(SubDevice& sub, devices)
if(!sub.device->load_kernels(requested_features))
return false;
bool load_kernels(const DeviceRequestedFeatures &requested_features)
{
foreach (SubDevice &sub, devices)
if (!sub.device->load_kernels(requested_features))
return false;
return true;
}
return true;
}
bool wait_for_availability(const DeviceRequestedFeatures& requested_features)
{
foreach(SubDevice& sub, devices)
if(!sub.device->wait_for_availability(requested_features))
return false;
bool wait_for_availability(const DeviceRequestedFeatures &requested_features)
{
foreach (SubDevice &sub, devices)
if (!sub.device->wait_for_availability(requested_features))
return false;
return true;
}
return true;
}
DeviceKernelStatus get_active_kernel_switch_state()
{
DeviceKernelStatus result = DEVICE_KERNEL_USING_FEATURE_KERNEL;
DeviceKernelStatus get_active_kernel_switch_state()
{
DeviceKernelStatus result = DEVICE_KERNEL_USING_FEATURE_KERNEL;
foreach(SubDevice& sub, devices) {
DeviceKernelStatus subresult = sub.device->get_active_kernel_switch_state();
switch (subresult) {
case DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL:
result = subresult;
break;
foreach (SubDevice &sub, devices) {
DeviceKernelStatus subresult = sub.device->get_active_kernel_switch_state();
switch (subresult) {
case DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL:
result = subresult;
break;
case DEVICE_KERNEL_FEATURE_KERNEL_INVALID:
case DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE:
return subresult;
case DEVICE_KERNEL_FEATURE_KERNEL_INVALID:
case DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE:
return subresult;
case DEVICE_KERNEL_USING_FEATURE_KERNEL:
case DEVICE_KERNEL_UNKNOWN:
break;
}
}
return result;
}
case DEVICE_KERNEL_USING_FEATURE_KERNEL:
case DEVICE_KERNEL_UNKNOWN:
break;
}
}
return result;
}
void mem_alloc(device_memory& mem)
{
device_ptr key = unique_key++;
void mem_alloc(device_memory &mem)
{
device_ptr key = unique_key++;
foreach(SubDevice& sub, devices) {
mem.device = sub.device;
mem.device_pointer = 0;
mem.device_size = 0;
foreach (SubDevice &sub, devices) {
mem.device = sub.device;
mem.device_pointer = 0;
mem.device_size = 0;
sub.device->mem_alloc(mem);
sub.ptr_map[key] = mem.device_pointer;
}
sub.device->mem_alloc(mem);
sub.ptr_map[key] = mem.device_pointer;
}
mem.device = this;
mem.device_pointer = key;
stats.mem_alloc(mem.device_size);
}
mem.device = this;
mem.device_pointer = key;
stats.mem_alloc(mem.device_size);
}
void mem_copy_to(device_memory& mem)
{
device_ptr existing_key = mem.device_pointer;
device_ptr key = (existing_key)? existing_key: unique_key++;
size_t existing_size = mem.device_size;
void mem_copy_to(device_memory &mem)
{
device_ptr existing_key = mem.device_pointer;
device_ptr key = (existing_key) ? existing_key : unique_key++;
size_t existing_size = mem.device_size;
foreach(SubDevice& sub, devices) {
mem.device = sub.device;
mem.device_pointer = (existing_key)? sub.ptr_map[existing_key]: 0;
mem.device_size = existing_size;
foreach (SubDevice &sub, devices) {
mem.device = sub.device;
mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
mem.device_size = existing_size;
sub.device->mem_copy_to(mem);
sub.ptr_map[key] = mem.device_pointer;
}
sub.device->mem_copy_to(mem);
sub.ptr_map[key] = mem.device_pointer;
}
mem.device = this;
mem.device_pointer = key;
stats.mem_alloc(mem.device_size - existing_size);
}
mem.device = this;
mem.device_pointer = key;
stats.mem_alloc(mem.device_size - existing_size);
}
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
{
device_ptr key = mem.device_pointer;
int i = 0, sub_h = h/devices.size();
void mem_copy_from(device_memory &mem, int y, int w, int h, int elem)
{
device_ptr key = mem.device_pointer;
int i = 0, sub_h = h / devices.size();
foreach(SubDevice& sub, devices) {
int sy = y + i*sub_h;
int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h;
foreach (SubDevice &sub, devices) {
int sy = y + i * sub_h;
int sh = (i == (int)devices.size() - 1) ? h - sub_h * i : sub_h;
mem.device = sub.device;
mem.device_pointer = sub.ptr_map[key];
mem.device = sub.device;
mem.device_pointer = sub.ptr_map[key];
sub.device->mem_copy_from(mem, sy, w, sh, elem);
i++;
}
sub.device->mem_copy_from(mem, sy, w, sh, elem);
i++;
}
mem.device = this;
mem.device_pointer = key;
}
mem.device = this;
mem.device_pointer = key;
}
void mem_zero(device_memory& mem)
{
device_ptr existing_key = mem.device_pointer;
device_ptr key = (existing_key)? existing_key: unique_key++;
size_t existing_size = mem.device_size;
void mem_zero(device_memory &mem)
{
device_ptr existing_key = mem.device_pointer;
device_ptr key = (existing_key) ? existing_key : unique_key++;
size_t existing_size = mem.device_size;
foreach(SubDevice& sub, devices) {
mem.device = sub.device;
mem.device_pointer = (existing_key)? sub.ptr_map[existing_key]: 0;
mem.device_size = existing_size;
foreach (SubDevice &sub, devices) {
mem.device = sub.device;
mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
mem.device_size = existing_size;
sub.device->mem_zero(mem);
sub.ptr_map[key] = mem.device_pointer;
}
sub.device->mem_zero(mem);
sub.ptr_map[key] = mem.device_pointer;
}
mem.device = this;
mem.device_pointer = key;
stats.mem_alloc(mem.device_size - existing_size);
}
mem.device = this;
mem.device_pointer = key;
stats.mem_alloc(mem.device_size - existing_size);
}
void mem_free(device_memory& mem)
{
device_ptr key = mem.device_pointer;
size_t existing_size = mem.device_size;
void mem_free(device_memory &mem)
{
device_ptr key = mem.device_pointer;
size_t existing_size = mem.device_size;
foreach(SubDevice& sub, devices) {
mem.device = sub.device;
mem.device_pointer = sub.ptr_map[key];
mem.device_size = existing_size;
foreach (SubDevice &sub, devices) {
mem.device = sub.device;
mem.device_pointer = sub.ptr_map[key];
mem.device_size = existing_size;
sub.device->mem_free(mem);
sub.ptr_map.erase(sub.ptr_map.find(key));
}
sub.device->mem_free(mem);
sub.ptr_map.erase(sub.ptr_map.find(key));
}
mem.device = this;
mem.device_pointer = 0;
mem.device_size = 0;
stats.mem_free(existing_size);
}
mem.device = this;
mem.device_pointer = 0;
mem.device_size = 0;
stats.mem_free(existing_size);
}
void const_copy_to(const char *name, void *host, size_t size)
{
foreach(SubDevice& sub, devices)
sub.device->const_copy_to(name, host, size);
}
void const_copy_to(const char *name, void *host, size_t size)
{
foreach (SubDevice &sub, devices)
sub.device->const_copy_to(name, host, size);
}
void draw_pixels(
device_memory& rgba, int y,
int w, int h, int width, int height,
int dx, int dy, int dw, int dh,
bool transparent, const DeviceDrawParams &draw_params)
{
device_ptr key = rgba.device_pointer;
int i = 0, sub_h = h/devices.size();
int sub_height = height/devices.size();
void draw_pixels(device_memory &rgba,
int y,
int w,
int h,
int width,
int height,
int dx,
int dy,
int dw,
int dh,
bool transparent,
const DeviceDrawParams &draw_params)
{
device_ptr key = rgba.device_pointer;
int i = 0, sub_h = h / devices.size();
int sub_height = height / devices.size();
foreach(SubDevice& sub, devices) {
int sy = y + i*sub_h;
int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h;
int sheight = (i == (int)devices.size() - 1)? height - sub_height*i: sub_height;
int sdy = dy + i*sub_height;
/* adjust math for w/width */
foreach (SubDevice &sub, devices) {
int sy = y + i * sub_h;
int sh = (i == (int)devices.size() - 1) ? h - sub_h * i : sub_h;
int sheight = (i == (int)devices.size() - 1) ? height - sub_height * i : sub_height;
int sdy = dy + i * sub_height;
/* adjust math for w/width */
rgba.device_pointer = sub.ptr_map[key];
sub.device->draw_pixels(rgba, sy, w, sh, width, sheight, dx, sdy, dw, dh, transparent, draw_params);
i++;
}
rgba.device_pointer = sub.ptr_map[key];
sub.device->draw_pixels(
rgba, sy, w, sh, width, sheight, dx, sdy, dw, dh, transparent, draw_params);
i++;
}
rgba.device_pointer = key;
}
rgba.device_pointer = key;
}
void map_tile(Device *sub_device, RenderTile& tile)
{
foreach(SubDevice& sub, devices) {
if(sub.device == sub_device) {
if(tile.buffer) tile.buffer = sub.ptr_map[tile.buffer];
}
}
}
/* Replace the tile's buffer key with the pointer recorded in the ptr_map of
 * the sub-device matching `sub_device`. A zero buffer is left untouched. */
void map_tile(Device *sub_device, RenderTile &tile)
{
  foreach (SubDevice &entry, devices) {
    if (entry.device != sub_device) {
      continue;
    }
    if (tile.buffer) {
      tile.buffer = entry.ptr_map[tile.buffer];
    }
  }
}
int device_number(Device *sub_device)
{
int i = 0;
int device_number(Device *sub_device)
{
int i = 0;
foreach(SubDevice& sub, devices) {
if(sub.device == sub_device)
return i;
i++;
}
foreach (SubDevice &sub, devices) {
if (sub.device == sub_device)
return i;
i++;
}
return -1;
}
return -1;
}
void map_neighbor_tiles(Device *sub_device, RenderTile *tiles)
{
for(int i = 0; i < 9; i++) {
if(!tiles[i].buffers) {
continue;
}
void map_neighbor_tiles(Device *sub_device, RenderTile *tiles)
{
for (int i = 0; i < 9; i++) {
if (!tiles[i].buffers) {
continue;
}
/* If the tile was rendered on another device, copy its memory to
* the current device now, for the duration of the denoising task.
* Note that this temporarily modifies the RenderBuffers and calls
* the device, so this function is not thread safe. */
device_vector<float> &mem = tiles[i].buffers->buffer;
if(mem.device != sub_device) {
/* Only copy from device to host once. This is faster, but
* also required for the case where a CPU thread is denoising
* a tile rendered on the GPU. In that case we have to avoid
* overwriting the buffer being denoised by the CPU thread. */
if(!tiles[i].buffers->map_neighbor_copied) {
tiles[i].buffers->map_neighbor_copied = true;
mem.copy_from_device(0, mem.data_size, 1);
}
/* If the tile was rendered on another device, copy its memory to
* the current device now, for the duration of the denoising task.
* Note that this temporarily modifies the RenderBuffers and calls
* the device, so this function is not thread safe. */
device_vector<float> &mem = tiles[i].buffers->buffer;
if (mem.device != sub_device) {
/* Only copy from device to host once. This is faster, but
* also required for the case where a CPU thread is denoising
* a tile rendered on the GPU. In that case we have to avoid
* overwriting the buffer being denoised by the CPU thread. */
if (!tiles[i].buffers->map_neighbor_copied) {
tiles[i].buffers->map_neighbor_copied = true;
mem.copy_from_device(0, mem.data_size, 1);
}
mem.swap_device(sub_device, 0, 0);
mem.swap_device(sub_device, 0, 0);
mem.copy_to_device();
tiles[i].buffer = mem.device_pointer;
tiles[i].device_size = mem.device_size;
mem.copy_to_device();
tiles[i].buffer = mem.device_pointer;
tiles[i].device_size = mem.device_size;
mem.restore_device();
}
}
}
mem.restore_device();
}
}
}
void unmap_neighbor_tiles(Device * sub_device, RenderTile * tiles)
{
/* Copy denoised result back to the host. */
device_vector<float> &mem = tiles[9].buffers->buffer;
mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer);
mem.copy_from_device(0, mem.data_size, 1);
mem.restore_device();
/* Copy denoised result to the original device. */
mem.copy_to_device();
void unmap_neighbor_tiles(Device *sub_device, RenderTile *tiles)
{
/* Copy denoised result back to the host. */
device_vector<float> &mem = tiles[9].buffers->buffer;
mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer);
mem.copy_from_device(0, mem.data_size, 1);
mem.restore_device();
/* Copy denoised result to the original device. */
mem.copy_to_device();
for(int i = 0; i < 9; i++) {
if(!tiles[i].buffers) {
continue;
}
for (int i = 0; i < 9; i++) {
if (!tiles[i].buffers) {
continue;
}
device_vector<float> &mem = tiles[i].buffers->buffer;
if(mem.device != sub_device) {
mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer);
sub_device->mem_free(mem);
mem.restore_device();
}
}
}
device_vector<float> &mem = tiles[i].buffers->buffer;
if (mem.device != sub_device) {
mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer);
sub_device->mem_free(mem);
mem.restore_device();
}
}
}
int get_split_task_count(DeviceTask& task)
{
int total_tasks = 0;
list<DeviceTask> tasks;
task.split(tasks, devices.size());
foreach(SubDevice& sub, devices) {
if(!tasks.empty()) {
DeviceTask subtask = tasks.front();
tasks.pop_front();
int get_split_task_count(DeviceTask &task)
{
int total_tasks = 0;
list<DeviceTask> tasks;
task.split(tasks, devices.size());
foreach (SubDevice &sub, devices) {
if (!tasks.empty()) {
DeviceTask subtask = tasks.front();
tasks.pop_front();
total_tasks += sub.device->get_split_task_count(subtask);
}
}
return total_tasks;
}
total_tasks += sub.device->get_split_task_count(subtask);
}
}
return total_tasks;
}
void task_add(DeviceTask& task)
{
list<DeviceTask> tasks;
task.split(tasks, devices.size());
void task_add(DeviceTask &task)
{
list<DeviceTask> tasks;
task.split(tasks, devices.size());
foreach(SubDevice& sub, devices) {
if(!tasks.empty()) {
DeviceTask subtask = tasks.front();
tasks.pop_front();
foreach (SubDevice &sub, devices) {
if (!tasks.empty()) {
DeviceTask subtask = tasks.front();
tasks.pop_front();
if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer];
if(task.rgba_byte) subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
if(task.rgba_half) subtask.rgba_half = sub.ptr_map[task.rgba_half];
if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input];
if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output];
if (task.buffer)
subtask.buffer = sub.ptr_map[task.buffer];
if (task.rgba_byte)
subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
if (task.rgba_half)
subtask.rgba_half = sub.ptr_map[task.rgba_half];
if (task.shader_input)
subtask.shader_input = sub.ptr_map[task.shader_input];
if (task.shader_output)
subtask.shader_output = sub.ptr_map[task.shader_output];
sub.device->task_add(subtask);
}
}
}
sub.device->task_add(subtask);
}
}
}
void task_wait()
{
foreach(SubDevice& sub, devices)
sub.device->task_wait();
}
void task_wait()
{
foreach (SubDevice &sub, devices)
sub.device->task_wait();
}
void task_cancel()
{
foreach(SubDevice& sub, devices)
sub.device->task_cancel();
}
void task_cancel()
{
foreach (SubDevice &sub, devices)
sub.device->task_cancel();
}
protected:
Stats sub_stats_;
protected:
Stats sub_stats_;
};
Device *device_multi_create(DeviceInfo& info, Stats &stats, Profiler& profiler, bool background)
Device *device_multi_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{
return new MultiDevice(info, stats, profiler, background);
return new MultiDevice(info, stats, profiler, background);
}
CCL_NAMESPACE_END

File diff suppressed because it is too large Load Diff

View File

@@ -19,35 +19,35 @@
#ifdef WITH_NETWORK
#include <boost/archive/text_iarchive.hpp>
#include <boost/archive/text_oarchive.hpp>
#include <boost/archive/binary_iarchive.hpp>
#include <boost/archive/binary_oarchive.hpp>
#include <boost/array.hpp>
#include <boost/asio.hpp>
#include <boost/bind.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/thread.hpp>
# include <boost/archive/text_iarchive.hpp>
# include <boost/archive/text_oarchive.hpp>
# include <boost/archive/binary_iarchive.hpp>
# include <boost/archive/binary_oarchive.hpp>
# include <boost/array.hpp>
# include <boost/asio.hpp>
# include <boost/bind.hpp>
# include <boost/serialization/vector.hpp>
# include <boost/thread.hpp>
#include <iostream>
#include <sstream>
#include <deque>
# include <iostream>
# include <sstream>
# include <deque>
#include "render/buffers.h"
# include "render/buffers.h"
#include "util/util_foreach.h"
#include "util/util_list.h"
#include "util/util_map.h"
#include "util/util_param.h"
#include "util/util_string.h"
# include "util/util_foreach.h"
# include "util/util_list.h"
# include "util/util_map.h"
# include "util/util_param.h"
# include "util/util_string.h"
CCL_NAMESPACE_BEGIN
using std::cout;
using std::cerr;
using std::cout;
using std::exception;
using std::hex;
using std::setw;
using std::exception;
using boost::asio::ip::tcp;
@@ -56,436 +56,435 @@ static const int DISCOVER_PORT = 5121;
static const string DISCOVER_REQUEST_MSG = "REQUEST_RENDER_SERVER_IP";
static const string DISCOVER_REPLY_MSG = "REPLY_RENDER_SERVER_IP";
#if 0
# if 0
typedef boost::archive::text_oarchive o_archive;
typedef boost::archive::text_iarchive i_archive;
#else
# else
typedef boost::archive::binary_oarchive o_archive;
typedef boost::archive::binary_iarchive i_archive;
#endif
# endif
/* Serialization of device memory */
class network_device_memory : public device_memory
{
public:
network_device_memory(Device *device)
: device_memory(device, "", MEM_READ_ONLY)
{
}
class network_device_memory : public device_memory {
public:
network_device_memory(Device *device) : device_memory(device, "", MEM_READ_ONLY)
{
}
~network_device_memory()
{
device_pointer = 0;
};
/* Clears device_pointer before the base class destructor runs.
 * NOTE(review): presumably this keeps the device_memory base from acting on a
 * pointer this serialization wrapper does not own -- confirm against
 * device_memory's destructor. */
~network_device_memory()
{
  device_pointer = 0;
}
vector<char> local_data;
vector<char> local_data;
};
/* Common network error function / object for both DeviceNetwork and DeviceServer. */
class NetworkError {
public:
NetworkError() {
error = "";
error_count = 0;
}
public:
NetworkError()
{
error = "";
error_count = 0;
}
~NetworkError() {}
~NetworkError()
{
}
void network_error(const string& message) {
error = message;
error_count += 1;
}
/* Record a failure: the stored message is overwritten with `message` (only
 * the most recent one is kept) and the error counter is incremented. */
void network_error(const string &message)
{
  error = message;
  ++error_count;
}
bool have_error() {
return true ? error_count > 0 : false;
}
/* True once at least one error has been reported through network_error().
 * The previous form, `true ? error_count > 0 : false`, had a constant
 * condition and therefore always evaluated to `error_count > 0`; the dead
 * branch has been removed. */
bool have_error()
{
  return error_count > 0;
}
private:
string error;
int error_count;
private:
string error;
int error_count;
};
/* Remote procedure call Send */
class RPCSend {
public:
RPCSend(tcp::socket& socket_, NetworkError* e, const string& name_ = "")
: name(name_), socket(socket_), archive(archive_stream), sent(false)
{
archive & name_;
error_func = e;
fprintf(stderr, "rpc send %s\n", name.c_str());
}
public:
/* Begin an outgoing remote procedure call.
 *
 * socket_: connection the call is written to later.
 * e:       error sink; failures are reported via e->network_error().
 * name_:   name of the call; it is the first value written to the archive.
 *
 * The archive is constructed on top of archive_stream, so everything added
 * through the archive accumulates in that string stream. */
RPCSend(tcp::socket &socket_, NetworkError *e, const string &name_ = "")
: name(name_), socket(socket_), archive(archive_stream), sent(false)
{
/* `archive &x` is the archive's operator& (serialize x), not a declaration. */
archive &name_;
error_func = e;
/* Every constructed call is logged to stderr. */
fprintf(stderr, "rpc send %s\n", name.c_str());
}
~RPCSend()
{
}
~RPCSend()
{
}
void add(const device_memory& mem)
{
archive & mem.data_type & mem.data_elements & mem.data_size;
archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer;
archive & mem.type & string(mem.name);
archive & mem.interpolation & mem.extension;
archive & mem.device_pointer;
}
/* Append the metadata fields of a device_memory object to the archive.
 * Each `archive &a &b ...` line is a chain of the archive's operator&
 * (serialize a, then b, ...), not a reference declaration; the fields are
 * written in exactly the order they appear below. */
void add(const device_memory &mem)
{
archive &mem.data_type &mem.data_elements &mem.data_size;
archive &mem.data_width &mem.data_height &mem.data_depth &mem.device_pointer;
/* The name is wrapped in a string before being written. */
archive &mem.type &string(mem.name);
archive &mem.interpolation &mem.extension;
/* NOTE(review): device_pointer was already written on the data_width line
 * above, so it appears twice in the stream. Presumably the receiving side
 * reads it twice as well -- confirm before removing either occurrence. */
archive &mem.device_pointer;
}
template<typename T> void add(const T& data)
{
archive & data;
}
template<typename T> void add(const T &data)
{
archive &data;
}
void add(const DeviceTask& task)
{
int type = (int)task.type;
archive & type & task.x & task.y & task.w & task.h;
archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples;
archive & task.offset & task.stride;
archive & task.shader_input & task.shader_output & task.shader_eval_type;
archive & task.shader_x & task.shader_w;
archive & task.need_finish_queue;
}
/* Append the fields of a DeviceTask to the archive, in the order written
 * below. Each `archive &a &b ...` line is a chain of the archive's operator&
 * (serialize a, then b, ...), not a reference declaration. */
void add(const DeviceTask &task)
{
/* The task type is written as a plain int. */
int type = (int)task.type;
archive &type &task.x &task.y &task.w &task.h;
archive &task.rgba_byte &task.rgba_half &task.buffer &task.sample &task.num_samples;
archive &task.offset &task.stride;
archive &task.shader_input &task.shader_output &task.shader_eval_type;
archive &task.shader_x &task.shader_w;
archive &task.need_finish_queue;
}
void add(const RenderTile& tile)
{
archive & tile.x & tile.y & tile.w & tile.h;
archive & tile.start_sample & tile.num_samples & tile.sample;
archive & tile.resolution & tile.offset & tile.stride;
archive & tile.buffer;
}
/* Append the fields of a RenderTile to the archive, in the order written
 * below. Each `archive &a &b ...` line is a chain of the archive's operator&
 * (serialize a, then b, ...), not a reference declaration. */
void add(const RenderTile &tile)
{
archive &tile.x &tile.y &tile.w &tile.h;
archive &tile.start_sample &tile.num_samples &tile.sample;
archive &tile.resolution &tile.offset &tile.stride;
archive &tile.buffer;
}
void write()
{
boost::system::error_code error;
void write()
{
boost::system::error_code error;
/* get string from stream */
string archive_str = archive_stream.str();
/* get string from stream */
string archive_str = archive_stream.str();
/* first send fixed size header with size of following data */
ostringstream header_stream;
header_stream << setw(8) << hex << archive_str.size();
string header_str = header_stream.str();
/* first send fixed size header with size of following data */
ostringstream header_stream;
header_stream << setw(8) << hex << archive_str.size();
string header_str = header_stream.str();
boost::asio::write(socket,
boost::asio::buffer(header_str),
boost::asio::transfer_all(), error);
boost::asio::write(
socket, boost::asio::buffer(header_str), boost::asio::transfer_all(), error);
if(error.value())
error_func->network_error(error.message());
if (error.value())
error_func->network_error(error.message());
/* then send actual data */
boost::asio::write(socket,
boost::asio::buffer(archive_str),
boost::asio::transfer_all(), error);
/* then send actual data */
boost::asio::write(
socket, boost::asio::buffer(archive_str), boost::asio::transfer_all(), error);
if(error.value())
error_func->network_error(error.message());
if (error.value())
error_func->network_error(error.message());
sent = true;
}
sent = true;
}
void write_buffer(void *buffer, size_t size)
{
boost::system::error_code error;
void write_buffer(void *buffer, size_t size)
{
boost::system::error_code error;
boost::asio::write(socket,
boost::asio::buffer(buffer, size),
boost::asio::transfer_all(), error);
boost::asio::write(
socket, boost::asio::buffer(buffer, size), boost::asio::transfer_all(), error);
if(error.value())
error_func->network_error(error.message());
}
if (error.value())
error_func->network_error(error.message());
}
protected:
string name;
tcp::socket& socket;
ostringstream archive_stream;
o_archive archive;
bool sent;
NetworkError *error_func;
protected:
string name;
tcp::socket &socket;
ostringstream archive_stream;
o_archive archive;
bool sent;
NetworkError *error_func;
};
/* Remote procedure call Receive */
class RPCReceive {
public:
RPCReceive(tcp::socket& socket_, NetworkError* e )
: socket(socket_), archive_stream(NULL), archive(NULL)
{
error_func = e;
/* read head with fixed size */
vector<char> header(8);
boost::system::error_code error;
size_t len = boost::asio::read(socket, boost::asio::buffer(header), error);
public:
RPCReceive(tcp::socket &socket_, NetworkError *e)
: socket(socket_), archive_stream(NULL), archive(NULL)
{
error_func = e;
/* read head with fixed size */
vector<char> header(8);
boost::system::error_code error;
size_t len = boost::asio::read(socket, boost::asio::buffer(header), error);
if(error.value()) {
error_func->network_error(error.message());
}
if (error.value()) {
error_func->network_error(error.message());
}
/* verify if we got something */
if(len == header.size()) {
/* decode header */
string header_str(&header[0], header.size());
istringstream header_stream(header_str);
/* verify if we got something */
if (len == header.size()) {
/* decode header */
string header_str(&header[0], header.size());
istringstream header_stream(header_str);
size_t data_size;
size_t data_size;
if((header_stream >> hex >> data_size)) {
if ((header_stream >> hex >> data_size)) {
vector<char> data(data_size);
size_t len = boost::asio::read(socket, boost::asio::buffer(data), error);
vector<char> data(data_size);
size_t len = boost::asio::read(socket, boost::asio::buffer(data), error);
if(error.value())
error_func->network_error(error.message());
if (error.value())
error_func->network_error(error.message());
if (len == data_size) {
archive_str = (data.size()) ? string(&data[0], data.size()) : string("");
if(len == data_size) {
archive_str = (data.size())? string(&data[0], data.size()): string("");
archive_stream = new istringstream(archive_str);
archive = new i_archive(*archive_stream);
archive_stream = new istringstream(archive_str);
archive = new i_archive(*archive_stream);
*archive &name;
fprintf(stderr, "rpc receive %s\n", name.c_str());
}
else {
error_func->network_error("Network receive error: data size doesn't match header");
}
}
else {
error_func->network_error("Network receive error: can't decode data size from header");
}
}
else {
error_func->network_error("Network receive error: invalid header size");
}
}
*archive & name;
fprintf(stderr, "rpc receive %s\n", name.c_str());
}
else {
error_func->network_error("Network receive error: data size doesn't match header");
}
}
else {
error_func->network_error("Network receive error: can't decode data size from header");
}
}
else {
error_func->network_error("Network receive error: invalid header size");
}
}
~RPCReceive()
{
delete archive;
delete archive_stream;
}
~RPCReceive()
{
delete archive;
delete archive_stream;
}
void read(network_device_memory &mem, string &name)
{
*archive &mem.data_type &mem.data_elements &mem.data_size;
*archive &mem.data_width &mem.data_height &mem.data_depth &mem.device_pointer;
*archive &mem.type &name;
*archive &mem.interpolation &mem.extension;
*archive &mem.device_pointer;
void read(network_device_memory& mem, string& name)
{
*archive & mem.data_type & mem.data_elements & mem.data_size;
*archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer;
*archive & mem.type & name;
*archive & mem.interpolation & mem.extension;
*archive & mem.device_pointer;
mem.name = name.c_str();
mem.host_pointer = 0;
mem.name = name.c_str();
mem.host_pointer = 0;
/* Can't transfer OpenGL texture over network. */
if (mem.type == MEM_PIXELS) {
mem.type = MEM_READ_WRITE;
}
}
/* Can't transfer OpenGL texture over network. */
if(mem.type == MEM_PIXELS) {
mem.type = MEM_READ_WRITE;
}
}
template<typename T> void read(T &data)
{
*archive &data;
}
template<typename T> void read(T& data)
{
*archive & data;
}
void read_buffer(void *buffer, size_t size)
{
boost::system::error_code error;
size_t len = boost::asio::read(socket, boost::asio::buffer(buffer, size), error);
void read_buffer(void *buffer, size_t size)
{
boost::system::error_code error;
size_t len = boost::asio::read(socket, boost::asio::buffer(buffer, size), error);
if (error.value()) {
error_func->network_error(error.message());
}
if(error.value()) {
error_func->network_error(error.message());
}
if (len != size)
cout << "Network receive error: buffer size doesn't match expected size\n";
}
if(len != size)
cout << "Network receive error: buffer size doesn't match expected size\n";
}
void read(DeviceTask &task)
{
int type;
void read(DeviceTask& task)
{
int type;
*archive &type &task.x &task.y &task.w &task.h;
*archive &task.rgba_byte &task.rgba_half &task.buffer &task.sample &task.num_samples;
*archive &task.offset &task.stride;
*archive &task.shader_input &task.shader_output &task.shader_eval_type;
*archive &task.shader_x &task.shader_w;
*archive &task.need_finish_queue;
*archive & type & task.x & task.y & task.w & task.h;
*archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples;
*archive & task.offset & task.stride;
*archive & task.shader_input & task.shader_output & task.shader_eval_type;
*archive & task.shader_x & task.shader_w;
*archive & task.need_finish_queue;
task.type = (DeviceTask::Type)type;
}
task.type = (DeviceTask::Type)type;
}
void read(RenderTile &tile)
{
*archive &tile.x &tile.y &tile.w &tile.h;
*archive &tile.start_sample &tile.num_samples &tile.sample;
*archive &tile.resolution &tile.offset &tile.stride;
*archive &tile.buffer;
void read(RenderTile& tile)
{
*archive & tile.x & tile.y & tile.w & tile.h;
*archive & tile.start_sample & tile.num_samples & tile.sample;
*archive & tile.resolution & tile.offset & tile.stride;
*archive & tile.buffer;
tile.buffers = NULL;
}
tile.buffers = NULL;
}
string name;
string name;
protected:
tcp::socket& socket;
string archive_str;
istringstream *archive_stream;
i_archive *archive;
NetworkError *error_func;
protected:
tcp::socket &socket;
string archive_str;
istringstream *archive_stream;
i_archive *archive;
NetworkError *error_func;
};
/* Server auto discovery */
class ServerDiscovery {
public:
explicit ServerDiscovery(bool discover = false)
: listen_socket(io_service), collect_servers(false)
{
/* setup listen socket */
listen_endpoint.address(boost::asio::ip::address_v4::any());
listen_endpoint.port(DISCOVER_PORT);
public:
explicit ServerDiscovery(bool discover = false)
: listen_socket(io_service), collect_servers(false)
{
/* setup listen socket */
listen_endpoint.address(boost::asio::ip::address_v4::any());
listen_endpoint.port(DISCOVER_PORT);
listen_socket.open(listen_endpoint.protocol());
listen_socket.open(listen_endpoint.protocol());
boost::asio::socket_base::reuse_address option(true);
listen_socket.set_option(option);
boost::asio::socket_base::reuse_address option(true);
listen_socket.set_option(option);
listen_socket.bind(listen_endpoint);
listen_socket.bind(listen_endpoint);
/* setup receive callback */
async_receive();
/* setup receive callback */
async_receive();
/* start server discovery */
if(discover) {
collect_servers = true;
servers.clear();
/* start server discovery */
if (discover) {
collect_servers = true;
servers.clear();
broadcast_message(DISCOVER_REQUEST_MSG);
}
broadcast_message(DISCOVER_REQUEST_MSG);
}
/* start thread */
work = new boost::asio::io_service::work(io_service);
thread = new boost::thread(boost::bind(&boost::asio::io_service::run, &io_service));
}
/* start thread */
work = new boost::asio::io_service::work(io_service);
thread = new boost::thread(boost::bind(&boost::asio::io_service::run, &io_service));
}
~ServerDiscovery()
{
io_service.stop();
thread->join();
delete thread;
delete work;
}
~ServerDiscovery()
{
io_service.stop();
thread->join();
delete thread;
delete work;
}
vector<string> get_server_list()
{
vector<string> result;
vector<string> get_server_list()
{
vector<string> result;
mutex.lock();
result = vector<string>(servers.begin(), servers.end());
mutex.unlock();
mutex.lock();
result = vector<string>(servers.begin(), servers.end());
mutex.unlock();
return result;
}
return result;
}
private:
void handle_receive_from(const boost::system::error_code& error, size_t size)
{
if(error) {
cout << "Server discovery receive error: " << error.message() << "\n";
return;
}
private:
void handle_receive_from(const boost::system::error_code &error, size_t size)
{
if (error) {
cout << "Server discovery receive error: " << error.message() << "\n";
return;
}
if(size > 0) {
string msg = string(receive_buffer, size);
if (size > 0) {
string msg = string(receive_buffer, size);
/* handle incoming message */
if(collect_servers) {
if(msg == DISCOVER_REPLY_MSG) {
string address = receive_endpoint.address().to_string();
/* handle incoming message */
if (collect_servers) {
if (msg == DISCOVER_REPLY_MSG) {
string address = receive_endpoint.address().to_string();
mutex.lock();
mutex.lock();
/* add address if it's not already in the list */
bool found = std::find(servers.begin(), servers.end(),
address) != servers.end();
/* add address if it's not already in the list */
bool found = std::find(servers.begin(), servers.end(), address) != servers.end();
if(!found)
servers.push_back(address);
if (!found)
servers.push_back(address);
mutex.unlock();
}
}
else {
/* reply to request */
if(msg == DISCOVER_REQUEST_MSG)
broadcast_message(DISCOVER_REPLY_MSG);
}
}
mutex.unlock();
}
}
else {
/* reply to request */
if (msg == DISCOVER_REQUEST_MSG)
broadcast_message(DISCOVER_REPLY_MSG);
}
}
async_receive();
}
async_receive();
}
void async_receive()
{
listen_socket.async_receive_from(
boost::asio::buffer(receive_buffer), receive_endpoint,
boost::bind(&ServerDiscovery::handle_receive_from, this,
boost::asio::placeholders::error, boost::asio::placeholders::bytes_transferred));
}
void async_receive()
{
listen_socket.async_receive_from(boost::asio::buffer(receive_buffer),
receive_endpoint,
boost::bind(&ServerDiscovery::handle_receive_from,
this,
boost::asio::placeholders::error,
boost::asio::placeholders::bytes_transferred));
}
void broadcast_message(const string& msg)
{
/* setup broadcast socket */
boost::asio::ip::udp::socket socket(io_service);
void broadcast_message(const string &msg)
{
/* setup broadcast socket */
boost::asio::ip::udp::socket socket(io_service);
socket.open(boost::asio::ip::udp::v4());
socket.open(boost::asio::ip::udp::v4());
boost::asio::socket_base::broadcast option(true);
socket.set_option(option);
boost::asio::socket_base::broadcast option(true);
socket.set_option(option);
boost::asio::ip::udp::endpoint broadcast_endpoint(
boost::asio::ip::address::from_string("255.255.255.255"), DISCOVER_PORT);
boost::asio::ip::udp::endpoint broadcast_endpoint(
boost::asio::ip::address::from_string("255.255.255.255"), DISCOVER_PORT);
/* broadcast message */
socket.send_to(boost::asio::buffer(msg), broadcast_endpoint);
}
/* broadcast message */
socket.send_to(boost::asio::buffer(msg), broadcast_endpoint);
}
/* network service and socket */
boost::asio::io_service io_service;
boost::asio::ip::udp::endpoint listen_endpoint;
boost::asio::ip::udp::socket listen_socket;
/* network service and socket */
boost::asio::io_service io_service;
boost::asio::ip::udp::endpoint listen_endpoint;
boost::asio::ip::udp::socket listen_socket;
/* threading */
boost::thread *thread;
boost::asio::io_service::work *work;
boost::mutex mutex;
/* threading */
boost::thread *thread;
boost::asio::io_service::work *work;
boost::mutex mutex;
/* buffer and endpoint for receiving messages */
char receive_buffer[256];
boost::asio::ip::udp::endpoint receive_endpoint;
/* buffer and endpoint for receiving messages */
char receive_buffer[256];
boost::asio::ip::udp::endpoint receive_endpoint;
// os, version, devices, status, host name, group name, ip as far as fields go
struct ServerInfo {
string cycles_version;
string os;
int device_count;
string status;
string host_name;
string group_name;
string host_addr;
};
// os, version, devices, status, host name, group name, ip as far as fields go
struct ServerInfo {
string cycles_version;
string os;
int device_count;
string status;
string host_name;
string group_name;
string host_addr;
};
/* collection of server addresses in list */
bool collect_servers;
vector<string> servers;
/* collection of server addresses in list */
bool collect_servers;
vector<string> servers;
};
CCL_NAMESPACE_END
#endif
#endif /* __DEVICE_NETWORK_H__ */
#endif /* __DEVICE_NETWORK_H__ */

View File

@@ -16,218 +16,211 @@
#ifdef WITH_OPENCL
#include "device/opencl/opencl.h"
# include "device/opencl/opencl.h"
#include "device/device_intern.h"
# include "device/device_intern.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
#include "util/util_set.h"
#include "util/util_string.h"
# include "util/util_foreach.h"
# include "util/util_logging.h"
# include "util/util_set.h"
# include "util/util_string.h"
CCL_NAMESPACE_BEGIN
/* Create the OpenCL device; this simply forwards all arguments to
 * opencl_create_split_device(). */
Device *device_opencl_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{
  return opencl_create_split_device(info, stats, profiler, background);
}
/* Initialize the CLEW OpenCL loader.
 *
 * The work is done on the first call only; the outcome is cached in function
 * statics and returned by every later call. Returns false when OpenCL is
 * force-disabled (OpenCLInfo::device_type() == 0) or when clewInit() fails. */
bool device_opencl_init()
{
  static bool initialized = false;
  static bool result = false;

  if (initialized)
    return result;

  initialized = true;

  if (OpenCLInfo::device_type() != 0) {
    int clew_result = clewInit();
    if (clew_result == CLEW_SUCCESS) {
      VLOG(1) << "CLEW initialization succeeded.";
      result = true;
    }
    else {
      VLOG(1) << "CLEW initialization failed: "
              << ((clew_result == CLEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" :
                                                              "Error opening the library");
    }
  }
  else {
    VLOG(1) << "Skip initializing CLEW, platform is force disabled.";
    result = false;
  }

  return result;
}
/* Query the number of OpenCL platforms.
 *
 * On Windows the query is wrapped in a structured exception handler: if the
 * driver crashes, *num_platforms is set to 0 and CL_DEVICE_NOT_FOUND is
 * returned. On other platforms this is a plain clGetPlatformIDs() call. */
static cl_int device_opencl_get_num_platforms_safe(cl_uint *num_platforms)
{
#  ifdef _WIN32
  __try {
    return clGetPlatformIDs(0, NULL, num_platforms);
  }
  __except (EXCEPTION_EXECUTE_HANDLER) {
    /* Ignore crashes inside the OpenCL driver and hope we can
     * survive even with corrupted OpenCL installs. */
    fprintf(stderr, "Cycles OpenCL: driver crashed, continuing without OpenCL.\n");
  }

  *num_platforms = 0;
  return CL_DEVICE_NOT_FOUND;
#  else
  return clGetPlatformIDs(0, NULL, num_platforms);
#  endif
}
/* Append one DeviceInfo per usable OpenCL device to `devices`.
 *
 * Nothing is appended when no OpenCL platform is available. Each device gets
 * an identifier built from the platform name, device name and hardware id,
 * made unique with the device number where needed. */
void device_opencl_info(vector<DeviceInfo> &devices)
{
  cl_uint num_platforms = 0;
  device_opencl_get_num_platforms_safe(&num_platforms);
  if (num_platforms == 0) {
    return;
  }

  vector<OpenCLPlatformDevice> usable_devices;
  OpenCLInfo::get_usable_devices(&usable_devices);
  /* Devices are numbered consecutively across platforms. */
  int num_devices = 0;
  set<string> unique_ids;
  foreach (OpenCLPlatformDevice &platform_device, usable_devices) {
    /* Compute unique ID for persistent user preferences. */
    const string &platform_name = platform_device.platform_name;
    const string &device_name = platform_device.device_name;
    string hardware_id = platform_device.hardware_id;
    if (hardware_id.empty()) {
      hardware_id = string_printf("ID_%d", num_devices);
    }
    string id = string("OPENCL_") + platform_name + "_" + device_name + "_" + hardware_id;

    /* Hardware ID might not be unique, add device number in that case. */
    if (unique_ids.find(id) != unique_ids.end()) {
      id += string_printf("_ID_%d", num_devices);
    }
    unique_ids.insert(id);

    /* Create DeviceInfo. */
    DeviceInfo info;
    info.type = DEVICE_OPENCL;
    info.description = string_remove_trademark(string(device_name));
    info.num = num_devices;
    /* We don't know if it's used for display, but assume it is. */
    info.display_device = true;
    info.use_split_kernel = true;
    info.has_volume_decoupled = false;
    info.id = id;

    /* Check OpenCL extensions */
    info.has_half_images = platform_device.device_extensions.find("cl_khr_fp16") != string::npos;

    devices.push_back(info);
    num_devices++;
  }
}
/* Build a human readable report of all OpenCL platforms and their devices.
 *
 * Returns a fixed message when OpenCL is force-disabled or when no platform
 * is found; otherwise one section per platform with one sub-section per
 * device. Failing OpenCL queries go through opencl_assert(). */
string device_opencl_capabilities()
{
  if (OpenCLInfo::device_type() == 0) {
    return "All OpenCL devices are forced to be OFF";
  }
  string result = "";
  string error_msg = ""; /* Only used by opencl_assert(), but in the future
                          * it could also be nicely reported to the console.
                          */
  cl_uint num_platforms = 0;
  opencl_assert(device_opencl_get_num_platforms_safe(&num_platforms));
  if (num_platforms == 0) {
    return "No OpenCL platforms found\n";
  }
  result += string_printf("Number of platforms: %u\n", num_platforms);

  vector<cl_platform_id> platform_ids;
  platform_ids.resize(num_platforms);
  opencl_assert(clGetPlatformIDs(num_platforms, &platform_ids[0], NULL));

  typedef char cl_string[1024];

  /* Query a fixed-size value of `type` and append it as "name: value". */
#  define APPEND_INFO(func, id, name, what, type) \
    do { \
      type data; \
      memset(&data, 0, sizeof(data)); \
      opencl_assert(func(id, what, sizeof(data), &data, NULL)); \
      result += string_printf("%s: %s\n", name, to_string(data).c_str()); \
    } while (false)
  /* Query an optional string; append it only when the query succeeds and
   * the string is not empty. */
#  define APPEND_STRING_EXTENSION_INFO(func, id, name, what) \
    do { \
      char data[1024] = "\0"; \
      size_t length = 0; \
      if (func(id, what, sizeof(data), &data, &length) == CL_SUCCESS) { \
        if (length != 0 && data[0] != '\0') { \
          result += string_printf("%s: %s\n", name, data); \
        } \
      } \
    } while (false)
#  define APPEND_PLATFORM_INFO(id, name, what, type) \
    APPEND_INFO(clGetPlatformInfo, id, "\tPlatform " name, what, type)
#  define APPEND_DEVICE_INFO(id, name, what, type) \
    APPEND_INFO(clGetDeviceInfo, id, "\t\t\tDevice " name, what, type)
#  define APPEND_DEVICE_STRING_EXTENSION_INFO(id, name, what) \
    APPEND_STRING_EXTENSION_INFO(clGetDeviceInfo, id, "\t\t\tDevice " name, what)

  vector<cl_device_id> device_ids;
  for (cl_uint platform = 0; platform < num_platforms; ++platform) {
    cl_platform_id platform_id = platform_ids[platform];

    result += string_printf("Platform #%u\n", platform);

    APPEND_PLATFORM_INFO(platform_id, "Name", CL_PLATFORM_NAME, cl_string);
    APPEND_PLATFORM_INFO(platform_id, "Vendor", CL_PLATFORM_VENDOR, cl_string);
    APPEND_PLATFORM_INFO(platform_id, "Version", CL_PLATFORM_VERSION, cl_string);
    APPEND_PLATFORM_INFO(platform_id, "Profile", CL_PLATFORM_PROFILE, cl_string);
    APPEND_PLATFORM_INFO(platform_id, "Extensions", CL_PLATFORM_EXTENSIONS, cl_string);

    cl_uint num_devices = 0;
    opencl_assert(
        clGetDeviceIDs(platform_ids[platform], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices));
    result += string_printf("\tNumber of devices: %u\n", num_devices);

    if (num_devices == 0) {
      /* Nothing to list; this also avoids taking &device_ids[0] of an
       * empty vector below. */
      continue;
    }

    device_ids.resize(num_devices);
    opencl_assert(clGetDeviceIDs(
        platform_ids[platform], CL_DEVICE_TYPE_ALL, num_devices, &device_ids[0], NULL));
    for (cl_uint device = 0; device < num_devices; ++device) {
      cl_device_id device_id = device_ids[device];

      result += string_printf("\t\tDevice: #%u\n", device);

      APPEND_DEVICE_INFO(device_id, "Name", CL_DEVICE_NAME, cl_string);
      APPEND_DEVICE_STRING_EXTENSION_INFO(device_id, "Board Name", CL_DEVICE_BOARD_NAME_AMD);
      APPEND_DEVICE_INFO(device_id, "Vendor", CL_DEVICE_VENDOR, cl_string);
      APPEND_DEVICE_INFO(device_id, "OpenCL C Version", CL_DEVICE_OPENCL_C_VERSION, cl_string);
      APPEND_DEVICE_INFO(device_id, "Profile", CL_DEVICE_PROFILE, cl_string);
      APPEND_DEVICE_INFO(device_id, "Version", CL_DEVICE_VERSION, cl_string);
      APPEND_DEVICE_INFO(device_id, "Extensions", CL_DEVICE_EXTENSIONS, cl_string);
      APPEND_DEVICE_INFO(
          device_id, "Max clock frequency (MHz)", CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint);
      APPEND_DEVICE_INFO(device_id, "Max compute units", CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint);
      APPEND_DEVICE_INFO(device_id, "Max work group size", CL_DEVICE_MAX_WORK_GROUP_SIZE, size_t);
    }
  }

  /* Undefine exactly the helper macros defined above so that none of them
   * leaks into the rest of the translation unit. */
#  undef APPEND_DEVICE_STRING_EXTENSION_INFO
#  undef APPEND_DEVICE_INFO
#  undef APPEND_PLATFORM_INFO
#  undef APPEND_STRING_EXTENSION_INFO
#  undef APPEND_INFO

  return result;
}
CCL_NAMESPACE_END
#endif /* WITH_OPENCL */
#endif /* WITH_OPENCL */

View File

@@ -27,299 +27,304 @@ CCL_NAMESPACE_BEGIN
static const double alpha = 0.1; /* alpha for rolling average */
/* Set up the device buffers used by the split kernel and clear every kernel
 * function pointer. The kernels themselves are loaded by load_kernels(). */
DeviceSplitKernel::DeviceSplitKernel(Device *device)
    : device(device),
      split_data(device, "split_data"),
      ray_state(device, "ray_state", MEM_READ_WRITE),
      queue_index(device, "queue_index"),
      use_queues_flag(device, "use_queues_flag"),
      work_pool_wgs(device, "work_pool_wgs"),
      kernel_data_initialized(false)
{
  avg_time_per_sample = 0.0;

  /* All pointers start as NULL so that the destructor can delete them
   * unconditionally, whether or not load_kernels() ever ran. */
  kernel_path_init = NULL;
  kernel_scene_intersect = NULL;
  kernel_lamp_emission = NULL;
  kernel_do_volume = NULL;
  kernel_queue_enqueue = NULL;
  kernel_indirect_background = NULL;
  kernel_shader_setup = NULL;
  kernel_shader_sort = NULL;
  kernel_shader_eval = NULL;
  kernel_holdout_emission_blurring_pathtermination_ao = NULL;
  kernel_subsurface_scatter = NULL;
  kernel_direct_lighting = NULL;
  kernel_shadow_blocked_ao = NULL;
  kernel_shadow_blocked_dl = NULL;
  kernel_enqueue_inactive = NULL;
  kernel_next_iteration_setup = NULL;
  kernel_indirect_subsurface = NULL;
  kernel_buffer_update = NULL;
}
/* Free the device buffers and delete every kernel function object. Each
 * pointer is deleted exactly once; pointers that were never loaded are still
 * NULL from the constructor, which delete accepts. */
DeviceSplitKernel::~DeviceSplitKernel()
{
  split_data.free();
  ray_state.free();
  use_queues_flag.free();
  queue_index.free();
  work_pool_wgs.free();

  delete kernel_path_init;
  delete kernel_scene_intersect;
  delete kernel_lamp_emission;
  delete kernel_do_volume;
  delete kernel_queue_enqueue;
  delete kernel_indirect_background;
  delete kernel_shader_setup;
  delete kernel_shader_sort;
  delete kernel_shader_eval;
  delete kernel_holdout_emission_blurring_pathtermination_ao;
  delete kernel_subsurface_scatter;
  delete kernel_direct_lighting;
  delete kernel_shadow_blocked_ao;
  delete kernel_shadow_blocked_dl;
  delete kernel_enqueue_inactive;
  delete kernel_next_iteration_setup;
  delete kernel_indirect_subsurface;
  delete kernel_buffer_update;
}
bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_features)
bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures &requested_features)
{
#define LOAD_KERNEL(name) \
kernel_##name = get_split_kernel_function(#name, requested_features); \
if(!kernel_##name) { \
device->set_error(string("Split kernel error: failed to load kernel_") + #name); \
return false; \
}
kernel_##name = get_split_kernel_function(#name, requested_features); \
if (!kernel_##name) { \
device->set_error(string("Split kernel error: failed to load kernel_") + #name); \
return false; \
}
LOAD_KERNEL(path_init);
LOAD_KERNEL(scene_intersect);
LOAD_KERNEL(lamp_emission);
if (requested_features.use_volume) {
LOAD_KERNEL(do_volume);
}
LOAD_KERNEL(queue_enqueue);
LOAD_KERNEL(indirect_background);
LOAD_KERNEL(shader_setup);
LOAD_KERNEL(shader_sort);
LOAD_KERNEL(shader_eval);
LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
LOAD_KERNEL(subsurface_scatter);
LOAD_KERNEL(direct_lighting);
LOAD_KERNEL(shadow_blocked_ao);
LOAD_KERNEL(shadow_blocked_dl);
LOAD_KERNEL(enqueue_inactive);
LOAD_KERNEL(next_iteration_setup);
LOAD_KERNEL(indirect_subsurface);
LOAD_KERNEL(buffer_update);
LOAD_KERNEL(path_init);
LOAD_KERNEL(scene_intersect);
LOAD_KERNEL(lamp_emission);
if (requested_features.use_volume) {
LOAD_KERNEL(do_volume);
}
LOAD_KERNEL(queue_enqueue);
LOAD_KERNEL(indirect_background);
LOAD_KERNEL(shader_setup);
LOAD_KERNEL(shader_sort);
LOAD_KERNEL(shader_eval);
LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
LOAD_KERNEL(subsurface_scatter);
LOAD_KERNEL(direct_lighting);
LOAD_KERNEL(shadow_blocked_ao);
LOAD_KERNEL(shadow_blocked_dl);
LOAD_KERNEL(enqueue_inactive);
LOAD_KERNEL(next_iteration_setup);
LOAD_KERNEL(indirect_subsurface);
LOAD_KERNEL(buffer_update);
#undef LOAD_KERNEL
/* Re-initialiaze kernel-dependent data when kernels change. */
kernel_data_initialized = false;
/* Re-initialiaze kernel-dependent data when kernels change. */
kernel_data_initialized = false;
return true;
return true;
}
/* Return how many split-state elements fit into a buffer of
 * `max_buffer_size` bytes.
 *
 * The per-element size is derived by asking state_buffer_size() for 1024
 * elements and dividing by 1024.
 * NOTE(review): a per-element size of zero would divide by zero here;
 * presumably state_buffer_size() never returns less than 1024 -- confirm. */
size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory &kg,
                                                           device_memory &data,
                                                           uint64_t max_buffer_size)
{
  uint64_t size_per_element = state_buffer_size(kg, data, 1024) / 1024;
  VLOG(1) << "Split state element size: " << string_human_readable_number(size_per_element)
          << " bytes. (" << string_human_readable_size(size_per_element) << ").";
  return max_buffer_size / size_per_element;
}
bool DeviceSplitKernel::path_trace(DeviceTask *task,
RenderTile& tile,
device_memory& kgbuffer,
device_memory& kernel_data)
RenderTile &tile,
device_memory &kgbuffer,
device_memory &kernel_data)
{
if(device->have_error()) {
return false;
}
if (device->have_error()) {
return false;
}
/* Allocate all required global memory once. */
if(!kernel_data_initialized) {
kernel_data_initialized = true;
/* Allocate all required global memory once. */
if (!kernel_data_initialized) {
kernel_data_initialized = true;
/* Set local size */
int2 lsize = split_kernel_local_size();
local_size[0] = lsize[0];
local_size[1] = lsize[1];
/* Set local size */
int2 lsize = split_kernel_local_size();
local_size[0] = lsize[0];
local_size[1] = lsize[1];
/* Set global size */
int2 gsize = split_kernel_global_size(kgbuffer, kernel_data, task);
/* Set global size */
int2 gsize = split_kernel_global_size(kgbuffer, kernel_data, task);
/* Make sure that set work size is a multiple of local
* work size dimensions.
*/
global_size[0] = round_up(gsize[0], local_size[0]);
global_size[1] = round_up(gsize[1], local_size[1]);
/* Make sure that set work size is a multiple of local
* work size dimensions.
*/
global_size[0] = round_up(gsize[0], local_size[0]);
global_size[1] = round_up(gsize[1], local_size[1]);
int num_global_elements = global_size[0] * global_size[1];
assert(num_global_elements % WORK_POOL_SIZE == 0);
int num_global_elements = global_size[0] * global_size[1];
assert(num_global_elements % WORK_POOL_SIZE == 0);
/* Calculate max groups */
/* Calculate max groups */
/* Denotes the maximum work groups possible w.r.t. current requested tile size. */
unsigned int work_pool_size = (device->info.type == DEVICE_CPU) ? WORK_POOL_SIZE_CPU : WORK_POOL_SIZE_GPU;
unsigned int max_work_groups = num_global_elements / work_pool_size + 1;
/* Denotes the maximum work groups possible w.r.t. current requested tile size. */
unsigned int work_pool_size = (device->info.type == DEVICE_CPU) ? WORK_POOL_SIZE_CPU :
WORK_POOL_SIZE_GPU;
unsigned int max_work_groups = num_global_elements / work_pool_size + 1;
/* Allocate work_pool_wgs memory. */
work_pool_wgs.alloc_to_device(max_work_groups);
queue_index.alloc_to_device(NUM_QUEUES);
use_queues_flag.alloc_to_device(1);
split_data.alloc_to_device(state_buffer_size(kgbuffer, kernel_data, num_global_elements));
ray_state.alloc(num_global_elements);
}
/* Allocate work_pool_wgs memory. */
work_pool_wgs.alloc_to_device(max_work_groups);
queue_index.alloc_to_device(NUM_QUEUES);
use_queues_flag.alloc_to_device(1);
split_data.alloc_to_device(state_buffer_size(kgbuffer, kernel_data, num_global_elements));
ray_state.alloc(num_global_elements);
}
/* Number of elements in the global state buffer */
int num_global_elements = global_size[0] * global_size[1];
/* Number of elements in the global state buffer */
int num_global_elements = global_size[0] * global_size[1];
#define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \
if(device->have_error()) { \
return false; \
} \
if(!kernel_##name->enqueue(KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \
return false; \
}
if (device->have_error()) { \
return false; \
} \
if (!kernel_##name->enqueue( \
KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \
return false; \
}
tile.sample = tile.start_sample;
tile.sample = tile.start_sample;
/* for exponential increase between tile updates */
int time_multiplier = 1;
/* for exponential increase between tile updates */
int time_multiplier = 1;
while(tile.sample < tile.start_sample + tile.num_samples) {
/* to keep track of how long it takes to run a number of samples */
double start_time = time_dt();
while (tile.sample < tile.start_sample + tile.num_samples) {
/* to keep track of how long it takes to run a number of samples */
double start_time = time_dt();
/* initial guess to start rolling average */
const int initial_num_samples = 1;
/* approx number of samples per second */
int samples_per_second = (avg_time_per_sample > 0.0) ?
int(double(time_multiplier) / avg_time_per_sample) + 1 : initial_num_samples;
/* initial guess to start rolling average */
const int initial_num_samples = 1;
/* approx number of samples per second */
int samples_per_second = (avg_time_per_sample > 0.0) ?
int(double(time_multiplier) / avg_time_per_sample) + 1 :
initial_num_samples;
RenderTile subtile = tile;
subtile.start_sample = tile.sample;
subtile.num_samples = min(samples_per_second, tile.start_sample + tile.num_samples - tile.sample);
RenderTile subtile = tile;
subtile.start_sample = tile.sample;
subtile.num_samples = min(samples_per_second,
tile.start_sample + tile.num_samples - tile.sample);
if(device->have_error()) {
return false;
}
if (device->have_error()) {
return false;
}
/* Reset state memory here, as the global size for the data_init
* kernel might not be large enough to do it in the kernel.
*/
work_pool_wgs.zero_to_device();
split_data.zero_to_device();
ray_state.zero_to_device();
/* Reset state memory here, as the global size for the data_init
* kernel might not be large enough to do it in the kernel.
*/
work_pool_wgs.zero_to_device();
split_data.zero_to_device();
ray_state.zero_to_device();
if(!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
subtile,
num_global_elements,
kgbuffer,
kernel_data,
split_data,
ray_state,
queue_index,
use_queues_flag,
work_pool_wgs))
{
return false;
}
if (!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
subtile,
num_global_elements,
kgbuffer,
kernel_data,
split_data,
ray_state,
queue_index,
use_queues_flag,
work_pool_wgs)) {
return false;
}
ENQUEUE_SPLIT_KERNEL(path_init, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(path_init, global_size, local_size);
bool activeRaysAvailable = true;
double cancel_time = DBL_MAX;
bool activeRaysAvailable = true;
double cancel_time = DBL_MAX;
while(activeRaysAvailable) {
/* Do path-iteration in host [Enqueue Path-iteration kernels]. */
for(int PathIter = 0; PathIter < 16; PathIter++) {
ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size);
if (kernel_do_volume) {
ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
}
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shader_setup, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shader_sort, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(enqueue_inactive, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(buffer_update, global_size, local_size);
while (activeRaysAvailable) {
/* Do path-iteration in host [Enqueue Path-iteration kernels]. */
for (int PathIter = 0; PathIter < 16; PathIter++) {
ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size);
if (kernel_do_volume) {
ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
}
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shader_setup, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shader_sort, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(
holdout_emission_blurring_pathtermination_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(enqueue_inactive, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(buffer_update, global_size, local_size);
if(task->get_cancel() && cancel_time == DBL_MAX) {
/* Wait up to twice as many seconds for current samples to finish
* to avoid artifacts in render result from ending too soon.
*/
cancel_time = time_dt() + 2.0 * time_multiplier;
}
if (task->get_cancel() && cancel_time == DBL_MAX) {
/* Wait up to twice as many seconds for current samples to finish
* to avoid artifacts in render result from ending too soon.
*/
cancel_time = time_dt() + 2.0 * time_multiplier;
}
if(time_dt() > cancel_time) {
return true;
}
}
if (time_dt() > cancel_time) {
return true;
}
}
/* Decide if we should exit path-iteration in host. */
ray_state.copy_from_device(0, global_size[0] * global_size[1], 1);
/* Decide if we should exit path-iteration in host. */
ray_state.copy_from_device(0, global_size[0] * global_size[1], 1);
activeRaysAvailable = false;
activeRaysAvailable = false;
for(int rayStateIter = 0; rayStateIter < global_size[0] * global_size[1]; ++rayStateIter) {
if(!IS_STATE(ray_state.data(), rayStateIter, RAY_INACTIVE)) {
if(IS_STATE(ray_state.data(), rayStateIter, RAY_INVALID)) {
/* Something went wrong, abort to avoid looping endlessly. */
device->set_error("Split kernel error: invalid ray state");
return false;
}
for (int rayStateIter = 0; rayStateIter < global_size[0] * global_size[1]; ++rayStateIter) {
if (!IS_STATE(ray_state.data(), rayStateIter, RAY_INACTIVE)) {
if (IS_STATE(ray_state.data(), rayStateIter, RAY_INVALID)) {
/* Something went wrong, abort to avoid looping endlessly. */
device->set_error("Split kernel error: invalid ray state");
return false;
}
/* Not all rays are RAY_INACTIVE. */
activeRaysAvailable = true;
break;
}
}
/* Not all rays are RAY_INACTIVE. */
activeRaysAvailable = true;
break;
}
}
if(time_dt() > cancel_time) {
return true;
}
}
if (time_dt() > cancel_time) {
return true;
}
}
double time_per_sample = ((time_dt()-start_time) / subtile.num_samples);
double time_per_sample = ((time_dt() - start_time) / subtile.num_samples);
if(avg_time_per_sample == 0.0) {
/* start rolling average */
avg_time_per_sample = time_per_sample;
}
else {
avg_time_per_sample = alpha*time_per_sample + (1.0-alpha)*avg_time_per_sample;
}
if (avg_time_per_sample == 0.0) {
/* start rolling average */
avg_time_per_sample = time_per_sample;
}
else {
avg_time_per_sample = alpha * time_per_sample + (1.0 - alpha) * avg_time_per_sample;
}
#undef ENQUEUE_SPLIT_KERNEL
tile.sample += subtile.num_samples;
task->update_progress(&tile, tile.w*tile.h*subtile.num_samples);
tile.sample += subtile.num_samples;
task->update_progress(&tile, tile.w * tile.h * subtile.num_samples);
time_multiplier = min(time_multiplier << 1, 10);
time_multiplier = min(time_multiplier << 1, 10);
if(task->get_cancel()) {
return true;
}
}
if (task->get_cancel()) {
return true;
}
}
return true;
return true;
}
CCL_NAMESPACE_END

View File

@@ -27,106 +27,115 @@ CCL_NAMESPACE_BEGIN
* Since some bytes may be needed for aligning chunks of memory;
* This is the amount of memory that we dedicate for that purpose.
*/
#define DATA_ALLOCATION_MEM_FACTOR 5000000 //5MB
#define DATA_ALLOCATION_MEM_FACTOR 5000000 //5MB
/* Types used for split kernel */
class KernelDimensions {
public:
size_t global_size[2];
size_t local_size[2];
public:
size_t global_size[2];
size_t local_size[2];
KernelDimensions(size_t global_size_[2], size_t local_size_[2])
{
memcpy(global_size, global_size_, sizeof(global_size));
memcpy(local_size, local_size_, sizeof(local_size));
}
KernelDimensions(size_t global_size_[2], size_t local_size_[2])
{
memcpy(global_size, global_size_, sizeof(global_size));
memcpy(local_size, local_size_, sizeof(local_size));
}
};
class SplitKernelFunction {
public:
virtual ~SplitKernelFunction() {}
public:
virtual ~SplitKernelFunction()
{
}
/* enqueue the kernel, returns false if there is an error */
virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) = 0;
/* enqueue the kernel, returns false if there is an error */
virtual bool enqueue(const KernelDimensions &dim, device_memory &kg, device_memory &data) = 0;
};
class DeviceSplitKernel {
private:
Device *device;
private:
Device *device;
SplitKernelFunction *kernel_path_init;
SplitKernelFunction *kernel_scene_intersect;
SplitKernelFunction *kernel_lamp_emission;
SplitKernelFunction *kernel_do_volume;
SplitKernelFunction *kernel_queue_enqueue;
SplitKernelFunction *kernel_indirect_background;
SplitKernelFunction *kernel_shader_setup;
SplitKernelFunction *kernel_shader_sort;
SplitKernelFunction *kernel_shader_eval;
SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
SplitKernelFunction *kernel_subsurface_scatter;
SplitKernelFunction *kernel_direct_lighting;
SplitKernelFunction *kernel_shadow_blocked_ao;
SplitKernelFunction *kernel_shadow_blocked_dl;
SplitKernelFunction *kernel_enqueue_inactive;
SplitKernelFunction *kernel_next_iteration_setup;
SplitKernelFunction *kernel_indirect_subsurface;
SplitKernelFunction *kernel_buffer_update;
SplitKernelFunction *kernel_path_init;
SplitKernelFunction *kernel_scene_intersect;
SplitKernelFunction *kernel_lamp_emission;
SplitKernelFunction *kernel_do_volume;
SplitKernelFunction *kernel_queue_enqueue;
SplitKernelFunction *kernel_indirect_background;
SplitKernelFunction *kernel_shader_setup;
SplitKernelFunction *kernel_shader_sort;
SplitKernelFunction *kernel_shader_eval;
SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
SplitKernelFunction *kernel_subsurface_scatter;
SplitKernelFunction *kernel_direct_lighting;
SplitKernelFunction *kernel_shadow_blocked_ao;
SplitKernelFunction *kernel_shadow_blocked_dl;
SplitKernelFunction *kernel_enqueue_inactive;
SplitKernelFunction *kernel_next_iteration_setup;
SplitKernelFunction *kernel_indirect_subsurface;
SplitKernelFunction *kernel_buffer_update;
/* Global memory variables [porting]; This memory is used for
* co-operation between different kernels; Data written by one
* kernel will be available to another kernel via this global
* memory.
*/
device_only_memory<uchar> split_data;
device_vector<uchar> ray_state;
device_only_memory<int> queue_index; /* Array of size num_queues that tracks the size of each queue. */
/* Global memory variables [porting]; This memory is used for
* co-operation between different kernels; Data written by one
* kernel will be available to another kernel via this global
* memory.
*/
device_only_memory<uchar> split_data;
device_vector<uchar> ray_state;
device_only_memory<int>
queue_index; /* Array of size num_queues that tracks the size of each queue. */
/* Flag to make sceneintersect and lampemission kernel use queues. */
device_only_memory<char> use_queues_flag;
/* Flag to make sceneintersect and lampemission kernel use queues. */
device_only_memory<char> use_queues_flag;
/* Approximate time it takes to complete one sample */
double avg_time_per_sample;
/* Approximate time it takes to complete one sample */
double avg_time_per_sample;
/* Work pool with respect to each work group. */
device_only_memory<unsigned int> work_pool_wgs;
/* Work pool with respect to each work group. */
device_only_memory<unsigned int> work_pool_wgs;
/* Cached kernel-dependent data, initialized once. */
bool kernel_data_initialized;
size_t local_size[2];
size_t global_size[2];
/* Cached kernel-dependent data, initialized once. */
bool kernel_data_initialized;
size_t local_size[2];
size_t global_size[2];
public:
explicit DeviceSplitKernel(Device* device);
virtual ~DeviceSplitKernel();
public:
explicit DeviceSplitKernel(Device *device);
virtual ~DeviceSplitKernel();
bool load_kernels(const DeviceRequestedFeatures& requested_features);
bool path_trace(DeviceTask *task,
RenderTile& rtile,
device_memory& kgbuffer,
device_memory& kernel_data);
bool load_kernels(const DeviceRequestedFeatures &requested_features);
bool path_trace(DeviceTask *task,
RenderTile &rtile,
device_memory &kgbuffer,
device_memory &kernel_data);
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0;
size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size);
virtual uint64_t state_buffer_size(device_memory &kg,
device_memory &data,
size_t num_threads) = 0;
size_t max_elements_for_max_buffer_size(device_memory &kg,
device_memory &data,
uint64_t max_buffer_size);
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
RenderTile& rtile,
int num_global_elements,
device_memory& kernel_globals,
device_memory& kernel_data_,
device_memory& split_data,
device_memory& ray_state,
device_memory& queue_index,
device_memory& use_queues_flag,
device_memory& work_pool_wgs) = 0;
virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim,
RenderTile &rtile,
int num_global_elements,
device_memory &kernel_globals,
device_memory &kernel_data_,
device_memory &split_data,
device_memory &ray_state,
device_memory &queue_index,
device_memory &use_queues_flag,
device_memory &work_pool_wgs) = 0;
virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name,
const DeviceRequestedFeatures&) = 0;
virtual int2 split_kernel_local_size() = 0;
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task) = 0;
virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name,
const DeviceRequestedFeatures &) = 0;
virtual int2 split_kernel_local_size() = 0;
virtual int2 split_kernel_global_size(device_memory &kg,
device_memory &data,
DeviceTask *task) = 0;
};
CCL_NAMESPACE_END
#endif /* __DEVICE_SPLIT_KERNEL_H__ */
#endif /* __DEVICE_SPLIT_KERNEL_H__ */

View File

@@ -29,100 +29,111 @@ CCL_NAMESPACE_BEGIN
/* Device Task */
DeviceTask::DeviceTask(Type type_)
: type(type_), x(0), y(0), w(0), h(0), rgba_byte(0), rgba_half(0), buffer(0),
sample(0), num_samples(1),
shader_input(0), shader_output(0),
shader_eval_type(0), shader_filter(0), shader_x(0), shader_w(0)
: type(type_),
x(0),
y(0),
w(0),
h(0),
rgba_byte(0),
rgba_half(0),
buffer(0),
sample(0),
num_samples(1),
shader_input(0),
shader_output(0),
shader_eval_type(0),
shader_filter(0),
shader_x(0),
shader_w(0)
{
last_update_time = time_dt();
last_update_time = time_dt();
}
int DeviceTask::get_subtask_count(int num, int max_size)
{
if(max_size != 0) {
int max_size_num;
if (max_size != 0) {
int max_size_num;
if(type == SHADER) {
max_size_num = (shader_w + max_size - 1)/max_size;
}
else {
max_size = max(1, max_size/w);
max_size_num = (h + max_size - 1)/max_size;
}
if (type == SHADER) {
max_size_num = (shader_w + max_size - 1) / max_size;
}
else {
max_size = max(1, max_size / w);
max_size_num = (h + max_size - 1) / max_size;
}
num = max(max_size_num, num);
}
num = max(max_size_num, num);
}
if(type == SHADER) {
num = min(shader_w, num);
}
else if(type == RENDER) {
}
else {
num = min(h, num);
}
if (type == SHADER) {
num = min(shader_w, num);
}
else if (type == RENDER) {
}
else {
num = min(h, num);
}
return num;
return num;
}
void DeviceTask::split(list<DeviceTask>& tasks, int num, int max_size)
void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size)
{
num = get_subtask_count(num, max_size);
num = get_subtask_count(num, max_size);
if(type == SHADER) {
for(int i = 0; i < num; i++) {
int tx = shader_x + (shader_w/num)*i;
int tw = (i == num-1)? shader_w - i*(shader_w/num): shader_w/num;
if (type == SHADER) {
for (int i = 0; i < num; i++) {
int tx = shader_x + (shader_w / num) * i;
int tw = (i == num - 1) ? shader_w - i * (shader_w / num) : shader_w / num;
DeviceTask task = *this;
DeviceTask task = *this;
task.shader_x = tx;
task.shader_w = tw;
task.shader_x = tx;
task.shader_w = tw;
tasks.push_back(task);
}
}
else if(type == RENDER) {
for(int i = 0; i < num; i++)
tasks.push_back(*this);
}
else {
for(int i = 0; i < num; i++) {
int ty = y + (h/num)*i;
int th = (i == num-1)? h - i*(h/num): h/num;
tasks.push_back(task);
}
}
else if (type == RENDER) {
for (int i = 0; i < num; i++)
tasks.push_back(*this);
}
else {
for (int i = 0; i < num; i++) {
int ty = y + (h / num) * i;
int th = (i == num - 1) ? h - i * (h / num) : h / num;
DeviceTask task = *this;
DeviceTask task = *this;
task.y = ty;
task.h = th;
task.y = ty;
task.h = th;
tasks.push_back(task);
}
}
tasks.push_back(task);
}
}
}
void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
{
if((type != RENDER) &&
(type != SHADER))
return;
if ((type != RENDER) && (type != SHADER))
return;
if(update_progress_sample) {
if(pixel_samples == -1) {
pixel_samples = shader_w;
}
update_progress_sample(pixel_samples, rtile? rtile->sample : 0);
}
if (update_progress_sample) {
if (pixel_samples == -1) {
pixel_samples = shader_w;
}
update_progress_sample(pixel_samples, rtile ? rtile->sample : 0);
}
if(update_tile_sample) {
double current_time = time_dt();
if (update_tile_sample) {
double current_time = time_dt();
if(current_time - last_update_time >= 1.0) {
update_tile_sample(*rtile);
if (current_time - last_update_time >= 1.0) {
update_tile_sample(*rtile);
last_update_time = current_time;
}
}
last_update_time = current_time;
}
}
}
CCL_NAMESPACE_END

View File

@@ -33,87 +33,88 @@ class RenderTile;
class Tile;
class DenoiseParams {
public:
/* Pixel radius for neighbouring pixels to take into account. */
int radius;
/* Controls neighbor pixel weighting for the denoising filter. */
float strength;
/* Preserve more or less detail based on feature passes. */
float feature_strength;
/* When removing pixels that don't carry information, use a relative threshold instead of an absolute one. */
bool relative_pca;
/* How many frames before and after the current center frame are included. */
int neighbor_frames;
/* Clamp the input to the range of +-1e8. Should be enough for any legitimate data. */
bool clamp_input;
public:
/* Pixel radius for neighbouring pixels to take into account. */
int radius;
/* Controls neighbor pixel weighting for the denoising filter. */
float strength;
/* Preserve more or less detail based on feature passes. */
float feature_strength;
/* When removing pixels that don't carry information, use a relative threshold instead of an absolute one. */
bool relative_pca;
/* How many frames before and after the current center frame are included. */
int neighbor_frames;
/* Clamp the input to the range of +-1e8. Should be enough for any legitimate data. */
bool clamp_input;
DenoiseParams()
{
radius = 8;
strength = 0.5f;
feature_strength = 0.5f;
relative_pca = false;
neighbor_frames = 2;
clamp_input = true;
}
DenoiseParams()
{
radius = 8;
strength = 0.5f;
feature_strength = 0.5f;
relative_pca = false;
neighbor_frames = 2;
clamp_input = true;
}
};
class DeviceTask : public Task {
public:
typedef enum { RENDER, FILM_CONVERT, SHADER } Type;
Type type;
public:
typedef enum { RENDER, FILM_CONVERT, SHADER } Type;
Type type;
int x, y, w, h;
device_ptr rgba_byte;
device_ptr rgba_half;
device_ptr buffer;
int sample;
int num_samples;
int offset, stride;
int x, y, w, h;
device_ptr rgba_byte;
device_ptr rgba_half;
device_ptr buffer;
int sample;
int num_samples;
int offset, stride;
device_ptr shader_input;
device_ptr shader_output;
int shader_eval_type;
int shader_filter;
int shader_x, shader_w;
device_ptr shader_input;
device_ptr shader_output;
int shader_eval_type;
int shader_filter;
int shader_x, shader_w;
int passes_size;
int passes_size;
explicit DeviceTask(Type type = RENDER);
explicit DeviceTask(Type type = RENDER);
int get_subtask_count(int num, int max_size = 0);
void split(list<DeviceTask>& tasks, int num, int max_size = 0);
int get_subtask_count(int num, int max_size = 0);
void split(list<DeviceTask> &tasks, int num, int max_size = 0);
void update_progress(RenderTile *rtile, int pixel_samples = -1);
void update_progress(RenderTile *rtile, int pixel_samples = -1);
function<bool(Device *device, RenderTile&)> acquire_tile;
function<void(long, int)> update_progress_sample;
function<void(RenderTile&)> update_tile_sample;
function<void(RenderTile&)> release_tile;
function<bool()> get_cancel;
function<void(RenderTile*, Device*)> map_neighbor_tiles;
function<void(RenderTile*, Device*)> unmap_neighbor_tiles;
function<bool(Device *device, RenderTile &)> acquire_tile;
function<void(long, int)> update_progress_sample;
function<void(RenderTile &)> update_tile_sample;
function<void(RenderTile &)> release_tile;
function<bool()> get_cancel;
function<void(RenderTile *, Device *)> map_neighbor_tiles;
function<void(RenderTile *, Device *)> unmap_neighbor_tiles;
DenoiseParams denoising;
bool denoising_from_render;
vector<int> denoising_frames;
DenoiseParams denoising;
bool denoising_from_render;
vector<int> denoising_frames;
bool denoising_do_filter;
bool denoising_write_passes;
bool denoising_do_filter;
bool denoising_write_passes;
int pass_stride;
int frame_stride;
int target_pass_stride;
int pass_denoising_data;
int pass_denoising_clean;
int pass_stride;
int frame_stride;
int target_pass_stride;
int pass_denoising_data;
int pass_denoising_clean;
bool need_finish_queue;
bool integrator_branched;
int2 requested_tile_size;
protected:
double last_update_time;
bool need_finish_queue;
bool integrator_branched;
int2 requested_tile_size;
protected:
double last_update_time;
};
CCL_NAMESPACE_END
#endif /* __DEVICE_TASK_H__ */
#endif /* __DEVICE_TASK_H__ */

View File

@@ -16,241 +16,246 @@
#ifdef WITH_OPENCL
#include "util/util_foreach.h"
# include "util/util_foreach.h"
#include "device/opencl/opencl.h"
#include "device/opencl/memory_manager.h"
# include "device/opencl/opencl.h"
# include "device/opencl/memory_manager.h"
CCL_NAMESPACE_BEGIN
void MemoryManager::DeviceBuffer::add_allocation(Allocation& allocation)
void MemoryManager::DeviceBuffer::add_allocation(Allocation &allocation)
{
allocations.push_back(&allocation);
allocations.push_back(&allocation);
}
void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDevice *device)
{
bool need_realloc = false;
bool need_realloc = false;
/* Calculate total size and remove any freed. */
size_t total_size = 0;
/* Calculate total size and remove any freed. */
size_t total_size = 0;
for(int i = allocations.size()-1; i >= 0; i--) {
Allocation* allocation = allocations[i];
for (int i = allocations.size() - 1; i >= 0; i--) {
Allocation *allocation = allocations[i];
/* Remove allocations that have been freed. */
if(!allocation->mem || allocation->mem->memory_size() == 0) {
allocation->device_buffer = NULL;
allocation->size = 0;
/* Remove allocations that have been freed. */
if (!allocation->mem || allocation->mem->memory_size() == 0) {
allocation->device_buffer = NULL;
allocation->size = 0;
allocations.erase(allocations.begin()+i);
allocations.erase(allocations.begin() + i);
need_realloc = true;
need_realloc = true;
continue;
}
continue;
}
/* Get actual size for allocation. */
size_t alloc_size = align_up(allocation->mem->memory_size(), 16);
/* Get actual size for allocation. */
size_t alloc_size = align_up(allocation->mem->memory_size(), 16);
if(allocation->size != alloc_size) {
/* Allocation is either new or resized. */
allocation->size = alloc_size;
allocation->needs_copy_to_device = true;
if (allocation->size != alloc_size) {
/* Allocation is either new or resized. */
allocation->size = alloc_size;
allocation->needs_copy_to_device = true;
need_realloc = true;
}
need_realloc = true;
}
total_size += alloc_size;
}
total_size += alloc_size;
}
if(need_realloc) {
cl_ulong max_buffer_size;
clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
if (need_realloc) {
cl_ulong max_buffer_size;
clGetDeviceInfo(
device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
if(total_size > max_buffer_size) {
device->set_error("Scene too complex to fit in available memory.");
return;
}
if (total_size > max_buffer_size) {
device->set_error("Scene too complex to fit in available memory.");
return;
}
device_only_memory<uchar> *new_buffer =
new device_only_memory<uchar>(device, "memory manager buffer");
device_only_memory<uchar> *new_buffer = new device_only_memory<uchar>(device,
"memory manager buffer");
new_buffer->alloc_to_device(total_size);
new_buffer->alloc_to_device(total_size);
size_t offset = 0;
size_t offset = 0;
foreach(Allocation* allocation, allocations) {
if(allocation->needs_copy_to_device) {
/* Copy from host to device. */
opencl_device_assert(device, clEnqueueWriteBuffer(device->cqCommandQueue,
CL_MEM_PTR(new_buffer->device_pointer),
CL_FALSE,
offset,
allocation->mem->memory_size(),
allocation->mem->host_pointer,
0, NULL, NULL
));
foreach (Allocation *allocation, allocations) {
if (allocation->needs_copy_to_device) {
/* Copy from host to device. */
opencl_device_assert(device,
clEnqueueWriteBuffer(device->cqCommandQueue,
CL_MEM_PTR(new_buffer->device_pointer),
CL_FALSE,
offset,
allocation->mem->memory_size(),
allocation->mem->host_pointer,
0,
NULL,
NULL));
allocation->needs_copy_to_device = false;
}
else {
/* Fast copy from memory already on device. */
opencl_device_assert(device, clEnqueueCopyBuffer(device->cqCommandQueue,
CL_MEM_PTR(buffer->device_pointer),
CL_MEM_PTR(new_buffer->device_pointer),
allocation->desc.offset,
offset,
allocation->mem->memory_size(),
0, NULL, NULL
));
}
allocation->needs_copy_to_device = false;
}
else {
/* Fast copy from memory already on device. */
opencl_device_assert(device,
clEnqueueCopyBuffer(device->cqCommandQueue,
CL_MEM_PTR(buffer->device_pointer),
CL_MEM_PTR(new_buffer->device_pointer),
allocation->desc.offset,
offset,
allocation->mem->memory_size(),
0,
NULL,
NULL));
}
allocation->desc.offset = offset;
offset += allocation->size;
}
allocation->desc.offset = offset;
offset += allocation->size;
}
delete buffer;
delete buffer;
buffer = new_buffer;
}
else {
assert(total_size == buffer->data_size);
buffer = new_buffer;
}
else {
assert(total_size == buffer->data_size);
size_t offset = 0;
size_t offset = 0;
foreach(Allocation* allocation, allocations) {
if(allocation->needs_copy_to_device) {
/* Copy from host to device. */
opencl_device_assert(device, clEnqueueWriteBuffer(device->cqCommandQueue,
CL_MEM_PTR(buffer->device_pointer),
CL_FALSE,
offset,
allocation->mem->memory_size(),
allocation->mem->host_pointer,
0, NULL, NULL
));
foreach (Allocation *allocation, allocations) {
if (allocation->needs_copy_to_device) {
/* Copy from host to device. */
opencl_device_assert(device,
clEnqueueWriteBuffer(device->cqCommandQueue,
CL_MEM_PTR(buffer->device_pointer),
CL_FALSE,
offset,
allocation->mem->memory_size(),
allocation->mem->host_pointer,
0,
NULL,
NULL));
allocation->needs_copy_to_device = false;
}
allocation->needs_copy_to_device = false;
}
offset += allocation->size;
}
}
offset += allocation->size;
}
}
/* Not really necessary, but seems to improve responsiveness for some reason. */
clFinish(device->cqCommandQueue);
/* Not really necessary, but seems to improve responsiveness for some reason. */
clFinish(device->cqCommandQueue);
}
/* Release the backing memory of this device buffer.
 * The device argument is unused; the buffer object performs the free itself. */
void MemoryManager::DeviceBuffer::free(OpenCLDevice * /*device*/)
{
  buffer->free();
}
/* Return the device buffer whose accumulated allocation size is lowest.
 * On ties the buffer with the lowest index wins. */
MemoryManager::DeviceBuffer *MemoryManager::smallest_device_buffer()
{
  DeviceBuffer *best = &device_buffers[0];

  /* Index 0 is the initial candidate, so start comparing from index 1. */
  for (int i = 1; i < NUM_DEVICE_BUFFERS; i++) {
    DeviceBuffer *candidate = &device_buffers[i];
    if (candidate->size < best->size) {
      best = candidate;
    }
  }

  return best;
}
/* Create a manager for the given device and give every device buffer slot
 * its own device-only memory object. */
MemoryManager::MemoryManager(OpenCLDevice *device) : device(device), need_update(false)
{
  for (int i = 0; i < NUM_DEVICE_BUFFERS; i++) {
    device_buffers[i].buffer = new device_only_memory<uchar>(device, "memory manager buffer");
  }
}
void MemoryManager::free()
{
foreach(DeviceBuffer& device_buffer, device_buffers) {
device_buffer.free(device);
}
foreach (DeviceBuffer &device_buffer, device_buffers) {
device_buffer.free(device);
}
}
void MemoryManager::alloc(const char *name, device_memory& mem)
void MemoryManager::alloc(const char *name, device_memory &mem)
{
Allocation& allocation = allocations[name];
Allocation &allocation = allocations[name];
allocation.mem = &mem;
allocation.needs_copy_to_device = true;
allocation.mem = &mem;
allocation.needs_copy_to_device = true;
if(!allocation.device_buffer) {
DeviceBuffer* device_buffer = smallest_device_buffer();
allocation.device_buffer = device_buffer;
if (!allocation.device_buffer) {
DeviceBuffer *device_buffer = smallest_device_buffer();
allocation.device_buffer = device_buffer;
allocation.desc.device_buffer = device_buffer - device_buffers;
allocation.desc.device_buffer = device_buffer - device_buffers;
device_buffer->add_allocation(allocation);
device_buffer->add_allocation(allocation);
device_buffer->size += mem.memory_size();
}
device_buffer->size += mem.memory_size();
}
need_update = true;
need_update = true;
}
/* Detach `mem` from the allocation that references it.
 *
 * Returns true when an allocation pointing at `mem` was found, false
 * otherwise. The owning buffer's size tally is reduced and an update is
 * requested; the entry itself stays in the map with a NULL memory pointer. */
bool MemoryManager::free(device_memory &mem)
{
  foreach (AllocationsMap::value_type &entry, allocations) {
    Allocation &allocation = entry.second;

    if (allocation.mem != &mem) {
      continue;
    }

    allocation.device_buffer->size -= mem.memory_size();

    allocation.mem = NULL;
    allocation.needs_copy_to_device = false;

    need_update = true;
    return true;
  }

  return false;
}
/* Return the buffer descriptor (buffer index and offset) for `name`.
 * Pending device updates are flushed first so the descriptor is current.
 * Note: looking up an unknown name creates a default allocation entry. */
MemoryManager::BufferDescriptor MemoryManager::get_descriptor(string name)
{
  update_device_memory();

  return allocations[name].desc;
}
void MemoryManager::update_device_memory()
{
if(!need_update) {
return;
}
if (!need_update) {
return;
}
need_update = false;
need_update = false;
foreach(DeviceBuffer& device_buffer, device_buffers) {
device_buffer.update_device_memory(device);
}
foreach (DeviceBuffer &device_buffer, device_buffers) {
device_buffer.update_device_memory(device);
}
}
/* Bind one kernel argument per device buffer, starting at index `*narg` and
 * advancing it for each buffer. Buffers without a device pointer are bound
 * to the device's null memory so the argument count stays fixed. */
void MemoryManager::set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg)
{
  update_device_memory();

  for (int i = 0; i < NUM_DEVICE_BUFFERS; i++) {
    DeviceBuffer &slot = device_buffers[i];

    if (!slot.buffer->device_pointer) {
      device->kernel_set_args(kernel, (*narg)++, device->null_mem);
      continue;
    }

    device->kernel_set_args(kernel, (*narg)++, *slot.buffer);
  }
}
CCL_NAMESPACE_END
#endif /* WITH_OPENCL */
#endif /* WITH_OPENCL */

View File

@@ -29,78 +29,77 @@ CCL_NAMESPACE_BEGIN
class OpenCLDevice;
class MemoryManager {
public:
static const int NUM_DEVICE_BUFFERS = 8;
public:
static const int NUM_DEVICE_BUFFERS = 8;
struct BufferDescriptor {
uint device_buffer;
cl_ulong offset;
};
struct BufferDescriptor {
uint device_buffer;
cl_ulong offset;
};
private:
struct DeviceBuffer;
private:
struct DeviceBuffer;
struct Allocation {
device_memory *mem;
struct Allocation {
device_memory *mem;
DeviceBuffer *device_buffer;
size_t size; /* Size of actual allocation, may be larger than requested. */
DeviceBuffer *device_buffer;
size_t size; /* Size of actual allocation, may be larger than requested. */
BufferDescriptor desc;
BufferDescriptor desc;
bool needs_copy_to_device;
bool needs_copy_to_device;
Allocation() : mem(NULL), device_buffer(NULL), size(0), needs_copy_to_device(false)
{
}
};
Allocation() : mem(NULL), device_buffer(NULL), size(0), needs_copy_to_device(false)
{
}
};
struct DeviceBuffer {
device_only_memory<uchar> *buffer;
vector<Allocation*> allocations;
size_t size; /* Size of all allocations. */
struct DeviceBuffer {
device_only_memory<uchar> *buffer;
vector<Allocation *> allocations;
size_t size; /* Size of all allocations. */
DeviceBuffer()
: buffer(NULL), size(0)
{
}
DeviceBuffer() : buffer(NULL), size(0)
{
}
~DeviceBuffer()
{
delete buffer;
buffer = NULL;
}
~DeviceBuffer()
{
delete buffer;
buffer = NULL;
}
void add_allocation(Allocation& allocation);
void add_allocation(Allocation &allocation);
void update_device_memory(OpenCLDevice *device);
void update_device_memory(OpenCLDevice *device);
void free(OpenCLDevice *device);
};
void free(OpenCLDevice *device);
};
OpenCLDevice *device;
OpenCLDevice *device;
DeviceBuffer device_buffers[NUM_DEVICE_BUFFERS];
DeviceBuffer device_buffers[NUM_DEVICE_BUFFERS];
typedef unordered_map<string, Allocation> AllocationsMap;
AllocationsMap allocations;
typedef unordered_map<string, Allocation> AllocationsMap;
AllocationsMap allocations;
bool need_update;
bool need_update;
DeviceBuffer* smallest_device_buffer();
DeviceBuffer *smallest_device_buffer();
public:
MemoryManager(OpenCLDevice *device);
public:
MemoryManager(OpenCLDevice *device);
void free(); /* Free all memory. */
void free(); /* Free all memory. */
void alloc(const char *name, device_memory& mem);
bool free(device_memory& mem);
void alloc(const char *name, device_memory &mem);
bool free(device_memory &mem);
BufferDescriptor get_descriptor(string name);
BufferDescriptor get_descriptor(string name);
void update_device_memory();
void set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg);
void update_device_memory();
void set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg);
};
CCL_NAMESPACE_END

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,11 +1,11 @@
# License texts shipped alongside Cycles.
set(LICENSES
  Apache_2.0.txt
  ILM.txt
  NVidia.txt
  OSL.txt
  Sobol.txt
  readme.txt
)

# Install the license files into the "license" folder of the Cycles install path.
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${LICENSES}" ${CYCLES_INSTALL_PATH}/license)

View File

@@ -1,19 +1,19 @@
# Include directories: the parent directory.
set(INC
  ..
)

# Implementation files.
set(SRC
  node.cpp
  node_type.cpp
  node_xml.cpp
)

# Header files.
set(SRC_HEADERS
  node.h
  node_enum.h
  node_type.h
  node_xml.h
)
set(LIB

View File

@@ -26,550 +26,645 @@ CCL_NAMESPACE_BEGIN
/* Node Type */
/* Construct a node of the given type.
 *
 * An empty name is replaced by the type's name, and every input socket is
 * initialized to its default value. */
Node::Node(const NodeType *type_, ustring name_) : name(name_), type(type_)
{
  assert(type);

  /* Fall back to the type name so the node always has a non-empty name,
   * which is convenient for debugging. */
  if (name.empty()) {
    name = type->name;
  }

  /* Fill every input socket with its default. */
  foreach (const SocketType &input, type->inputs) {
    set_default_value(input);
  }
}
/* Destructor; performs no explicit cleanup of its own. */
Node::~Node()
{
}
template<typename T>
static T& get_socket_value(const Node *node, const SocketType& socket)
template<typename T> static T &get_socket_value(const Node *node, const SocketType &socket)
{
return (T&)*(((char*)node) + socket.struct_offset);
return (T &)*(((char *)node) + socket.struct_offset);
}
#ifndef NDEBUG
static bool is_socket_float3(const SocketType& socket)
static bool is_socket_float3(const SocketType &socket)
{
return socket.type == SocketType::COLOR ||
socket.type == SocketType::POINT ||
socket.type == SocketType::VECTOR ||
socket.type == SocketType::NORMAL;
return socket.type == SocketType::COLOR || socket.type == SocketType::POINT ||
socket.type == SocketType::VECTOR || socket.type == SocketType::NORMAL;
}
static bool is_socket_array_float3(const SocketType& socket)
static bool is_socket_array_float3(const SocketType &socket)
{
return socket.type == SocketType::COLOR_ARRAY ||
socket.type == SocketType::POINT_ARRAY ||
socket.type == SocketType::VECTOR_ARRAY ||
socket.type == SocketType::NORMAL_ARRAY;
return socket.type == SocketType::COLOR_ARRAY || socket.type == SocketType::POINT_ARRAY ||
socket.type == SocketType::VECTOR_ARRAY || socket.type == SocketType::NORMAL_ARRAY;
}
#endif
/* set values */
void Node::set(const SocketType& input, bool value)
void Node::set(const SocketType &input, bool value)
{
assert(input.type == SocketType::BOOLEAN);
get_socket_value<bool>(this, input) = value;
assert(input.type == SocketType::BOOLEAN);
get_socket_value<bool>(this, input) = value;
}
void Node::set(const SocketType& input, int value)
void Node::set(const SocketType &input, int value)
{
assert((input.type == SocketType::INT || input.type == SocketType::ENUM));
get_socket_value<int>(this, input) = value;
assert((input.type == SocketType::INT || input.type == SocketType::ENUM));
get_socket_value<int>(this, input) = value;
}
void Node::set(const SocketType& input, uint value)
void Node::set(const SocketType &input, uint value)
{
assert(input.type == SocketType::UINT);
get_socket_value<uint>(this, input) = value;
assert(input.type == SocketType::UINT);
get_socket_value<uint>(this, input) = value;
}
void Node::set(const SocketType& input, float value)
void Node::set(const SocketType &input, float value)
{
assert(input.type == SocketType::FLOAT);
get_socket_value<float>(this, input) = value;
assert(input.type == SocketType::FLOAT);
get_socket_value<float>(this, input) = value;
}
void Node::set(const SocketType& input, float2 value)
void Node::set(const SocketType &input, float2 value)
{
assert(input.type == SocketType::FLOAT);
get_socket_value<float2>(this, input) = value;
assert(input.type == SocketType::FLOAT);
get_socket_value<float2>(this, input) = value;
}
void Node::set(const SocketType& input, float3 value)
void Node::set(const SocketType &input, float3 value)
{
assert(is_socket_float3(input));
get_socket_value<float3>(this, input) = value;
assert(is_socket_float3(input));
get_socket_value<float3>(this, input) = value;
}
void Node::set(const SocketType& input, const char *value)
void Node::set(const SocketType &input, const char *value)
{
set(input, ustring(value));
set(input, ustring(value));
}
void Node::set(const SocketType& input, ustring value)
void Node::set(const SocketType &input, ustring value)
{
if(input.type == SocketType::STRING) {
get_socket_value<ustring>(this, input) = value;
}
else if(input.type == SocketType::ENUM) {
const NodeEnum& enm = *input.enum_values;
if(enm.exists(value)) {
get_socket_value<int>(this, input) = enm[value];
}
else {
assert(0);
}
}
else {
assert(0);
}
if (input.type == SocketType::STRING) {
get_socket_value<ustring>(this, input) = value;
}
else if (input.type == SocketType::ENUM) {
const NodeEnum &enm = *input.enum_values;
if (enm.exists(value)) {
get_socket_value<int>(this, input) = enm[value];
}
else {
assert(0);
}
}
else {
assert(0);
}
}
void Node::set(const SocketType& input, const Transform& value)
void Node::set(const SocketType &input, const Transform &value)
{
assert(input.type == SocketType::TRANSFORM);
get_socket_value<Transform>(this, input) = value;
assert(input.type == SocketType::TRANSFORM);
get_socket_value<Transform>(this, input) = value;
}
void Node::set(const SocketType& input, Node *value)
void Node::set(const SocketType &input, Node *value)
{
assert(input.type == SocketType::TRANSFORM);
get_socket_value<Node*>(this, input) = value;
assert(input.type == SocketType::TRANSFORM);
get_socket_value<Node *>(this, input) = value;
}
/* set array values */
void Node::set(const SocketType& input, array<bool>& value)
void Node::set(const SocketType &input, array<bool> &value)
{
assert(input.type == SocketType::BOOLEAN_ARRAY);
get_socket_value<array<bool> >(this, input).steal_data(value);
assert(input.type == SocketType::BOOLEAN_ARRAY);
get_socket_value<array<bool>>(this, input).steal_data(value);
}
void Node::set(const SocketType& input, array<int>& value)
void Node::set(const SocketType &input, array<int> &value)
{
assert(input.type == SocketType::INT_ARRAY);
get_socket_value<array<int> >(this, input).steal_data(value);
assert(input.type == SocketType::INT_ARRAY);
get_socket_value<array<int>>(this, input).steal_data(value);
}
void Node::set(const SocketType& input, array<float>& value)
void Node::set(const SocketType &input, array<float> &value)
{
assert(input.type == SocketType::FLOAT_ARRAY);
get_socket_value<array<float> >(this, input).steal_data(value);
assert(input.type == SocketType::FLOAT_ARRAY);
get_socket_value<array<float>>(this, input).steal_data(value);
}
void Node::set(const SocketType& input, array<float2>& value)
void Node::set(const SocketType &input, array<float2> &value)
{
assert(input.type == SocketType::FLOAT_ARRAY);
get_socket_value<array<float2> >(this, input).steal_data(value);
assert(input.type == SocketType::FLOAT_ARRAY);
get_socket_value<array<float2>>(this, input).steal_data(value);
}
void Node::set(const SocketType& input, array<float3>& value)
void Node::set(const SocketType &input, array<float3> &value)
{
assert(is_socket_array_float3(input));
get_socket_value<array<float3> >(this, input).steal_data(value);
assert(is_socket_array_float3(input));
get_socket_value<array<float3>>(this, input).steal_data(value);
}
void Node::set(const SocketType& input, array<ustring>& value)
void Node::set(const SocketType &input, array<ustring> &value)
{
assert(input.type == SocketType::STRING_ARRAY);
get_socket_value<array<ustring> >(this, input).steal_data(value);
assert(input.type == SocketType::STRING_ARRAY);
get_socket_value<array<ustring>>(this, input).steal_data(value);
}
void Node::set(const SocketType& input, array<Transform>& value)
void Node::set(const SocketType &input, array<Transform> &value)
{
assert(input.type == SocketType::TRANSFORM_ARRAY);
get_socket_value<array<Transform> >(this, input).steal_data(value);
assert(input.type == SocketType::TRANSFORM_ARRAY);
get_socket_value<array<Transform>>(this, input).steal_data(value);
}
void Node::set(const SocketType& input, array<Node*>& value)
void Node::set(const SocketType &input, array<Node *> &value)
{
assert(input.type == SocketType::TRANSFORM_ARRAY);
get_socket_value<array<Node*> >(this, input).steal_data(value);
assert(input.type == SocketType::TRANSFORM_ARRAY);
get_socket_value<array<Node *>>(this, input).steal_data(value);
}
/* get values */
bool Node::get_bool(const SocketType& input) const
bool Node::get_bool(const SocketType &input) const
{
assert(input.type == SocketType::BOOLEAN);
return get_socket_value<bool>(this, input);
assert(input.type == SocketType::BOOLEAN);
return get_socket_value<bool>(this, input);
}
int Node::get_int(const SocketType& input) const
int Node::get_int(const SocketType &input) const
{
assert(input.type == SocketType::INT || input.type == SocketType::ENUM);
return get_socket_value<int>(this, input);
assert(input.type == SocketType::INT || input.type == SocketType::ENUM);
return get_socket_value<int>(this, input);
}
/* Read the value of a UINT socket. */
uint Node::get_uint(const SocketType &input) const
{
  assert(input.type == SocketType::UINT);
  const uint &slot = get_socket_value<uint>(this, input);
  return slot;
}
float Node::get_float(const SocketType& input) const
float Node::get_float(const SocketType &input) const
{
assert(input.type == SocketType::FLOAT);
return get_socket_value<float>(this, input);
assert(input.type == SocketType::FLOAT);
return get_socket_value<float>(this, input);
}
/* Read the value of a POINT2 socket.
 *
 * The debug check must require POINT2, the socket type the rest of this file
 * treats as float2 storage. Checking for FLOAT would allow a float2 to be
 * read out of a float slot and would reject valid POINT2 sockets. */
float2 Node::get_float2(const SocketType &input) const
{
  assert(input.type == SocketType::POINT2);
  return get_socket_value<float2>(this, input);
}
/* Read the value of a COLOR, POINT, VECTOR or NORMAL socket. */
float3 Node::get_float3(const SocketType &input) const
{
  assert(is_socket_float3(input));
  const float3 &slot = get_socket_value<float3>(this, input);
  return slot;
}
/* Read a socket as a string.
 *
 * STRING sockets return their stored value. ENUM sockets return the name
 * matching the stored integer, or an empty string when the integer is not in
 * the enum table. Any other socket type triggers a debug assertion and
 * returns an empty string. */
ustring Node::get_string(const SocketType &input) const
{
  if (input.type == SocketType::STRING) {
    return get_socket_value<ustring>(this, input);
  }

  if (input.type != SocketType::ENUM) {
    assert(0);
    return ustring();
  }

  const NodeEnum &enum_values = *input.enum_values;
  const int stored = get_socket_value<int>(this, input);

  if (enum_values.exists(stored)) {
    return enum_values[stored];
  }
  return ustring();
}
/* Read the value of a TRANSFORM socket. */
Transform Node::get_transform(const SocketType &input) const
{
  assert(input.type == SocketType::TRANSFORM);
  const Transform &slot = get_socket_value<Transform>(this, input);
  return slot;
}
/* Read the node pointer stored in a NODE socket. */
Node *Node::get_node(const SocketType &input) const
{
  assert(input.type == SocketType::NODE);
  Node *linked = get_socket_value<Node *>(this, input);
  return linked;
}
/* get array values */
/* Return a reference to the array stored in a BOOLEAN_ARRAY socket. */
const array<bool> &Node::get_bool_array(const SocketType &input) const
{
  assert(input.type == SocketType::BOOLEAN_ARRAY);
  const array<bool> &slot = get_socket_value<array<bool>>(this, input);
  return slot;
}
/* Return a reference to the array stored in an INT_ARRAY socket. */
const array<int> &Node::get_int_array(const SocketType &input) const
{
  assert(input.type == SocketType::INT_ARRAY);
  const array<int> &slot = get_socket_value<array<int>>(this, input);
  return slot;
}
/* Return a reference to the array stored in a FLOAT_ARRAY socket. */
const array<float> &Node::get_float_array(const SocketType &input) const
{
  assert(input.type == SocketType::FLOAT_ARRAY);
  const array<float> &slot = get_socket_value<array<float>>(this, input);
  return slot;
}
/* Return a reference to the array stored in a POINT2_ARRAY socket.
 *
 * The debug check must require POINT2_ARRAY, the socket type the rest of
 * this file treats as array<float2> storage. Checking for FLOAT_ARRAY would
 * allow an array<float> to be reinterpreted as array<float2> and would
 * reject valid POINT2_ARRAY sockets. */
const array<float2> &Node::get_float2_array(const SocketType &input) const
{
  assert(input.type == SocketType::POINT2_ARRAY);
  return get_socket_value<array<float2>>(this, input);
}
/* Return a reference to the array stored in a COLOR/POINT/VECTOR/NORMAL
 * array socket. */
const array<float3> &Node::get_float3_array(const SocketType &input) const
{
  assert(is_socket_array_float3(input));
  const array<float3> &slot = get_socket_value<array<float3>>(this, input);
  return slot;
}
/* Return a reference to the array stored in a STRING_ARRAY socket. */
const array<ustring> &Node::get_string_array(const SocketType &input) const
{
  assert(input.type == SocketType::STRING_ARRAY);
  const array<ustring> &slot = get_socket_value<array<ustring>>(this, input);
  return slot;
}
/* Return a reference to the array stored in a TRANSFORM_ARRAY socket. */
const array<Transform> &Node::get_transform_array(const SocketType &input) const
{
  assert(input.type == SocketType::TRANSFORM_ARRAY);
  const array<Transform> &slot = get_socket_value<array<Transform>>(this, input);
  return slot;
}
/* Return a reference to the array stored in a NODE_ARRAY socket. */
const array<Node *> &Node::get_node_array(const SocketType &input) const
{
  assert(input.type == SocketType::NODE_ARRAY);
  const array<Node *> &slot = get_socket_value<array<Node *>>(this, input);
  return slot;
}
/* generic value operations */
bool Node::has_default_value(const SocketType& input) const
bool Node::has_default_value(const SocketType &input) const
{
const void *src = input.default_value;
void *dst = &get_socket_value<char>(this, input);
return memcmp(dst, src, input.size()) == 0;
const void *src = input.default_value;
void *dst = &get_socket_value<char>(this, input);
return memcmp(dst, src, input.size()) == 0;
}
void Node::set_default_value(const SocketType& socket)
void Node::set_default_value(const SocketType &socket)
{
const void *src = socket.default_value;
void *dst = ((char*)this) + socket.struct_offset;
memcpy(dst, src, socket.size());
const void *src = socket.default_value;
void *dst = ((char *)this) + socket.struct_offset;
memcpy(dst, src, socket.size());
}
template<typename T>
static void copy_array(const Node *node, const SocketType& socket, const Node *other, const SocketType& other_socket)
static void copy_array(const Node *node,
const SocketType &socket,
const Node *other,
const SocketType &other_socket)
{
const array<T>* src = (const array<T>*)(((char*)other) + other_socket.struct_offset);
array<T>* dst = (array<T>*)(((char*)node) + socket.struct_offset);
*dst = *src;
const array<T> *src = (const array<T> *)(((char *)other) + other_socket.struct_offset);
array<T> *dst = (array<T> *)(((char *)node) + socket.struct_offset);
*dst = *src;
}
void Node::copy_value(const SocketType& socket, const Node& other, const SocketType& other_socket)
void Node::copy_value(const SocketType &socket, const Node &other, const SocketType &other_socket)
{
assert(socket.type == other_socket.type);
assert(socket.type == other_socket.type);
if(socket.is_array()) {
switch(socket.type) {
case SocketType::BOOLEAN_ARRAY: copy_array<bool>(this, socket, &other, other_socket); break;
case SocketType::FLOAT_ARRAY: copy_array<float>(this, socket, &other, other_socket); break;
case SocketType::INT_ARRAY: copy_array<int>(this, socket, &other, other_socket); break;
case SocketType::COLOR_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break;
case SocketType::VECTOR_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break;
case SocketType::POINT_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break;
case SocketType::NORMAL_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break;
case SocketType::POINT2_ARRAY: copy_array<float2>(this, socket, &other, other_socket); break;
case SocketType::STRING_ARRAY: copy_array<ustring>(this, socket, &other, other_socket); break;
case SocketType::TRANSFORM_ARRAY: copy_array<Transform>(this, socket, &other, other_socket); break;
case SocketType::NODE_ARRAY: copy_array<void*>(this, socket, &other, other_socket); break;
default: assert(0); break;
}
}
else {
const void *src = ((char*)&other) + other_socket.struct_offset;
void *dst = ((char*)this) + socket.struct_offset;
memcpy(dst, src, socket.size());
}
if (socket.is_array()) {
switch (socket.type) {
case SocketType::BOOLEAN_ARRAY:
copy_array<bool>(this, socket, &other, other_socket);
break;
case SocketType::FLOAT_ARRAY:
copy_array<float>(this, socket, &other, other_socket);
break;
case SocketType::INT_ARRAY:
copy_array<int>(this, socket, &other, other_socket);
break;
case SocketType::COLOR_ARRAY:
copy_array<float3>(this, socket, &other, other_socket);
break;
case SocketType::VECTOR_ARRAY:
copy_array<float3>(this, socket, &other, other_socket);
break;
case SocketType::POINT_ARRAY:
copy_array<float3>(this, socket, &other, other_socket);
break;
case SocketType::NORMAL_ARRAY:
copy_array<float3>(this, socket, &other, other_socket);
break;
case SocketType::POINT2_ARRAY:
copy_array<float2>(this, socket, &other, other_socket);
break;
case SocketType::STRING_ARRAY:
copy_array<ustring>(this, socket, &other, other_socket);
break;
case SocketType::TRANSFORM_ARRAY:
copy_array<Transform>(this, socket, &other, other_socket);
break;
case SocketType::NODE_ARRAY:
copy_array<void *>(this, socket, &other, other_socket);
break;
default:
assert(0);
break;
}
}
else {
const void *src = ((char *)&other) + other_socket.struct_offset;
void *dst = ((char *)this) + socket.struct_offset;
memcpy(dst, src, socket.size());
}
}
template<typename T>
static bool is_array_equal(const Node *node, const Node *other, const SocketType& socket)
static bool is_array_equal(const Node *node, const Node *other, const SocketType &socket)
{
const array<T>* a = (const array<T>*)(((char*)node) + socket.struct_offset);
const array<T>* b = (const array<T>*)(((char*)other) + socket.struct_offset);
return *a == *b;
const array<T> *a = (const array<T> *)(((char *)node) + socket.struct_offset);
const array<T> *b = (const array<T> *)(((char *)other) + socket.struct_offset);
return *a == *b;
}
template<typename T>
static bool is_value_equal(const Node *node, const Node *other, const SocketType& socket)
static bool is_value_equal(const Node *node, const Node *other, const SocketType &socket)
{
const T *a = (const T*)(((char*)node) + socket.struct_offset);
const T *b = (const T*)(((char*)other) + socket.struct_offset);
return *a == *b;
const T *a = (const T *)(((char *)node) + socket.struct_offset);
const T *b = (const T *)(((char *)other) + socket.struct_offset);
return *a == *b;
}
bool Node::equals_value(const Node& other, const SocketType& socket) const
bool Node::equals_value(const Node &other, const SocketType &socket) const
{
switch(socket.type) {
case SocketType::BOOLEAN: return is_value_equal<bool>(this, &other, socket);
case SocketType::FLOAT: return is_value_equal<float>(this, &other, socket);
case SocketType::INT: return is_value_equal<int>(this, &other, socket);
case SocketType::UINT: return is_value_equal<uint>(this, &other, socket);
case SocketType::COLOR: return is_value_equal<float3>(this, &other, socket);
case SocketType::VECTOR: return is_value_equal<float3>(this, &other, socket);
case SocketType::POINT: return is_value_equal<float3>(this, &other, socket);
case SocketType::NORMAL: return is_value_equal<float3>(this, &other, socket);
case SocketType::POINT2: return is_value_equal<float2>(this, &other, socket);
case SocketType::CLOSURE: return true;
case SocketType::STRING: return is_value_equal<ustring>(this, &other, socket);
case SocketType::ENUM: return is_value_equal<int>(this, &other, socket);
case SocketType::TRANSFORM: return is_value_equal<Transform>(this, &other, socket);
case SocketType::NODE: return is_value_equal<void*>(this, &other, socket);
switch (socket.type) {
case SocketType::BOOLEAN:
return is_value_equal<bool>(this, &other, socket);
case SocketType::FLOAT:
return is_value_equal<float>(this, &other, socket);
case SocketType::INT:
return is_value_equal<int>(this, &other, socket);
case SocketType::UINT:
return is_value_equal<uint>(this, &other, socket);
case SocketType::COLOR:
return is_value_equal<float3>(this, &other, socket);
case SocketType::VECTOR:
return is_value_equal<float3>(this, &other, socket);
case SocketType::POINT:
return is_value_equal<float3>(this, &other, socket);
case SocketType::NORMAL:
return is_value_equal<float3>(this, &other, socket);
case SocketType::POINT2:
return is_value_equal<float2>(this, &other, socket);
case SocketType::CLOSURE:
return true;
case SocketType::STRING:
return is_value_equal<ustring>(this, &other, socket);
case SocketType::ENUM:
return is_value_equal<int>(this, &other, socket);
case SocketType::TRANSFORM:
return is_value_equal<Transform>(this, &other, socket);
case SocketType::NODE:
return is_value_equal<void *>(this, &other, socket);
case SocketType::BOOLEAN_ARRAY: return is_array_equal<bool>(this, &other, socket);
case SocketType::FLOAT_ARRAY: return is_array_equal<float>(this, &other, socket);
case SocketType::INT_ARRAY: return is_array_equal<int>(this, &other, socket);
case SocketType::COLOR_ARRAY: return is_array_equal<float3>(this, &other, socket);
case SocketType::VECTOR_ARRAY: return is_array_equal<float3>(this, &other, socket);
case SocketType::POINT_ARRAY: return is_array_equal<float3>(this, &other, socket);
case SocketType::NORMAL_ARRAY: return is_array_equal<float3>(this, &other, socket);
case SocketType::POINT2_ARRAY: return is_array_equal<float2>(this, &other, socket);
case SocketType::STRING_ARRAY: return is_array_equal<ustring>(this, &other, socket);
case SocketType::TRANSFORM_ARRAY: return is_array_equal<Transform>(this, &other, socket);
case SocketType::NODE_ARRAY: return is_array_equal<void*>(this, &other, socket);
case SocketType::BOOLEAN_ARRAY:
return is_array_equal<bool>(this, &other, socket);
case SocketType::FLOAT_ARRAY:
return is_array_equal<float>(this, &other, socket);
case SocketType::INT_ARRAY:
return is_array_equal<int>(this, &other, socket);
case SocketType::COLOR_ARRAY:
return is_array_equal<float3>(this, &other, socket);
case SocketType::VECTOR_ARRAY:
return is_array_equal<float3>(this, &other, socket);
case SocketType::POINT_ARRAY:
return is_array_equal<float3>(this, &other, socket);
case SocketType::NORMAL_ARRAY:
return is_array_equal<float3>(this, &other, socket);
case SocketType::POINT2_ARRAY:
return is_array_equal<float2>(this, &other, socket);
case SocketType::STRING_ARRAY:
return is_array_equal<ustring>(this, &other, socket);
case SocketType::TRANSFORM_ARRAY:
return is_array_equal<Transform>(this, &other, socket);
case SocketType::NODE_ARRAY:
return is_array_equal<void *>(this, &other, socket);
case SocketType::UNDEFINED: return true;
}
case SocketType::UNDEFINED:
return true;
}
return true;
return true;
}
/* equals */
bool Node::equals(const Node& other) const
bool Node::equals(const Node &other) const
{
assert(type == other.type);
assert(type == other.type);
foreach(const SocketType& socket, type->inputs) {
if(!equals_value(other, socket))
return false;
}
foreach (const SocketType &socket, type->inputs) {
if (!equals_value(other, socket))
return false;
}
return true;
return true;
}
/* Hash */
namespace {
template<typename T>
void value_hash(const Node *node, const SocketType& socket, MD5Hash& md5)
template<typename T> void value_hash(const Node *node, const SocketType &socket, MD5Hash &md5)
{
md5.append(((uint8_t*)node) + socket.struct_offset, socket.size());
md5.append(((uint8_t *)node) + socket.struct_offset, socket.size());
}
void float3_hash(const Node *node, const SocketType& socket, MD5Hash& md5)
void float3_hash(const Node *node, const SocketType &socket, MD5Hash &md5)
{
/* Don't compare 4th element used for padding. */
md5.append(((uint8_t*)node) + socket.struct_offset, sizeof(float) * 3);
/* Don't compare 4th element used for padding. */
md5.append(((uint8_t *)node) + socket.struct_offset, sizeof(float) * 3);
}
template<typename T>
void array_hash(const Node *node, const SocketType& socket, MD5Hash& md5)
template<typename T> void array_hash(const Node *node, const SocketType &socket, MD5Hash &md5)
{
const array<T>& a = *(const array<T>*)(((char*)node) + socket.struct_offset);
for(size_t i = 0; i < a.size(); i++) {
md5.append((uint8_t*)&a[i], sizeof(T));
}
const array<T> &a = *(const array<T> *)(((char *)node) + socket.struct_offset);
for (size_t i = 0; i < a.size(); i++) {
md5.append((uint8_t *)&a[i], sizeof(T));
}
}
void float3_array_hash(const Node *node, const SocketType& socket, MD5Hash& md5)
void float3_array_hash(const Node *node, const SocketType &socket, MD5Hash &md5)
{
/* Don't compare 4th element used for padding. */
const array<float3>& a = *(const array<float3>*)(((char*)node) + socket.struct_offset);
for(size_t i = 0; i < a.size(); i++) {
md5.append((uint8_t*)&a[i], sizeof(float) * 3);
}
/* Don't compare 4th element used for padding. */
const array<float3> &a = *(const array<float3> *)(((char *)node) + socket.struct_offset);
for (size_t i = 0; i < a.size(); i++) {
md5.append((uint8_t *)&a[i], sizeof(float) * 3);
}
}
} // namespace
/* Feed the node type name, then each input socket's name and value, into
 * `md5`. Sockets are visited in the order they appear in type->inputs. */
void Node::hash(MD5Hash &md5)
{
  md5.append(type->name.string());

  foreach (const SocketType &socket, type->inputs) {
    md5.append(socket.name.string());

    switch (socket.type) {
      /* Single values hashed from their stored bytes. */
      case SocketType::BOOLEAN:
        value_hash<bool>(this, socket, md5);
        break;
      case SocketType::FLOAT:
        value_hash<float>(this, socket, md5);
        break;
      case SocketType::INT:
      case SocketType::ENUM:
        value_hash<int>(this, socket, md5);
        break;
      case SocketType::UINT:
        value_hash<uint>(this, socket, md5);
        break;
      case SocketType::POINT2:
        value_hash<float2>(this, socket, md5);
        break;
      case SocketType::STRING:
        value_hash<ustring>(this, socket, md5);
        break;
      case SocketType::TRANSFORM:
        value_hash<Transform>(this, socket, md5);
        break;
      case SocketType::NODE:
        value_hash<void *>(this, socket, md5);
        break;

      /* float3 values: only the first three floats are hashed. */
      case SocketType::COLOR:
      case SocketType::VECTOR:
      case SocketType::POINT:
      case SocketType::NORMAL:
        float3_hash(this, socket, md5);
        break;

      /* Arrays hashed element by element. */
      case SocketType::BOOLEAN_ARRAY:
        array_hash<bool>(this, socket, md5);
        break;
      case SocketType::FLOAT_ARRAY:
        array_hash<float>(this, socket, md5);
        break;
      case SocketType::INT_ARRAY:
        array_hash<int>(this, socket, md5);
        break;
      case SocketType::POINT2_ARRAY:
        array_hash<float2>(this, socket, md5);
        break;
      case SocketType::STRING_ARRAY:
        array_hash<ustring>(this, socket, md5);
        break;
      case SocketType::TRANSFORM_ARRAY:
        array_hash<Transform>(this, socket, md5);
        break;
      case SocketType::NODE_ARRAY:
        array_hash<void *>(this, socket, md5);
        break;

      /* float3 arrays: three floats per element. */
      case SocketType::COLOR_ARRAY:
      case SocketType::VECTOR_ARRAY:
      case SocketType::POINT_ARRAY:
      case SocketType::NORMAL_ARRAY:
        float3_array_hash(this, socket, md5);
        break;

      /* Only the socket name (appended above) contributes for these. */
      case SocketType::CLOSURE:
      case SocketType::UNDEFINED:
        break;
    }
  }
}
namespace {
template<typename T>
size_t array_size_in_bytes(const Node *node, const SocketType& socket)
template<typename T> size_t array_size_in_bytes(const Node *node, const SocketType &socket)
{
const array<T>& a = *(const array<T>*)(((char*)node) + socket.struct_offset);
return a.size() * sizeof(T);
const array<T> &a = *(const array<T> *)(((char *)node) + socket.struct_offset);
return a.size() * sizeof(T);
}
} // namespace
/* Sum the sizes of all input sockets of this node.
 *
 * Single-valued sockets contribute socket.size(). Array sockets contribute
 * their current element count times the element size. */
size_t Node::get_total_size_in_bytes() const
{
  size_t total_size = 0;

  foreach (const SocketType &socket, type->inputs) {
    switch (socket.type) {
      /* Single values. */
      case SocketType::BOOLEAN:
      case SocketType::FLOAT:
      case SocketType::INT:
      case SocketType::UINT:
      case SocketType::COLOR:
      case SocketType::VECTOR:
      case SocketType::POINT:
      case SocketType::NORMAL:
      case SocketType::POINT2:
      case SocketType::CLOSURE:
      case SocketType::STRING:
      case SocketType::ENUM:
      case SocketType::TRANSFORM:
      case SocketType::NODE:
        total_size += socket.size();
        break;

      /* Arrays, sized by their contents. */
      case SocketType::BOOLEAN_ARRAY:
        total_size += array_size_in_bytes<bool>(this, socket);
        break;
      case SocketType::FLOAT_ARRAY:
        total_size += array_size_in_bytes<float>(this, socket);
        break;
      case SocketType::INT_ARRAY:
        total_size += array_size_in_bytes<int>(this, socket);
        break;
      case SocketType::COLOR_ARRAY:
      case SocketType::VECTOR_ARRAY:
      case SocketType::POINT_ARRAY:
      case SocketType::NORMAL_ARRAY:
        total_size += array_size_in_bytes<float3>(this, socket);
        break;
      case SocketType::POINT2_ARRAY:
        total_size += array_size_in_bytes<float2>(this, socket);
        break;
      case SocketType::STRING_ARRAY:
        total_size += array_size_in_bytes<ustring>(this, socket);
        break;
      case SocketType::TRANSFORM_ARRAY:
        total_size += array_size_in_bytes<Transform>(this, socket);
        break;
      case SocketType::NODE_ARRAY:
        total_size += array_size_in_bytes<void *>(this, socket);
        break;

      case SocketType::UNDEFINED:
        break;
    }
  }

  return total_size;
}
CCL_NAMESPACE_END

View File

@@ -31,72 +31,71 @@ struct Transform;
/* Node */
struct Node
{
explicit Node(const NodeType *type, ustring name = ustring());
virtual ~Node();
struct Node {
explicit Node(const NodeType *type, ustring name = ustring());
virtual ~Node();
/* set values */
void set(const SocketType& input, bool value);
void set(const SocketType& input, int value);
void set(const SocketType& input, uint value);
void set(const SocketType& input, float value);
void set(const SocketType& input, float2 value);
void set(const SocketType& input, float3 value);
void set(const SocketType& input, const char *value);
void set(const SocketType& input, ustring value);
void set(const SocketType& input, const Transform& value);
void set(const SocketType& input, Node *value);
/* set values */
void set(const SocketType &input, bool value);
void set(const SocketType &input, int value);
void set(const SocketType &input, uint value);
void set(const SocketType &input, float value);
void set(const SocketType &input, float2 value);
void set(const SocketType &input, float3 value);
void set(const SocketType &input, const char *value);
void set(const SocketType &input, ustring value);
void set(const SocketType &input, const Transform &value);
void set(const SocketType &input, Node *value);
/* set array values. the memory from the input array will taken over
* by the node and the input array will be empty after return */
void set(const SocketType& input, array<bool>& value);
void set(const SocketType& input, array<int>& value);
void set(const SocketType& input, array<float>& value);
void set(const SocketType& input, array<float2>& value);
void set(const SocketType& input, array<float3>& value);
void set(const SocketType& input, array<ustring>& value);
void set(const SocketType& input, array<Transform>& value);
void set(const SocketType& input, array<Node*>& value);
/* set array values. the memory from the input array will taken over
* by the node and the input array will be empty after return */
void set(const SocketType &input, array<bool> &value);
void set(const SocketType &input, array<int> &value);
void set(const SocketType &input, array<float> &value);
void set(const SocketType &input, array<float2> &value);
void set(const SocketType &input, array<float3> &value);
void set(const SocketType &input, array<ustring> &value);
void set(const SocketType &input, array<Transform> &value);
void set(const SocketType &input, array<Node *> &value);
/* get values */
bool get_bool(const SocketType& input) const;
int get_int(const SocketType& input) const;
uint get_uint(const SocketType& input) const;
float get_float(const SocketType& input) const;
float2 get_float2(const SocketType& input) const;
float3 get_float3(const SocketType& input) const;
ustring get_string(const SocketType& input) const;
Transform get_transform(const SocketType& input) const;
Node *get_node(const SocketType& input) const;
/* get values */
bool get_bool(const SocketType &input) const;
int get_int(const SocketType &input) const;
uint get_uint(const SocketType &input) const;
float get_float(const SocketType &input) const;
float2 get_float2(const SocketType &input) const;
float3 get_float3(const SocketType &input) const;
ustring get_string(const SocketType &input) const;
Transform get_transform(const SocketType &input) const;
Node *get_node(const SocketType &input) const;
/* get array values */
const array<bool>& get_bool_array(const SocketType& input) const;
const array<int>& get_int_array(const SocketType& input) const;
const array<float>& get_float_array(const SocketType& input) const;
const array<float2>& get_float2_array(const SocketType& input) const;
const array<float3>& get_float3_array(const SocketType& input) const;
const array<ustring>& get_string_array(const SocketType& input) const;
const array<Transform>& get_transform_array(const SocketType& input) const;
const array<Node*>& get_node_array(const SocketType& input) const;
/* get array values */
const array<bool> &get_bool_array(const SocketType &input) const;
const array<int> &get_int_array(const SocketType &input) const;
const array<float> &get_float_array(const SocketType &input) const;
const array<float2> &get_float2_array(const SocketType &input) const;
const array<float3> &get_float3_array(const SocketType &input) const;
const array<ustring> &get_string_array(const SocketType &input) const;
const array<Transform> &get_transform_array(const SocketType &input) const;
const array<Node *> &get_node_array(const SocketType &input) const;
/* generic values operations */
bool has_default_value(const SocketType& input) const;
void set_default_value(const SocketType& input);
bool equals_value(const Node& other, const SocketType& input) const;
void copy_value(const SocketType& input, const Node& other, const SocketType& other_input);
/* generic values operations */
bool has_default_value(const SocketType &input) const;
void set_default_value(const SocketType &input);
bool equals_value(const Node &other, const SocketType &input) const;
void copy_value(const SocketType &input, const Node &other, const SocketType &other_input);
/* equals */
bool equals(const Node& other) const;
/* equals */
bool equals(const Node &other) const;
/* compute hash of node and its socket values */
void hash(MD5Hash& md5);
/* compute hash of node and its socket values */
void hash(MD5Hash &md5);
/* Get total size of this node. */
size_t get_total_size_in_bytes() const;
/* Get total size of this node. */
size_t get_total_size_in_bytes() const;
ustring name;
const NodeType *type;
ustring name;
const NodeType *type;
};
CCL_NAMESPACE_END

View File

@@ -26,25 +26,50 @@ CCL_NAMESPACE_BEGIN
* Utility class for enum values. */
struct NodeEnum {
bool empty() const { return left.empty(); }
void insert(const char *x, int y) {
left[ustring(x)] = y;
right[y] = ustring(x);
}
bool empty() const
{
return left.empty();
}
void insert(const char *x, int y)
{
left[ustring(x)] = y;
right[y] = ustring(x);
}
bool exists(ustring x) const { return left.find(x) != left.end(); }
bool exists(int y) const { return right.find(y) != right.end(); }
bool exists(ustring x) const
{
return left.find(x) != left.end();
}
bool exists(int y) const
{
return right.find(y) != right.end();
}
int operator[](const char *x) const { return left.find(ustring(x))->second; }
int operator[](ustring x) const { return left.find(x)->second; }
ustring operator[](int y) const { return right.find(y)->second; }
int operator[](const char *x) const
{
return left.find(ustring(x))->second;
}
int operator[](ustring x) const
{
return left.find(x)->second;
}
ustring operator[](int y) const
{
return right.find(y)->second;
}
unordered_map<ustring, int, ustringHash>::const_iterator begin() const { return left.begin(); }
unordered_map<ustring, int, ustringHash>::const_iterator end() const { return left.end(); }
unordered_map<ustring, int, ustringHash>::const_iterator begin() const
{
return left.begin();
}
unordered_map<ustring, int, ustringHash>::const_iterator end() const
{
return left.end();
}
private:
unordered_map<ustring, int, ustringHash> left;
unordered_map<int, ustring> right;
private:
unordered_map<ustring, int, ustringHash> left;
unordered_map<int, ustring> right;
};
CCL_NAMESPACE_END

View File

@@ -24,107 +24,118 @@ CCL_NAMESPACE_BEGIN
/* Size of this socket's value, as reported by the static size(Type) for the
 * socket's own type. */
size_t SocketType::size() const
{
  return SocketType::size(type);
}
/* Array types are the enumerators from BOOLEAN_ARRAY onwards in the Type
 * enum, so a comparison against BOOLEAN_ARRAY identifies them. */
bool SocketType::is_array() const
{
  const bool at_or_past_first_array = (type >= BOOLEAN_ARRAY);
  return at_or_past_first_array;
}
/* Size in bytes of the value stored for a socket of the given type. For
 * array types this is the size of the array<> object itself. */
size_t SocketType::size(Type type)
{
  switch (type) {
    /* Reported as zero bytes. */
    case UNDEFINED:
    case CLOSURE:
      return 0;

    case BOOLEAN:
      return sizeof(bool);
    case FLOAT:
      return sizeof(float);
    case INT:
    case ENUM:
      return sizeof(int);
    case UINT:
      return sizeof(uint);
    case COLOR:
    case VECTOR:
    case POINT:
    case NORMAL:
      return sizeof(float3);
    case POINT2:
      return sizeof(float2);
    case STRING:
      return sizeof(ustring);
    case TRANSFORM:
      return sizeof(Transform);
    case NODE:
      return sizeof(void *);

    case BOOLEAN_ARRAY:
      return sizeof(array<bool>);
    case FLOAT_ARRAY:
      return sizeof(array<float>);
    case INT_ARRAY:
      return sizeof(array<int>);
    case COLOR_ARRAY:
    case VECTOR_ARRAY:
    case POINT_ARRAY:
    case NORMAL_ARRAY:
      return sizeof(array<float3>);
    case POINT2_ARRAY:
      return sizeof(array<float2>);
    case STRING_ARRAY:
      return sizeof(array<ustring>);
    case TRANSFORM_ARRAY:
      return sizeof(array<Transform>);
    case NODE_ARRAY:
      return sizeof(array<void *>);
  }

  /* Reached only for a value outside the enum. */
  assert(0);
  return 0;
}
/* Upper bound used for socket value storage: the size of a Transform. */
size_t SocketType::max_size()
{
  const size_t transform_bytes = sizeof(Transform);
  return transform_bytes;
}
void *SocketType::zero_default_value()
{
static Transform zero_transform = {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
return &zero_transform;
static Transform zero_transform = {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
return &zero_transform;
}
/* Name of a socket type as a ustring.
 *
 * The table below is indexed directly by the Type enum value, so its entries
 * must stay in the same order as the enumerators, one entry per enumerator. */
ustring SocketType::type_name(Type type)
{
  static ustring names[] = {ustring("undefined"),

                            ustring("boolean"),       ustring("float"),
                            ustring("int"),           ustring("uint"),
                            ustring("color"),         ustring("vector"),
                            ustring("point"),         ustring("normal"),
                            ustring("point2"),        ustring("closure"),
                            ustring("string"),        ustring("enum"),
                            ustring("transform"),     ustring("node"),

                            ustring("array_boolean"), ustring("array_float"),
                            ustring("array_int"),     ustring("array_color"),
                            ustring("array_vector"),  ustring("array_point"),
                            ustring("array_normal"),  ustring("array_point2"),
                            ustring("array_string"),  ustring("array_transform"),
                            ustring("array_node")};

  /* NODE_ARRAY is the last enumerator. Fail the build if a type is added to
   * the enum without a matching name here, instead of indexing past the end
   * of the table at run time. */
  static_assert(sizeof(names) / sizeof(names[0]) == (size_t)NODE_ARRAY + 1,
                "SocketType::type_name needs exactly one name per SocketType::Type");

  /* Catch values that were cast from an out-of-range integer. */
  assert((int)type >= 0 && (int)type <= (int)NODE_ARRAY);

  return names[(int)type];
}
/* True for COLOR, VECTOR, POINT and NORMAL; false for every other type. */
bool SocketType::is_float3(Type type)
{
  switch (type) {
    case COLOR:
    case VECTOR:
    case POINT:
    case NORMAL:
      return true;
    default:
      return false;
  }
}
/* Node Type */

/* Construct a node type of the given kind with no sockets registered.
 *
 * `create` is a plain function pointer, so it is set to NULL explicitly;
 * otherwise a NodeType that has not been through NodeType::add() would carry
 * an indeterminate pointer. */
NodeType::NodeType(Type type_) : type(type_), create(NULL)
{
}
@@ -132,88 +143,94 @@ NodeType::~NodeType()
{
}
/* Describe one input socket and append it to `inputs`.
 *
 * `flags` and `extra_flags` are OR-ed together into the socket's flags. All
 * other arguments are stored in the socket unchanged. */
void NodeType::register_input(ustring name,
                              ustring ui_name,
                              SocketType::Type type,
                              int struct_offset,
                              const void *default_value,
                              const NodeEnum *enum_values,
                              const NodeType **node_type,
                              int flags,
                              int extra_flags)
{
  SocketType input;

  /* Identification. */
  input.name = name;
  input.ui_name = ui_name;
  input.type = type;

  /* Where the value lives and what it defaults to. */
  input.struct_offset = struct_offset;
  input.default_value = default_value;

  /* Optional extra information for ENUM / NODE sockets. */
  input.enum_values = enum_values;
  input.node_type = node_type;

  input.flags = flags | extra_flags;

  inputs.push_back(input);
}
/* Describe one output socket and append it to `outputs`.
 *
 * Outputs get offset 0, no default value, no enum values and no node type;
 * their flags are always SocketType::LINKABLE. */
void NodeType::register_output(ustring name, ustring ui_name, SocketType::Type type)
{
  SocketType output;

  output.name = name;
  output.ui_name = ui_name;
  output.type = type;

  output.struct_offset = 0;
  output.default_value = NULL;
  output.enum_values = NULL;
  output.node_type = NULL;

  output.flags = SocketType::LINKABLE;

  outputs.push_back(output);
}
/* Return the first input socket whose name equals `name`, or NULL when no
 * input has that name. */
const SocketType *NodeType::find_input(ustring name) const
{
  for (size_t index = 0; index < inputs.size(); index++) {
    const SocketType &candidate = inputs[index];
    if (candidate.name == name) {
      return &candidate;
    }
  }

  return NULL;
}
/* Return the first output socket whose name equals `name`, or NULL when no
 * output has that name. */
const SocketType *NodeType::find_output(ustring name) const
{
  for (size_t index = 0; index < outputs.size(); index++) {
    const SocketType &candidate = outputs[index];
    if (candidate.name == name) {
      return &candidate;
    }
  }

  return NULL;
}
/* Node Type Registry */

/* Access the map of registered node types, keyed by name. The map is a
 * function-local static, created on the first call. */
unordered_map<ustring, NodeType, ustringHash> &NodeType::types()
{
  static unordered_map<ustring, NodeType, ustringHash> registry;
  return registry;
}
/* Register a new node type under `name_` and return a pointer to the entry
 * stored in the registry.
 *
 * Registering the same name twice prints a message to stderr, asserts, and
 * returns NULL without touching the existing entry. */
NodeType *NodeType::add(const char *name_, CreateFunc create_, Type type_)
{
  const ustring name(name_);
  unordered_map<ustring, NodeType, ustringHash> &registry = types();

  const bool already_registered = (registry.find(name) != registry.end());
  if (already_registered) {
    fprintf(stderr, "Node type %s registered twice!\n", name_);
    assert(0);
    return NULL;
  }

  /* operator[] creates the entry; it is then replaced by a NodeType of the
   * requested kind before the name and factory are filled in. */
  NodeType &entry = registry[name];
  entry = NodeType(type_);
  entry.name = name;
  entry.create = create_;

  return &entry;
}
/* Look up a registered node type by name. Returns NULL when the name has not
 * been registered. */
const NodeType *NodeType::find(ustring name)
{
  unordered_map<ustring, NodeType, ustringHash>::iterator match = types().find(name);

  if (match == types().end()) {
    return NULL;
  }

  return &match->second;
}
CCL_NAMESPACE_END

View File

@@ -30,236 +30,349 @@ struct NodeType;
/* Socket Type */

/* Description of a single input or output socket of a node type. */
struct SocketType {
  /* Value type of a socket. All array types come after the single-value
   * types; is_array() relies on BOOLEAN_ARRAY being the first of them. */
  enum Type {
    UNDEFINED,

    BOOLEAN,
    FLOAT,
    INT,
    UINT,
    COLOR,
    VECTOR,
    POINT,
    NORMAL,
    POINT2,
    CLOSURE,
    STRING,
    ENUM,
    TRANSFORM,
    NODE,

    BOOLEAN_ARRAY,
    FLOAT_ARRAY,
    INT_ARRAY,
    COLOR_ARRAY,
    VECTOR_ARRAY,
    POINT_ARRAY,
    NORMAL_ARRAY,
    POINT2_ARRAY,
    STRING_ARRAY,
    TRANSFORM_ARRAY,
    NODE_ARRAY,
  };

  /* Bit flags stored in `flags`. */
  enum Flags {
    LINKABLE = (1 << 0),
    ANIMATABLE = (1 << 1),

    SVM_INTERNAL = (1 << 2),
    OSL_INTERNAL = (1 << 3),
    /* Both internal bits, (1 << 2) | (1 << 3). */
    INTERNAL = SVM_INTERNAL | OSL_INTERNAL,

    LINK_TEXTURE_GENERATED = (1 << 4),
    LINK_TEXTURE_NORMAL = (1 << 5),
    LINK_TEXTURE_UV = (1 << 6),
    LINK_INCOMING = (1 << 7),
    LINK_NORMAL = (1 << 8),
    LINK_POSITION = (1 << 9),
    LINK_TANGENT = (1 << 10),
    /* All LINK_* bits, bits 4 through 10. */
    DEFAULT_LINK_MASK = LINK_TEXTURE_GENERATED | LINK_TEXTURE_NORMAL | LINK_TEXTURE_UV |
                        LINK_INCOMING | LINK_NORMAL | LINK_POSITION | LINK_TANGENT
  };

  /* Socket identifier. */
  ustring name;
  /* Value type. */
  Type type;
  /* Byte offset of the value inside the owning node struct. */
  int struct_offset;
  /* Pointer to the default value, or NULL. */
  const void *default_value;
  /* Enum names/values, or NULL. */
  const NodeEnum *enum_values;
  /* Node type information, or NULL. */
  const NodeType **node_type;
  /* Combination of Flags bits. */
  int flags;
  /* Name for display in a user interface. */
  ustring ui_name;

  size_t size() const;
  bool is_array() const;

  static size_t size(Type type);
  static size_t max_size();
  static ustring type_name(Type type);
  static void *zero_default_value();
  static bool is_float3(Type type);
};
/* Node Type */
struct NodeType
{
enum Type {
NONE,
SHADER
};
struct NodeType {
enum Type { NONE, SHADER };
explicit NodeType(Type type = NONE);
~NodeType();
explicit NodeType(Type type = NONE);
~NodeType();
void register_input(ustring name, ustring ui_name, SocketType::Type type,
int struct_offset, const void *default_value,
const NodeEnum *enum_values = NULL,
const NodeType **node_type = NULL,
int flags = 0, int extra_flags = 0);
void register_output(ustring name, ustring ui_name, SocketType::Type type);
void register_input(ustring name,
ustring ui_name,
SocketType::Type type,
int struct_offset,
const void *default_value,
const NodeEnum *enum_values = NULL,
const NodeType **node_type = NULL,
int flags = 0,
int extra_flags = 0);
void register_output(ustring name, ustring ui_name, SocketType::Type type);
const SocketType *find_input(ustring name) const;
const SocketType *find_output(ustring name) const;
const SocketType *find_input(ustring name) const;
const SocketType *find_output(ustring name) const;
typedef Node *(*CreateFunc)(const NodeType *type);
typedef Node *(*CreateFunc)(const NodeType *type);
ustring name;
Type type;
vector<SocketType, std::allocator<SocketType> > inputs;
vector<SocketType, std::allocator<SocketType> > outputs;
CreateFunc create;
ustring name;
Type type;
vector<SocketType, std::allocator<SocketType>> inputs;
vector<SocketType, std::allocator<SocketType>> outputs;
CreateFunc create;
static NodeType *add(const char *name, CreateFunc create, Type type = NONE);
static const NodeType *find(ustring name);
static unordered_map<ustring, NodeType, ustringHash>& types();
static NodeType *add(const char *name, CreateFunc create, Type type = NONE);
static const NodeType *find(ustring name);
static unordered_map<ustring, NodeType, ustringHash> &types();
};
/* Node Definition Macros */

/* Place inside a node struct: declares the register_type<T>() template, the
 * create() factory and the static node_type pointer. */
#define NODE_DECLARE \
  template<typename T> static const NodeType *register_type(); \
  static Node *create(const NodeType *type); \
  static const NodeType *node_type;

/* Place in a source file, followed by the body of register_type(): defines
 * node_type by calling register_type<structname>(), defines create() to
 * return a new structname, and opens the register_type() definition. */
#define NODE_DEFINE(structname) \
  const NodeType *structname::node_type = structname::register_type<structname>(); \
  Node *structname::create(const NodeType *) { return new structname(); } \
  template<typename T> const NodeType *structname::register_type()
/* Sock Definition Macros */

/* Byte offset of member `name` within struct T, computed from a T pointer at
 * address 1. */
#define SOCKET_OFFSETOF(T, name) (((char *)&(((T *)1)->name)) - (char *)1)
/* Size in bytes of member `name` of struct T. */
#define SOCKET_SIZEOF(T, name) (sizeof(((T *)1)->name))
/* Register input socket `name` on the local `type`, for use inside a
 * register_type<T>() body. The default value is kept in a block-local static
 * and CHECK_TYPE is applied to the member and `datatype`. Extra arguments are
 * forwarded after `flags`. */
#define SOCKET_DEFINE(name, ui_name, default_value, datatype, TYPE, flags, ...) \
  { \
    static datatype defval = default_value; \
    CHECK_TYPE(((T *)1)->name, datatype); \
    type->register_input(ustring(#name), ustring(ui_name), TYPE, SOCKET_OFFSETOF(T, name), \
                         &defval, NULL, NULL, flags, ##__VA_ARGS__); \
  }
/* Single-value input sockets. Each macro forwards to SOCKET_DEFINE with the
 * matching C++ type and SocketType enumerator, and 0 for `flags`. */
#define SOCKET_BOOLEAN(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, bool, SocketType::BOOLEAN, 0, ##__VA_ARGS__)

#define SOCKET_INT(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, int, SocketType::INT, 0, ##__VA_ARGS__)

#define SOCKET_UINT(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, uint, SocketType::UINT, 0, ##__VA_ARGS__)

#define SOCKET_FLOAT(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, float, SocketType::FLOAT, 0, ##__VA_ARGS__)

/* The four float3-backed socket types. */
#define SOCKET_COLOR(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::COLOR, 0, ##__VA_ARGS__)

#define SOCKET_VECTOR(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::VECTOR, 0, ##__VA_ARGS__)

#define SOCKET_POINT(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::POINT, 0, ##__VA_ARGS__)

#define SOCKET_NORMAL(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::NORMAL, 0, ##__VA_ARGS__)

#define SOCKET_POINT2(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, float2, SocketType::POINT2, 0, ##__VA_ARGS__)

#define SOCKET_STRING(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, ustring, SocketType::STRING, 0, ##__VA_ARGS__)

#define SOCKET_TRANSFORM(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, Transform, SocketType::TRANSFORM, 0, ##__VA_ARGS__)
/* Register an enum input socket.
 *
 * Expands to a braced block that keeps the default in a block-scope static int, asserts
 * that SOCKET_SIZEOF(T, name) equals sizeof(int), and calls type->register_input() with
 * SocketType::ENUM, SOCKET_OFFSETOF(T, name), the address of the default and the address
 * of `values` (presumably the table of allowed enum values). `T` and `type` must be in
 * scope at the expansion site. Extra arguments are forwarded through __VA_ARGS__. */
#define SOCKET_ENUM(name, ui_name, values, default_value, ...) \
  { \
    static int defval = default_value; \
    assert(SOCKET_SIZEOF(T, name) == sizeof(int)); \
    type->register_input(ustring(#name), \
                         ustring(ui_name), \
                         SocketType::ENUM, \
                         SOCKET_OFFSETOF(T, name), \
                         &defval, \
                         &values, \
                         NULL, \
                         ##__VA_ARGS__); \
  }
/* Register an input socket that refers to another Node.
 *
 * Expands to a braced block: the default is a block-scope static NULL pointer, the size
 * reported by SOCKET_SIZEOF(T, name) is asserted to equal sizeof(Node *), and
 * type->register_input() is called with SocketType::NODE and `node_type` in the position
 * that SOCKET_ENUM fills with NULL. `T` and `type` must be in scope at the expansion site.
 * Extra arguments are forwarded through __VA_ARGS__. */
#define SOCKET_NODE(name, ui_name, node_type, ...) \
  { \
    static Node *defval = NULL; \
    assert(SOCKET_SIZEOF(T, name) == sizeof(Node *)); \
    type->register_input(ustring(#name), \
                         ustring(ui_name), \
                         SocketType::NODE, \
                         SOCKET_OFFSETOF(T, name), \
                         &defval, \
                         NULL, \
                         node_type, \
                         ##__VA_ARGS__); \
  }
/* Shorthands for registering array-valued sockets.
 *
 * Same forwarding pattern as the scalar value macros: SOCKET_DEFINE receives array<...>
 * as the storage type, the matching SocketType::*_ARRAY tag and the literal 0 in the
 * position where SOCKET_IN_* pass SocketType::LINKABLE. Extra arguments are forwarded
 * through __VA_ARGS__. */
#define SOCKET_BOOLEAN_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE( \
      name, ui_name, default_value, array<bool>, SocketType::BOOLEAN_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_INT_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, array<int>, SocketType::INT_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_FLOAT_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE( \
      name, ui_name, default_value, array<float>, SocketType::FLOAT_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_COLOR_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE( \
      name, ui_name, default_value, array<float3>, SocketType::COLOR_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_VECTOR_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE( \
      name, ui_name, default_value, array<float3>, SocketType::VECTOR_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_POINT_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE( \
      name, ui_name, default_value, array<float3>, SocketType::POINT_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_NORMAL_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE( \
      name, ui_name, default_value, array<float3>, SocketType::NORMAL_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_POINT2_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE( \
      name, ui_name, default_value, array<float2>, SocketType::POINT2_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_STRING_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE( \
      name, ui_name, default_value, array<ustring>, SocketType::STRING_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_TRANSFORM_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, \
                ui_name, \
                default_value, \
                array<Transform>, \
                SocketType::TRANSFORM_ARRAY, \
                0, \
                ##__VA_ARGS__)
/* Register an input socket tagged SocketType::NODE_ARRAY.
 *
 * Identical in shape to SOCKET_NODE: a block-scope static NULL default, an assert that
 * SOCKET_SIZEOF(T, name) equals sizeof(Node *), and a type->register_input() call that
 * passes `node_type`. `T` and `type` must be in scope at the expansion site.
 * NOTE(review): the size check compares against sizeof(Node *) although the reader and
 * writer treat NODE_ARRAY sockets as array<Node *> -- confirm this assert is intended. */
#define SOCKET_NODE_ARRAY(name, ui_name, node_type, ...) \
  { \
    static Node *defval = NULL; \
    assert(SOCKET_SIZEOF(T, name) == sizeof(Node *)); \
    type->register_input(ustring(#name), \
                         ustring(ui_name), \
                         SocketType::NODE_ARRAY, \
                         SOCKET_OFFSETOF(T, name), \
                         &defval, \
                         NULL, \
                         node_type, \
                         ##__VA_ARGS__); \
  }
/* Shorthands for registering linkable input sockets.
 *
 * These mirror the plain value macros but pass SocketType::LINKABLE instead of 0 to
 * SOCKET_DEFINE. Extra arguments are forwarded through __VA_ARGS__. */
#define SOCKET_IN_BOOLEAN(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, \
                ui_name, \
                default_value, \
                bool, \
                SocketType::BOOLEAN, \
                SocketType::LINKABLE, \
                ##__VA_ARGS__)
#define SOCKET_IN_INT(name, ui_name, default_value, ...) \
  SOCKET_DEFINE( \
      name, ui_name, default_value, int, SocketType::INT, SocketType::LINKABLE, ##__VA_ARGS__)
#define SOCKET_IN_FLOAT(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, \
                ui_name, \
                default_value, \
                float, \
                SocketType::FLOAT, \
                SocketType::LINKABLE, \
                ##__VA_ARGS__)
#define SOCKET_IN_COLOR(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, \
                ui_name, \
                default_value, \
                float3, \
                SocketType::COLOR, \
                SocketType::LINKABLE, \
                ##__VA_ARGS__)
#define SOCKET_IN_VECTOR(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, \
                ui_name, \
                default_value, \
                float3, \
                SocketType::VECTOR, \
                SocketType::LINKABLE, \
                ##__VA_ARGS__)
#define SOCKET_IN_POINT(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, \
                ui_name, \
                default_value, \
                float3, \
                SocketType::POINT, \
                SocketType::LINKABLE, \
                ##__VA_ARGS__)
#define SOCKET_IN_NORMAL(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, \
                ui_name, \
                default_value, \
                float3, \
                SocketType::NORMAL, \
                SocketType::LINKABLE, \
                ##__VA_ARGS__)
#define SOCKET_IN_STRING(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, \
                ui_name, \
                default_value, \
                ustring, \
                SocketType::STRING, \
                SocketType::LINKABLE, \
                ##__VA_ARGS__)
/* Closure inputs have no backing member: offset 0 and NULL for the default, enum table and
 * node type are passed directly to register_input(). Unlike the brace-wrapped macros this
 * one expands to a bare expression, so the caller supplies the terminating semicolon. */
#define SOCKET_IN_CLOSURE(name, ui_name, ...) \
  type->register_input(ustring(#name), \
                       ustring(ui_name), \
                       SocketType::CLOSURE, \
                       0, \
                       NULL, \
                       NULL, \
                       NULL, \
                       SocketType::LINKABLE, \
                       ##__VA_ARGS__)
/* Shorthands for registering output sockets.
 *
 * Each expands to a braced block that calls type->register_output() with the stringified
 * member name, the UI name and the SocketType tag. `type` must be in scope at the
 * expansion site. */
#define SOCKET_OUT_BOOLEAN(name, ui_name) \
  { \
    type->register_output(ustring(#name), ustring(ui_name), SocketType::BOOLEAN); \
  }
#define SOCKET_OUT_INT(name, ui_name) \
  { \
    type->register_output(ustring(#name), ustring(ui_name), SocketType::INT); \
  }
#define SOCKET_OUT_FLOAT(name, ui_name) \
  { \
    type->register_output(ustring(#name), ustring(ui_name), SocketType::FLOAT); \
  }
#define SOCKET_OUT_COLOR(name, ui_name) \
  { \
    type->register_output(ustring(#name), ustring(ui_name), SocketType::COLOR); \
  }
#define SOCKET_OUT_VECTOR(name, ui_name) \
  { \
    type->register_output(ustring(#name), ustring(ui_name), SocketType::VECTOR); \
  }
#define SOCKET_OUT_POINT(name, ui_name) \
  { \
    type->register_output(ustring(#name), ustring(ui_name), SocketType::POINT); \
  }
#define SOCKET_OUT_NORMAL(name, ui_name) \
  { \
    type->register_output(ustring(#name), ustring(ui_name), SocketType::NORMAL); \
  }
#define SOCKET_OUT_CLOSURE(name, ui_name) \
  { \
    type->register_output(ustring(#name), ustring(ui_name), SocketType::CLOSURE); \
  }
#define SOCKET_OUT_STRING(name, ui_name) \
  { \
    type->register_output(ustring(#name), ustring(ui_name), SocketType::STRING); \
  }
#define SOCKET_OUT_ENUM(name, ui_name) \
  { \
    type->register_output(ustring(#name), ustring(ui_name), SocketType::ENUM); \
  }
CCL_NAMESPACE_END

View File

@@ -24,437 +24,409 @@ CCL_NAMESPACE_BEGIN
/* Interpret an XML attribute string as a boolean: true when string_iequals() matches it
 * against "true", or when atoi() parses it to a non-zero integer. */
static bool xml_read_boolean(const char *value)
{
  if (string_iequals(value, "true")) {
    return true;
  }
  return atoi(value) != 0;
}
/* Textual form of a boolean for XML output: the literal "true" or "false". */
static const char *xml_write_boolean(bool value)
{
  if (value) {
    return "true";
  }
  return "false";
}
template<int VECTOR_SIZE, typename T>
static void xml_read_float_array(T& value, xml_attribute attr)
static void xml_read_float_array(T &value, xml_attribute attr)
{
vector<string> tokens;
string_split(tokens, attr.value());
vector<string> tokens;
string_split(tokens, attr.value());
if(tokens.size() % VECTOR_SIZE != 0) {
return;
}
if (tokens.size() % VECTOR_SIZE != 0) {
return;
}
value.resize(tokens.size() / VECTOR_SIZE);
for(size_t i = 0; i < value.size(); i++) {
float *value_float = (float*)&value[i];
value.resize(tokens.size() / VECTOR_SIZE);
for (size_t i = 0; i < value.size(); i++) {
float *value_float = (float *)&value[i];
for(size_t j = 0; j < VECTOR_SIZE; j++)
value_float[j] = (float)atof(tokens[i * VECTOR_SIZE + j].c_str());
}
for (size_t j = 0; j < VECTOR_SIZE; j++)
value_float[j] = (float)atof(tokens[i * VECTOR_SIZE + j].c_str());
}
}
/* Populate `node` from the attributes of `xml_node`.
 *
 * The optional "name" attribute sets node->name. Then, for every input socket of the
 * node's type that is neither CLOSURE/UNDEFINED nor flagged INTERNAL, the attribute with
 * the socket's name (if present) is parsed according to the socket type and stored with
 * node->set(). Sockets without a matching attribute are left untouched.
 *
 * NODE and NODE_ARRAY sockets are resolved by name through reader.node_map; a reference
 * is only accepted when the referenced node's type matches the socket's node type.
 * Finally, a node with a non-empty name is recorded in reader.node_map so that nodes read
 * later can refer to it. */
void xml_read_node(XMLReader &reader, Node *node, xml_node xml_node)
{
  xml_attribute name_attr = xml_node.attribute("name");
  if (name_attr) {
    node->name = ustring(name_attr.value());
  }

  foreach (const SocketType &socket, node->type->inputs) {
    if (socket.type == SocketType::CLOSURE || socket.type == SocketType::UNDEFINED) {
      continue;
    }
    if (socket.flags & SocketType::INTERNAL) {
      continue;
    }

    xml_attribute attr = xml_node.attribute(socket.name.c_str());

    if (!attr) {
      continue;
    }

    switch (socket.type) {
      case SocketType::BOOLEAN: {
        node->set(socket, xml_read_boolean(attr.value()));
        break;
      }
      case SocketType::BOOLEAN_ARRAY: {
        vector<string> tokens;
        string_split(tokens, attr.value());

        array<bool> value;
        value.resize(tokens.size());
        for (size_t i = 0; i < value.size(); i++)
          value[i] = xml_read_boolean(tokens[i].c_str());
        node->set(socket, value);
        break;
      }
      case SocketType::FLOAT: {
        node->set(socket, (float)atof(attr.value()));
        break;
      }
      case SocketType::FLOAT_ARRAY: {
        array<float> value;
        xml_read_float_array<1>(value, attr);
        node->set(socket, value);
        break;
      }
      case SocketType::INT: {
        node->set(socket, (int)atoi(attr.value()));
        break;
      }
      case SocketType::UINT: {
        /* Parse as unsigned: atoi() is undefined for values above INT_MAX, which the
         * writer can legitimately emit for a uint socket. */
        node->set(socket, (uint)strtoul(attr.value(), NULL, 10));
        break;
      }
      case SocketType::INT_ARRAY: {
        vector<string> tokens;
        string_split(tokens, attr.value());

        array<int> value;
        value.resize(tokens.size());
        for (size_t i = 0; i < value.size(); i++) {
          /* Parse the i-th token; parsing the whole attribute string here would give
           * every element the value of the first number. */
          value[i] = (int)atoi(tokens[i].c_str());
        }
        node->set(socket, value);
        break;
      }
      case SocketType::COLOR:
      case SocketType::VECTOR:
      case SocketType::POINT:
      case SocketType::NORMAL: {
        /* Scalar float3 sockets reuse the array parser and accept exactly one triple. */
        array<float3> value;
        xml_read_float_array<3>(value, attr);
        if (value.size() == 1) {
          node->set(socket, value[0]);
        }
        break;
      }
      case SocketType::COLOR_ARRAY:
      case SocketType::VECTOR_ARRAY:
      case SocketType::POINT_ARRAY:
      case SocketType::NORMAL_ARRAY: {
        array<float3> value;
        xml_read_float_array<3>(value, attr);
        node->set(socket, value);
        break;
      }
      case SocketType::POINT2: {
        array<float2> value;
        xml_read_float_array<2>(value, attr);
        if (value.size() == 1) {
          node->set(socket, value[0]);
        }
        break;
      }
      case SocketType::POINT2_ARRAY: {
        array<float2> value;
        xml_read_float_array<2>(value, attr);
        node->set(socket, value);
        break;
      }
      case SocketType::STRING: {
        node->set(socket, attr.value());
        break;
      }
      case SocketType::ENUM: {
        ustring value(attr.value());
        if (socket.enum_values->exists(value)) {
          node->set(socket, value);
        }
        else {
          fprintf(stderr,
                  "Unknown value \"%s\" for attribute \"%s\".\n",
                  value.c_str(),
                  socket.name.c_str());
        }
        break;
      }
      case SocketType::STRING_ARRAY: {
        vector<string> tokens;
        string_split(tokens, attr.value());

        array<ustring> value;
        value.resize(tokens.size());
        for (size_t i = 0; i < value.size(); i++) {
          value[i] = ustring(tokens[i]);
        }
        node->set(socket, value);
        break;
      }
      case SocketType::TRANSFORM: {
        /* NOTE(review): xml_write_node() emits 16 numbers per transform (three rows plus
         * "0 0 0 1"), while this reader groups tokens by 12. Assuming string_split()
         * yields one token per number, a written transform is rejected here (16 is not a
         * multiple of 12) -- confirm which layout is intended before changing either. */
        array<Transform> value;
        xml_read_float_array<12>(value, attr);
        if (value.size() == 1) {
          node->set(socket, value[0]);
        }
        break;
      }
      case SocketType::TRANSFORM_ARRAY: {
        /* NOTE(review): same 12-vs-16 layout question as the TRANSFORM case. */
        array<Transform> value;
        xml_read_float_array<12>(value, attr);
        node->set(socket, value);
        break;
      }
      case SocketType::NODE: {
        ustring value(attr.value());
        map<ustring, Node *>::iterator it = reader.node_map.find(value);
        if (it != reader.node_map.end()) {
          Node *value_node = it->second;
          /* Only link nodes of the type this socket was registered for. */
          if (value_node->type == *(socket.node_type))
            node->set(socket, it->second);
        }
        break;
      }
      case SocketType::NODE_ARRAY: {
        vector<string> tokens;
        string_split(tokens, attr.value());

        array<Node *> value;
        value.resize(tokens.size());
        for (size_t i = 0; i < value.size(); i++) {
          map<ustring, Node *>::iterator it = reader.node_map.find(ustring(tokens[i]));
          if (it != reader.node_map.end()) {
            Node *value_node = it->second;
            value[i] = (value_node->type == *(socket.node_type)) ? value_node : NULL;
          }
          else {
            /* Unknown names keep their slot so indices still line up with the input. */
            value[i] = NULL;
          }
        }
        node->set(socket, value);
        break;
      }
      case SocketType::CLOSURE:
      case SocketType::UNDEFINED:
        break;
    }
  }

  if (!node->name.empty())
    reader.node_map[node->name] = node;
}
/* Serialize `node` as a new child element of `xml_root` and return that element.
 *
 * The element is named after the node type and always carries a "name" attribute. One
 * further attribute is appended per input socket, skipping CLOSURE/UNDEFINED sockets,
 * sockets flagged INTERNAL, and sockets for which node->has_default_value() is true.
 * Array values are written as a single space-separated string; NODE and NODE_ARRAY
 * sockets are written as the referenced nodes' names. */
xml_node xml_write_node(Node *node, xml_node xml_root)
{
  xml_node xml_node = xml_root.append_child(node->type->name.c_str());

  xml_node.append_attribute("name") = node->name.c_str();

  foreach (const SocketType &socket, node->type->inputs) {
    if (socket.type == SocketType::CLOSURE || socket.type == SocketType::UNDEFINED) {
      continue;
    }
    if (socket.flags & SocketType::INTERNAL) {
      continue;
    }
    if (node->has_default_value(socket)) {
      continue;
    }

    xml_attribute attr = xml_node.append_attribute(socket.name.c_str());

    switch (socket.type) {
      case SocketType::BOOLEAN: {
        attr = xml_write_boolean(node->get_bool(socket));
        break;
      }
      case SocketType::BOOLEAN_ARRAY: {
        std::stringstream ss;
        const array<bool> &value = node->get_bool_array(socket);
        for (size_t i = 0; i < value.size(); i++) {
          ss << xml_write_boolean(value[i]);
          if (i != value.size() - 1)
            ss << " ";
        }
        attr = ss.str().c_str();
        break;
      }
      case SocketType::FLOAT: {
        attr = (double)node->get_float(socket);
        break;
      }
      case SocketType::FLOAT_ARRAY: {
        std::stringstream ss;
        const array<float> &value = node->get_float_array(socket);
        for (size_t i = 0; i < value.size(); i++) {
          ss << value[i];
          if (i != value.size() - 1) {
            ss << " ";
          }
        }
        attr = ss.str().c_str();
        break;
      }
      case SocketType::INT: {
        attr = node->get_int(socket);
        break;
      }
      case SocketType::UINT: {
        attr = node->get_uint(socket);
        break;
      }
      case SocketType::INT_ARRAY: {
        std::stringstream ss;
        const array<int> &value = node->get_int_array(socket);
        for (size_t i = 0; i < value.size(); i++) {
          ss << value[i];
          if (i != value.size() - 1) {
            ss << " ";
          }
        }
        attr = ss.str().c_str();
        break;
      }
      case SocketType::COLOR:
      case SocketType::VECTOR:
      case SocketType::POINT:
      case SocketType::NORMAL: {
        float3 value = node->get_float3(socket);
        attr =
            string_printf("%g %g %g", (double)value.x, (double)value.y, (double)value.z).c_str();
        break;
      }
      case SocketType::COLOR_ARRAY:
      case SocketType::VECTOR_ARRAY:
      case SocketType::POINT_ARRAY:
      case SocketType::NORMAL_ARRAY: {
        std::stringstream ss;
        const array<float3> &value = node->get_float3_array(socket);
        for (size_t i = 0; i < value.size(); i++) {
          ss << string_printf(
              "%g %g %g", (double)value[i].x, (double)value[i].y, (double)value[i].z);
          if (i != value.size() - 1) {
            ss << " ";
          }
        }
        attr = ss.str().c_str();
        break;
      }
      case SocketType::POINT2: {
        float2 value = node->get_float2(socket);
        attr = string_printf("%g %g", (double)value.x, (double)value.y).c_str();
        break;
      }
      case SocketType::POINT2_ARRAY: {
        std::stringstream ss;
        const array<float2> &value = node->get_float2_array(socket);
        for (size_t i = 0; i < value.size(); i++) {
          ss << string_printf("%g %g", (double)value[i].x, (double)value[i].y);
          if (i != value.size() - 1) {
            ss << " ";
          }
        }
        attr = ss.str().c_str();
        break;
      }
      case SocketType::STRING:
      case SocketType::ENUM: {
        attr = node->get_string(socket).c_str();
        break;
      }
      case SocketType::STRING_ARRAY: {
        std::stringstream ss;
        const array<ustring> &value = node->get_string_array(socket);
        for (size_t i = 0; i < value.size(); i++) {
          ss << value[i];
          if (i != value.size() - 1) {
            ss << " ";
          }
        }
        attr = ss.str().c_str();
        break;
      }
      case SocketType::TRANSFORM: {
        /* Three stored rows followed by a constant "0 0 0 1" row: 16 numbers in total.
         * NOTE(review): xml_read_node() parses transforms in groups of 12 numbers, so
         * this output does not appear to round-trip -- confirm the intended layout. */
        Transform tfm = node->get_transform(socket);
        std::stringstream ss;
        for (int i = 0; i < 3; i++) {
          ss << string_printf("%g %g %g %g ",
                              (double)tfm[i][0],
                              (double)tfm[i][1],
                              (double)tfm[i][2],
                              (double)tfm[i][3]);
        }
        ss << string_printf("%g %g %g %g", 0.0, 0.0, 0.0, 1.0);
        attr = ss.str().c_str();
        break;
      }
      case SocketType::TRANSFORM_ARRAY: {
        std::stringstream ss;
        const array<Transform> &value = node->get_transform_array(socket);
        for (size_t j = 0; j < value.size(); j++) {
          const Transform &tfm = value[j];

          /* Same 16-number layout per transform as the scalar TRANSFORM case. */
          for (int i = 0; i < 3; i++) {
            ss << string_printf("%g %g %g %g ",
                                (double)tfm[i][0],
                                (double)tfm[i][1],
                                (double)tfm[i][2],
                                (double)tfm[i][3]);
          }
          ss << string_printf("%g %g %g %g", 0.0, 0.0, 0.0, 1.0);
          if (j != value.size() - 1) {
            ss << " ";
          }
        }
        attr = ss.str().c_str();
        break;
      }
      case SocketType::NODE: {
        /* A NULL reference leaves the already-appended attribute without a value. */
        Node *value = node->get_node(socket);
        if (value) {
          attr = value->name.c_str();
        }
        break;
      }
      case SocketType::NODE_ARRAY: {
        std::stringstream ss;
        const array<Node *> &value = node->get_node_array(socket);
        for (size_t i = 0; i < value.size(); i++) {
          /* NULL entries contribute no name but still get their separator. */
          if (value[i]) {
            ss << value[i]->name.c_str();
          }
          if (i != value.size() - 1) {
            ss << " ";
          }
        }
        attr = ss.str().c_str();
        break;
      }
      case SocketType::CLOSURE:
      case SocketType::UNDEFINED:
        break;
    }
  }

  return xml_node;
}
CCL_NAMESPACE_END

View File

@@ -25,10 +25,10 @@
CCL_NAMESPACE_BEGIN
/* State shared across xml_read_node() calls: maps node names to the nodes carrying them,
 * which is how NODE and NODE_ARRAY attributes are resolved by name. */
struct XMLReader {
  map<ustring, Node *> node_map;
};
/* Fill `node` from the attributes of `xml_node`, using `reader` to resolve node names. */
void xml_read_node(XMLReader &reader, Node *node, xml_node xml_node);
/* Append `node` as a child element of `xml_root` and return the new element. */
xml_node xml_write_node(Node *node, xml_node xml_root);
CCL_NAMESPACE_END

View File

@@ -1,7 +1,7 @@
remove_extra_strict_flags()
# Include search path for this directory's sources: the parent directory.
set(INC
  ..
)
set(INC_SYS
@@ -9,328 +9,328 @@ set(INC_SYS
)
# CPU translation units: the regular kernel, the split kernel and the filter, each in a
# generic build plus SSE2 / SSE3 / SSE4.1 / AVX / AVX2 variants. The per-variant compile
# flags are attached further down with set_source_files_properties().
set(SRC_CPU_KERNELS
  kernels/cpu/kernel.cpp
  kernels/cpu/kernel_sse2.cpp
  kernels/cpu/kernel_sse3.cpp
  kernels/cpu/kernel_sse41.cpp
  kernels/cpu/kernel_avx.cpp
  kernels/cpu/kernel_avx2.cpp
  kernels/cpu/kernel_split.cpp
  kernels/cpu/kernel_split_sse2.cpp
  kernels/cpu/kernel_split_sse3.cpp
  kernels/cpu/kernel_split_sse41.cpp
  kernels/cpu/kernel_split_avx.cpp
  kernels/cpu/kernel_split_avx2.cpp
  kernels/cpu/filter.cpp
  kernels/cpu/filter_sse2.cpp
  kernels/cpu/filter_sse3.cpp
  kernels/cpu/filter_sse41.cpp
  kernels/cpu/filter_avx.cpp
  kernels/cpu/filter_avx2.cpp
)
# CUDA kernel entry points (regular kernel, split kernel, filter).
set(SRC_CUDA_KERNELS
  kernels/cuda/kernel.cu
  kernels/cuda/kernel_split.cu
  kernels/cuda/filter.cu
)
# OpenCL kernel sources, one .cl file per kernel, plus the filter.
set(SRC_OPENCL_KERNELS
  kernels/opencl/kernel_bake.cl
  kernels/opencl/kernel_base.cl
  kernels/opencl/kernel_displace.cl
  kernels/opencl/kernel_background.cl
  kernels/opencl/kernel_state_buffer_size.cl
  kernels/opencl/kernel_split_bundle.cl
  kernels/opencl/kernel_data_init.cl
  kernels/opencl/kernel_path_init.cl
  kernels/opencl/kernel_queue_enqueue.cl
  kernels/opencl/kernel_scene_intersect.cl
  kernels/opencl/kernel_lamp_emission.cl
  kernels/opencl/kernel_do_volume.cl
  kernels/opencl/kernel_indirect_background.cl
  kernels/opencl/kernel_shader_setup.cl
  kernels/opencl/kernel_shader_sort.cl
  kernels/opencl/kernel_shader_eval.cl
  kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
  kernels/opencl/kernel_subsurface_scatter.cl
  kernels/opencl/kernel_direct_lighting.cl
  kernels/opencl/kernel_shadow_blocked_ao.cl
  kernels/opencl/kernel_shadow_blocked_dl.cl
  kernels/opencl/kernel_enqueue_inactive.cl
  kernels/opencl/kernel_next_iteration_setup.cl
  kernels/opencl/kernel_indirect_subsurface.cl
  kernels/opencl/kernel_buffer_update.cl
  kernels/opencl/filter.cl
)
# BVH headers: the bvh_*, qbvh_* and obvh_* families plus the Embree header.
# Also listed as a dependency of the CUDA kernel build (cuda_sources).
set(SRC_BVH_HEADERS
  bvh/bvh.h
  bvh/bvh_nodes.h
  bvh/bvh_shadow_all.h
  bvh/bvh_local.h
  bvh/bvh_traversal.h
  bvh/bvh_types.h
  bvh/bvh_volume.h
  bvh/bvh_volume_all.h
  bvh/qbvh_nodes.h
  bvh/qbvh_shadow_all.h
  bvh/qbvh_local.h
  bvh/qbvh_traversal.h
  bvh/qbvh_volume.h
  bvh/qbvh_volume_all.h
  bvh/obvh_nodes.h
  bvh/obvh_shadow_all.h
  bvh/obvh_local.h
  bvh/obvh_traversal.h
  bvh/obvh_volume.h
  bvh/obvh_volume_all.h
  bvh/bvh_embree.h
)
# Top-level kernel headers. Used as sources of cycles_kernel and as dependencies of
# both CUDA builds (cuda_sources and cuda_filter_sources).
set(SRC_HEADERS
  kernel_accumulate.h
  kernel_bake.h
  kernel_camera.h
  kernel_color.h
  kernel_compat_cpu.h
  kernel_compat_cuda.h
  kernel_compat_opencl.h
  kernel_differential.h
  kernel_emission.h
  kernel_film.h
  kernel_globals.h
  kernel_id_passes.h
  kernel_jitter.h
  kernel_light.h
  kernel_math.h
  kernel_montecarlo.h
  kernel_passes.h
  kernel_path.h
  kernel_path_branched.h
  kernel_path_common.h
  kernel_path_state.h
  kernel_path_surface.h
  kernel_path_subsurface.h
  kernel_path_volume.h
  kernel_profiling.h
  kernel_projection.h
  kernel_queues.h
  kernel_random.h
  kernel_shader.h
  kernel_shadow.h
  kernel_subsurface.h
  kernel_textures.h
  kernel_types.h
  kernel_volume.h
  kernel_work_stealing.h
)
# Headers belonging to the CPU kernel and CPU filter implementations.
set(SRC_KERNELS_CPU_HEADERS
  kernel.h
  kernels/cpu/kernel_cpu.h
  kernels/cpu/kernel_cpu_impl.h
  kernels/cpu/kernel_cpu_image.h
  kernels/cpu/filter_cpu.h
  kernels/cpu/filter_cpu_impl.h
)
# Headers specific to the CUDA kernels; also a dependency of both CUDA builds.
set(SRC_KERNELS_CUDA_HEADERS
  kernels/cuda/kernel_config.h
  kernels/cuda/kernel_cuda_image.h
)
# Headers specific to the OpenCL kernels.
set(SRC_KERNELS_OPENCL_HEADERS
  kernels/opencl/kernel_split_function.h
  kernels/opencl/kernel_opencl_image.h
)
# Closure headers (BSDF, BSSRDF, emissive, volume). Also a dependency of the CUDA
# kernel build (cuda_sources).
set(SRC_CLOSURE_HEADERS
  closure/alloc.h
  closure/bsdf.h
  closure/bsdf_ashikhmin_velvet.h
  closure/bsdf_diffuse.h
  closure/bsdf_diffuse_ramp.h
  closure/bsdf_microfacet.h
  closure/bsdf_microfacet_multi.h
  closure/bsdf_microfacet_multi_impl.h
  closure/bsdf_oren_nayar.h
  closure/bsdf_phong_ramp.h
  closure/bsdf_reflection.h
  closure/bsdf_refraction.h
  closure/bsdf_toon.h
  closure/bsdf_transparent.h
  closure/bsdf_util.h
  closure/bsdf_ashikhmin_shirley.h
  closure/bsdf_hair.h
  closure/bssrdf.h
  closure/emissive.h
  closure/volume.h
  closure/bsdf_principled_diffuse.h
  closure/bsdf_principled_sheen.h
  closure/bsdf_hair_principled.h
)
# SVM headers, one per svm_* unit. Also a dependency of the CUDA kernel build
# (cuda_sources).
set(SRC_SVM_HEADERS
  svm/svm.h
  svm/svm_ao.h
  svm/svm_attribute.h
  svm/svm_bevel.h
  svm/svm_blackbody.h
  svm/svm_bump.h
  svm/svm_camera.h
  svm/svm_closure.h
  svm/svm_convert.h
  svm/svm_checker.h
  svm/svm_color_util.h
  svm/svm_brick.h
  svm/svm_displace.h
  svm/svm_fresnel.h
  svm/svm_wireframe.h
  svm/svm_wavelength.h
  svm/svm_gamma.h
  svm/svm_brightness.h
  svm/svm_geometry.h
  svm/svm_gradient.h
  svm/svm_hsv.h
  svm/svm_ies.h
  svm/svm_image.h
  svm/svm_invert.h
  svm/svm_light_path.h
  svm/svm_magic.h
  svm/svm_mapping.h
  svm/svm_math.h
  svm/svm_math_util.h
  svm/svm_mix.h
  svm/svm_musgrave.h
  svm/svm_noise.h
  svm/svm_noisetex.h
  svm/svm_normal.h
  svm/svm_ramp.h
  svm/svm_ramp_util.h
  svm/svm_sepcomb_hsv.h
  svm/svm_sepcomb_vector.h
  svm/svm_sky.h
  svm/svm_tex_coord.h
  svm/svm_texture.h
  svm/svm_types.h
  svm/svm_value.h
  svm/svm_vector_transform.h
  svm/svm_voronoi.h
  svm/svm_voxel.h
  svm/svm_wave.h
)
# Geometry headers (curves, triangles, motion, patches, volumes). Also a dependency
# of the CUDA kernel build (cuda_sources).
set(SRC_GEOM_HEADERS
  geom/geom.h
  geom/geom_attribute.h
  geom/geom_curve.h
  geom/geom_curve_intersect.h
  geom/geom_motion_curve.h
  geom/geom_motion_triangle.h
  geom/geom_motion_triangle_intersect.h
  geom/geom_motion_triangle_shader.h
  geom/geom_object.h
  geom/geom_patch.h
  geom/geom_primitive.h
  geom/geom_subd_triangle.h
  geom/geom_triangle.h
  geom/geom_triangle_intersect.h
  geom/geom_volume.h
)
# Filter headers. Also a dependency of the CUDA filter build (cuda_filter_sources).
set(SRC_FILTER_HEADERS
  filter/filter.h
  filter/filter_defines.h
  filter/filter_features.h
  filter/filter_features_sse.h
  filter/filter_kernel.h
  filter/filter_nlm_cpu.h
  filter/filter_nlm_gpu.h
  filter/filter_prefilter.h
  filter/filter_reconstruction.h
  filter/filter_transform.h
  filter/filter_transform_gpu.h
  filter/filter_transform_sse.h
)
# Utility headers from the sibling ../util directory. In this file they are only
# referenced as dependencies of the CUDA builds (cuda_sources, cuda_filter_sources).
set(SRC_UTIL_HEADERS
  ../util/util_atomic.h
  ../util/util_color.h
  ../util/util_defines.h
  ../util/util_half.h
  ../util/util_hash.h
  ../util/util_math.h
  ../util/util_math_fast.h
  ../util/util_math_intersect.h
  ../util/util_math_float2.h
  ../util/util_math_float3.h
  ../util/util_math_float4.h
  ../util/util_math_int2.h
  ../util/util_math_int3.h
  ../util/util_math_int4.h
  ../util/util_math_matrix.h
  ../util/util_projection.h
  ../util/util_rect.h
  ../util/util_static_assert.h
  ../util/util_transform.h
  ../util/util_texture.h
  ../util/util_types.h
  ../util/util_types_float2.h
  ../util/util_types_float2_impl.h
  ../util/util_types_float3.h
  ../util/util_types_float3_impl.h
  ../util/util_types_float4.h
  ../util/util_types_float4_impl.h
  ../util/util_types_float8.h
  ../util/util_types_float8_impl.h
  ../util/util_types_int2.h
  ../util/util_types_int2_impl.h
  ../util/util_types_int3.h
  ../util/util_types_int3_impl.h
  ../util/util_types_int4.h
  ../util/util_types_int4_impl.h
  ../util/util_types_uchar2.h
  ../util/util_types_uchar2_impl.h
  ../util/util_types_uchar3.h
  ../util/util_types_uchar3_impl.h
  ../util/util_types_uchar4.h
  ../util/util_types_uchar4_impl.h
  ../util/util_types_uint2.h
  ../util/util_types_uint2_impl.h
  ../util/util_types_uint3.h
  ../util/util_types_uint3_impl.h
  ../util/util_types_uint4.h
  ../util/util_types_uint4_impl.h
  ../util/util_types_ushort4.h
  ../util/util_types_vector3.h
  ../util/util_types_vector3_impl.h
)
# Split-kernel headers, one per split kernel stage plus shared split data/common headers.
set(SRC_SPLIT_HEADERS
  split/kernel_branched.h
  split/kernel_buffer_update.h
  split/kernel_data_init.h
  split/kernel_direct_lighting.h
  split/kernel_do_volume.h
  split/kernel_enqueue_inactive.h
  split/kernel_holdout_emission_blurring_pathtermination_ao.h
  split/kernel_indirect_background.h
  split/kernel_indirect_subsurface.h
  split/kernel_lamp_emission.h
  split/kernel_next_iteration_setup.h
  split/kernel_path_init.h
  split/kernel_queue_enqueue.h
  split/kernel_scene_intersect.h
  split/kernel_shader_setup.h
  split/kernel_shader_sort.h
  split/kernel_shader_eval.h
  split/kernel_shadow_blocked_ao.h
  split/kernel_shadow_blocked_dl.h
  split/kernel_split_common.h
  split/kernel_split_data.h
  split/kernel_split_data_types.h
  split/kernel_subsurface_scatter.h
)
set(LIB
@@ -340,145 +340,145 @@ set(LIB
# CUDA module
#
# When WITH_CYCLES_CUDA_BINARIES is enabled, compile one .cubin per requested
# architecture (CYCLES_CUDA_BINARIES_ARCH) for the filter, the regular kernel and,
# optionally, the split kernel, and expose them through the `cycles_kernel_cuda` target.

if(WITH_CYCLES_CUDA_BINARIES)
  # 64 bit only
  set(CUDA_BITS 64)

  # CUDA version: pull "release <major>.<minor>" out of `nvcc --version` and
  # concatenate both parts, so 10.1 becomes "101".
  execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
  string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${NVCC_OUT}")
  string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${NVCC_OUT}")
  set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")

  # warn for other versions
  if(NOT CUDA_VERSION MATCHES "101")
    message(WARNING
      "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
      "build may succeed but only CUDA 10.1 is officially supported")
  endif()

  # build for each arch
  # The header lists are included so that the cubins are rebuilt when a header changes
  # (they end up in DEPENDS of the custom commands below).
  set(cuda_sources kernels/cuda/kernel.cu kernels/cuda/kernel_split.cu
    ${SRC_HEADERS}
    ${SRC_KERNELS_CUDA_HEADERS}
    ${SRC_BVH_HEADERS}
    ${SRC_SVM_HEADERS}
    ${SRC_GEOM_HEADERS}
    ${SRC_CLOSURE_HEADERS}
    ${SRC_UTIL_HEADERS}
  )
  set(cuda_filter_sources kernels/cuda/filter.cu
    ${SRC_HEADERS}
    ${SRC_KERNELS_CUDA_HEADERS}
    ${SRC_FILTER_HEADERS}
    ${SRC_UTIL_HEADERS}
  )
  set(cuda_cubins)

  # CYCLES_CUDA_KERNEL_ADD(arch prev_arch name flags sources experimental)
  #
  # Adds the rule that builds `<name>_<arch>.cubin` from kernels/cuda/<name>.cu,
  # schedules it for installation and appends it to `cuda_cubins`.
  #   arch          target architecture, e.g. sm_61
  #   prev_arch     architecture whose cubin must be built first, or "none"
  #   name          kernel base name (filter, kernel, kernel_split)
  #   flags         NOTE(review): accepted but never referenced in the body
  #   sources       files the cubin depends on
  #   experimental  when true, adds -D __KERNEL_EXPERIMENTAL__
  #
  # This stays a macro because it appends to `cuda_cubins` in the caller's scope.
  macro(CYCLES_CUDA_KERNEL_ADD arch prev_arch name flags sources experimental)
    set(cuda_cubin ${name}_${arch}.cubin)

    # Depending on the previous architecture's cubin forces the builds to run one
    # after another instead of in parallel.
    set(kernel_sources ${sources})
    if(NOT "${prev_arch}" STREQUAL "none")
      set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin)
    endif()

    set(cuda_kernel_src "/kernels/cuda/${name}.cu")

    set(cuda_flags
      -D CCL_NAMESPACE_BEGIN=
      -D CCL_NAMESPACE_END=
      -D NVCC
      -m ${CUDA_BITS}
      -I ${CMAKE_CURRENT_SOURCE_DIR}/..
      -I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda
      --use_fast_math
      -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin})

    if(${experimental})
      set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__)
      # NOTE(review): macro arguments are text substitutions, not variables, so this
      # set() does not change what later ${name} references in this macro expand to.
      set(name ${name}_experimental)
    endif()

    if(WITH_CYCLES_DEBUG)
      set(cuda_flags ${cuda_flags} -D __KERNEL_DEBUG__)
    endif()

    if(WITH_CYCLES_CUBIN_COMPILER)
      # Drop the leading "sm_" to get the bare architecture number.
      string(SUBSTRING ${arch} 3 -1 CUDA_ARCH)

      # Needed to find libnvrtc-builtins.so. Can't do it from inside
      # cycles_cubin_cc since the env variable is read before main()
      if(APPLE)
        set(CUBIN_CC_ENV ${CMAKE_COMMAND}
          -E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib")
      elseif(UNIX)
        set(CUBIN_CC_ENV ${CMAKE_COMMAND}
          -E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64")
      endif()

      add_custom_command(
        OUTPUT ${cuda_cubin}
        COMMAND ${CUBIN_CC_ENV}
            "$<TARGET_FILE:cycles_cubin_cc>"
            -target ${CUDA_ARCH}
            -i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
            ${cuda_flags}
            -v
            -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
        DEPENDS ${kernel_sources} cycles_cubin_cc)
    else()
      add_custom_command(
        OUTPUT ${cuda_cubin}
        COMMAND ${CUDA_NVCC_EXECUTABLE}
            -arch=${arch}
            ${CUDA_NVCC_FLAGS}
            --cubin
            ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
            --ptxas-options="-v"
            ${cuda_flags}
        DEPENDS ${kernel_sources})
    endif()
    delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
    list(APPEND cuda_cubins ${cuda_cubin})

    # NOTE(review): cuda_debug_flags is not set anywhere in the visible code.
    unset(cuda_debug_flags)
  endmacro()

  set(prev_arch "none")
  foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
    if("${arch}" MATCHES "sm_2.")
      message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.")
    elseif("${arch}" MATCHES "sm_7." AND "${CUDA_VERSION}" LESS 100)
      message(STATUS "CUDA binaries for ${arch} require CUDA 10.0+, skipped.")
    else()
      # Compile regular kernel
      CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} filter "" "${cuda_filter_sources}" FALSE)
      CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel "" "${cuda_sources}" FALSE)

      if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES)
        # Compile split kernel
        CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel_split "-D __SPLIT__" "${cuda_sources}" FALSE)
      endif()

      # Only chain architectures together when a serial build was requested.
      if(WITH_CYCLES_CUDA_BUILD_SERIAL)
        set(prev_arch ${arch})
      endif()
    endif()
  endforeach()

  add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
  cycles_set_solution_folder(cycles_kernel_cuda)
endif()
# OSL module
#
# With OSL enabled, link cycles_kernel_osl into LIB and descend into the osl and
# shaders subdirectories. Each add_subdirectory() may only be issued once per directory.

if(WITH_CYCLES_OSL)
  list(APPEND LIB
    cycles_kernel_osl
  )
  add_subdirectory(osl)
  add_subdirectory(shaders)
endif()
# CPU module
@@ -491,56 +491,56 @@ set_source_files_properties(kernels/cpu/kernel_split.cpp PROPERTIES COMPILE_FLAG
# The generic (non-SIMD-specific) filter translation unit is compiled with CYCLES_KERNEL_FLAGS.
set_source_files_properties(kernels/cpu/filter.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}")
# When CXX_HAS_SSE is set, give each SSE2 / SSE3 / SSE4.1 variant of the kernel,
# split kernel and filter its matching CYCLES_SSE*_KERNEL_FLAGS.
if(CXX_HAS_SSE)
  set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
  set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
  set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
  set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
  set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
  set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
  set_source_files_properties(kernels/cpu/filter_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
  set_source_files_properties(kernels/cpu/filter_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
  set_source_files_properties(kernels/cpu/filter_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
endif()
# When CXX_HAS_AVX is set, compile the AVX variants with CYCLES_AVX_KERNEL_FLAGS.
if(CXX_HAS_AVX)
  set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
  set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
  set_source_files_properties(kernels/cpu/filter_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
endif()
# When CXX_HAS_AVX2 is set, compile the AVX2 variants with CYCLES_AVX2_KERNEL_FLAGS.
if(CXX_HAS_AVX2)
  set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
  set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
  set_source_files_properties(kernels/cpu/filter_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
endif()
# The cycles_kernel library: CPU sources plus the CUDA/OpenCL kernel files and all
# header lists, each passed exactly once. SRC_UTIL_HEADERS is not part of this call.
cycles_add_library(cycles_kernel "${LIB}"
  ${SRC_CPU_KERNELS}
  ${SRC_CUDA_KERNELS}
  ${SRC_OPENCL_KERNELS}
  ${SRC_HEADERS}
  ${SRC_KERNELS_CPU_HEADERS}
  ${SRC_KERNELS_CUDA_HEADERS}
  ${SRC_KERNELS_OPENCL_HEADERS}
  ${SRC_BVH_HEADERS}
  ${SRC_CLOSURE_HEADERS}
  ${SRC_FILTER_HEADERS}
  ${SRC_SVM_HEADERS}
  ${SRC_GEOM_HEADERS}
  ${SRC_SPLIT_HEADERS}
)
# Make cycles_kernel build after the CUDA cubins.
# NOTE(review): the cycles_kernel_cuda target is only created under
# WITH_CYCLES_CUDA_BINARIES, while this is guarded by WITH_CYCLES_CUDA -- confirm the
# two options cannot disagree.
if(WITH_CYCLES_CUDA)
  add_dependencies(cycles_kernel cycles_kernel_cuda)
endif()
# OpenCL kernel
#set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl)
#add_custom_command(
#  OUTPUT ${KERNEL_PREPROCESSED}
#  COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED}
#  DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS})
#add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED})
#delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)

Some files were not shown because too many files have changed in this diff Show More