ClangFormat: apply to source, most of intern

Apply clang format as proposed in T53211.

For details on usage and instructions for migrating branches
without conflicts, see:

https://wiki.blender.org/wiki/Tools/ClangFormat
This commit is contained in:
2019-04-17 06:17:24 +02:00
parent b3dabc200a
commit e12c08e8d1
4481 changed files with 1230080 additions and 1155401 deletions

View File

@@ -113,7 +113,8 @@ ATOMIC_INLINE size_t atomic_sub_and_fetch_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x); ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x); ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new); ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new);
ATOMIC_INLINE size_t atomic_fetch_and_update_max_z(size_t *p, size_t x); /* Uses CAS loop, see warning below. */ ATOMIC_INLINE size_t
atomic_fetch_and_update_max_z(size_t *p, size_t x); /* Uses CAS loop, see warning below. */
ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x); ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x);
ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x); ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x);
@@ -123,7 +124,6 @@ ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsig
ATOMIC_INLINE void *atomic_cas_ptr(void **v, void *old, void *_new); ATOMIC_INLINE void *atomic_cas_ptr(void **v, void *old, void *_new);
ATOMIC_INLINE float atomic_cas_float(float *v, float old, float _new); ATOMIC_INLINE float atomic_cas_float(float *v, float old, float _new);
/* WARNING! Float 'atomics' are really faked ones, those are actually closer to some kind of spinlock-sync'ed operation, /* WARNING! Float 'atomics' are really faked ones, those are actually closer to some kind of spinlock-sync'ed operation,

View File

@@ -56,105 +56,106 @@ ATOMIC_STATIC_ASSERT(sizeof(size_t) == LG_SIZEOF_PTR, "sizeof(size_t) != LG_SIZE
ATOMIC_INLINE size_t atomic_add_and_fetch_z(size_t *p, size_t x) ATOMIC_INLINE size_t atomic_add_and_fetch_z(size_t *p, size_t x)
{ {
#if (LG_SIZEOF_PTR == 8) #if (LG_SIZEOF_PTR == 8)
return (size_t)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x); return (size_t)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_PTR == 4) #elif (LG_SIZEOF_PTR == 4)
return (size_t)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x); return (size_t)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x);
#endif #endif
} }
ATOMIC_INLINE size_t atomic_sub_and_fetch_z(size_t *p, size_t x) ATOMIC_INLINE size_t atomic_sub_and_fetch_z(size_t *p, size_t x)
{ {
#if (LG_SIZEOF_PTR == 8) #if (LG_SIZEOF_PTR == 8)
return (size_t)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x)); return (size_t)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t) - ((int64_t)x));
#elif (LG_SIZEOF_PTR == 4) #elif (LG_SIZEOF_PTR == 4)
return (size_t)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x)); return (size_t)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t) - ((int32_t)x));
#endif #endif
} }
ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x) ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x)
{ {
#if (LG_SIZEOF_PTR == 8) #if (LG_SIZEOF_PTR == 8)
return (size_t)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x); return (size_t)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_PTR == 4) #elif (LG_SIZEOF_PTR == 4)
return (size_t)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x); return (size_t)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x);
#endif #endif
} }
ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x) ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x)
{ {
#if (LG_SIZEOF_PTR == 8) #if (LG_SIZEOF_PTR == 8)
return (size_t)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x)); return (size_t)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t) - ((int64_t)x));
#elif (LG_SIZEOF_PTR == 4) #elif (LG_SIZEOF_PTR == 4)
return (size_t)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x)); return (size_t)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t) - ((int32_t)x));
#endif #endif
} }
ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new) ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new)
{ {
#if (LG_SIZEOF_PTR == 8) #if (LG_SIZEOF_PTR == 8)
return (size_t)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new); return (size_t)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new);
#elif (LG_SIZEOF_PTR == 4) #elif (LG_SIZEOF_PTR == 4)
return (size_t)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new); return (size_t)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new);
#endif #endif
} }
ATOMIC_INLINE size_t atomic_fetch_and_update_max_z(size_t *p, size_t x) ATOMIC_INLINE size_t atomic_fetch_and_update_max_z(size_t *p, size_t x)
{ {
size_t prev_value; size_t prev_value;
while((prev_value = *p) < x) { while ((prev_value = *p) < x) {
if(atomic_cas_z(p, prev_value, x) == prev_value) { if (atomic_cas_z(p, prev_value, x) == prev_value) {
break; break;
} }
} }
return prev_value; return prev_value;
} }
/******************************************************************************/ /******************************************************************************/
/* unsigned operations. */ /* unsigned operations. */
ATOMIC_STATIC_ASSERT(sizeof(unsigned int) == LG_SIZEOF_INT, "sizeof(unsigned int) != LG_SIZEOF_INT"); ATOMIC_STATIC_ASSERT(sizeof(unsigned int) == LG_SIZEOF_INT,
"sizeof(unsigned int) != LG_SIZEOF_INT");
ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x) ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x)
{ {
#if (LG_SIZEOF_INT == 8) #if (LG_SIZEOF_INT == 8)
return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x); return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_INT == 4) #elif (LG_SIZEOF_INT == 4)
return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x); return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x);
#endif #endif
} }
ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x) ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x)
{ {
#if (LG_SIZEOF_INT == 8) #if (LG_SIZEOF_INT == 8)
return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x)); return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t) - ((int64_t)x));
#elif (LG_SIZEOF_INT == 4) #elif (LG_SIZEOF_INT == 4)
return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x)); return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t) - ((int32_t)x));
#endif #endif
} }
ATOMIC_INLINE unsigned int atomic_fetch_and_add_u(unsigned int *p, unsigned int x) ATOMIC_INLINE unsigned int atomic_fetch_and_add_u(unsigned int *p, unsigned int x)
{ {
#if (LG_SIZEOF_INT == 8) #if (LG_SIZEOF_INT == 8)
return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x); return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_INT == 4) #elif (LG_SIZEOF_INT == 4)
return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x); return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x);
#endif #endif
} }
ATOMIC_INLINE unsigned int atomic_fetch_and_sub_u(unsigned int *p, unsigned int x) ATOMIC_INLINE unsigned int atomic_fetch_and_sub_u(unsigned int *p, unsigned int x)
{ {
#if (LG_SIZEOF_INT == 8) #if (LG_SIZEOF_INT == 8)
return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x)); return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t) - ((int64_t)x));
#elif (LG_SIZEOF_INT == 4) #elif (LG_SIZEOF_INT == 4)
return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x)); return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t) - ((int32_t)x));
#endif #endif
} }
ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsigned int _new) ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsigned int _new)
{ {
#if (LG_SIZEOF_INT == 8) #if (LG_SIZEOF_INT == 8)
return (unsigned int)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new); return (unsigned int)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new);
#elif (LG_SIZEOF_INT == 4) #elif (LG_SIZEOF_INT == 4)
return (unsigned int)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new); return (unsigned int)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new);
#endif #endif
} }
@@ -162,12 +163,12 @@ ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsig
/* Char operations. */ /* Char operations. */
ATOMIC_INLINE char atomic_fetch_and_or_char(char *p, char b) ATOMIC_INLINE char atomic_fetch_and_or_char(char *p, char b)
{ {
return (char)atomic_fetch_and_or_uint8((uint8_t *)p, (uint8_t)b); return (char)atomic_fetch_and_or_uint8((uint8_t *)p, (uint8_t)b);
} }
ATOMIC_INLINE char atomic_fetch_and_and_char(char *p, char b) ATOMIC_INLINE char atomic_fetch_and_and_char(char *p, char b)
{ {
return (char)atomic_fetch_and_and_uint8((uint8_t *)p, (uint8_t)b); return (char)atomic_fetch_and_and_uint8((uint8_t *)p, (uint8_t)b);
} }
/******************************************************************************/ /******************************************************************************/
@@ -176,9 +177,9 @@ ATOMIC_INLINE char atomic_fetch_and_and_char(char *p, char b)
ATOMIC_INLINE void *atomic_cas_ptr(void **v, void *old, void *_new) ATOMIC_INLINE void *atomic_cas_ptr(void **v, void *old, void *_new)
{ {
#if (LG_SIZEOF_PTR == 8) #if (LG_SIZEOF_PTR == 8)
return (void *)atomic_cas_uint64((uint64_t *)v, *(uint64_t *)&old, *(uint64_t *)&_new); return (void *)atomic_cas_uint64((uint64_t *)v, *(uint64_t *)&old, *(uint64_t *)&_new);
#elif (LG_SIZEOF_PTR == 4) #elif (LG_SIZEOF_PTR == 4)
return (void *)atomic_cas_uint32((uint32_t *)v, *(uint32_t *)&old, *(uint32_t *)&_new); return (void *)atomic_cas_uint32((uint32_t *)v, *(uint32_t *)&old, *(uint32_t *)&_new);
#endif #endif
} }
@@ -188,22 +189,22 @@ ATOMIC_STATIC_ASSERT(sizeof(float) == sizeof(uint32_t), "sizeof(float) != sizeof
ATOMIC_INLINE float atomic_cas_float(float *v, float old, float _new) ATOMIC_INLINE float atomic_cas_float(float *v, float old, float _new)
{ {
uint32_t ret = atomic_cas_uint32((uint32_t *)v, *(uint32_t *)&old, *(uint32_t *)&_new); uint32_t ret = atomic_cas_uint32((uint32_t *)v, *(uint32_t *)&old, *(uint32_t *)&_new);
return *(float *)&ret; return *(float *)&ret;
} }
ATOMIC_INLINE float atomic_add_and_fetch_fl(float *p, const float x) ATOMIC_INLINE float atomic_add_and_fetch_fl(float *p, const float x)
{ {
float oldval, newval; float oldval, newval;
uint32_t prevval; uint32_t prevval;
do { /* Note that since collisions are unlikely, loop will nearly always run once. */ do { /* Note that since collisions are unlikely, loop will nearly always run once. */
oldval = *p; oldval = *p;
newval = oldval + x; newval = oldval + x;
prevval = atomic_cas_uint32((uint32_t *)p, *(uint32_t *)(&oldval), *(uint32_t *)(&newval)); prevval = atomic_cas_uint32((uint32_t *)p, *(uint32_t *)(&oldval), *(uint32_t *)(&newval));
} while (_ATOMIC_UNLIKELY(prevval != *(uint32_t *)(&oldval))); } while (_ATOMIC_UNLIKELY(prevval != *(uint32_t *)(&oldval)));
return newval; return newval;
} }
#endif /* __ATOMIC_OPS_EXT_H__ */ #endif /* __ATOMIC_OPS_EXT_H__ */

View File

@@ -40,7 +40,7 @@
#include <windows.h> #include <windows.h>
#include <intrin.h> #include <intrin.h>
#if defined (__clang__) #if defined(__clang__)
# pragma GCC diagnostic push # pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wincompatible-pointer-types" # pragma GCC diagnostic ignored "-Wincompatible-pointer-types"
#endif #endif
@@ -50,53 +50,53 @@
/* Unsigned */ /* Unsigned */
ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x) ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x)
{ {
return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x; return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x;
} }
ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x) ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x)
{ {
return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)) - x; return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)) - x;
} }
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{ {
return InterlockedCompareExchange64((int64_t *)v, _new, old); return InterlockedCompareExchange64((int64_t *)v, _new, old);
} }
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x) ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
{ {
return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x); return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x);
} }
ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x) ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x)
{ {
return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)); return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x));
} }
/* Signed */ /* Signed */
ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x) ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x)
{ {
return InterlockedExchangeAdd64(p, x) + x; return InterlockedExchangeAdd64(p, x) + x;
} }
ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x) ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x)
{ {
return InterlockedExchangeAdd64(p, -x) - x; return InterlockedExchangeAdd64(p, -x) - x;
} }
ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new) ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
{ {
return InterlockedCompareExchange64(v, _new, old); return InterlockedCompareExchange64(v, _new, old);
} }
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x) ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{ {
return InterlockedExchangeAdd64(p, x); return InterlockedExchangeAdd64(p, x);
} }
ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x) ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x)
{ {
return InterlockedExchangeAdd64(p, -x); return InterlockedExchangeAdd64(p, -x);
} }
#endif #endif
@@ -105,63 +105,63 @@ ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x)
/* Unsigned */ /* Unsigned */
ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x) ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
{ {
return InterlockedExchangeAdd(p, x) + x; return InterlockedExchangeAdd(p, x) + x;
} }
ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x) ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x)
{ {
return InterlockedExchangeAdd(p, -((int32_t)x)) - x; return InterlockedExchangeAdd(p, -((int32_t)x)) - x;
} }
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{ {
return InterlockedCompareExchange((long *)v, _new, old); return InterlockedCompareExchange((long *)v, _new, old);
} }
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x) ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x)
{ {
return InterlockedExchangeAdd(p, x); return InterlockedExchangeAdd(p, x);
} }
ATOMIC_INLINE uint32_t atomic_fetch_and_or_uint32(uint32_t *p, uint32_t x) ATOMIC_INLINE uint32_t atomic_fetch_and_or_uint32(uint32_t *p, uint32_t x)
{ {
return InterlockedOr((long *)p, x); return InterlockedOr((long *)p, x);
} }
ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x) ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x)
{ {
return InterlockedAnd((long *)p, x); return InterlockedAnd((long *)p, x);
} }
/* Signed */ /* Signed */
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x) ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
{ {
return InterlockedExchangeAdd((long *)p, x) + x; return InterlockedExchangeAdd((long *)p, x) + x;
} }
ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x) ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x)
{ {
return InterlockedExchangeAdd((long *)p, -x) - x; return InterlockedExchangeAdd((long *)p, -x) - x;
} }
ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new) ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
{ {
return InterlockedCompareExchange((long *)v, _new, old); return InterlockedCompareExchange((long *)v, _new, old);
} }
ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x) ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x)
{ {
return InterlockedExchangeAdd((long *)p, x); return InterlockedExchangeAdd((long *)p, x);
} }
ATOMIC_INLINE int32_t atomic_fetch_and_or_int32(int32_t *p, int32_t x) ATOMIC_INLINE int32_t atomic_fetch_and_or_int32(int32_t *p, int32_t x)
{ {
return InterlockedOr((long *)p, x); return InterlockedOr((long *)p, x);
} }
ATOMIC_INLINE int32_t atomic_fetch_and_and_int32(int32_t *p, int32_t x) ATOMIC_INLINE int32_t atomic_fetch_and_and_int32(int32_t *p, int32_t x)
{ {
return InterlockedAnd((long *)p, x); return InterlockedAnd((long *)p, x);
} }
/******************************************************************************/ /******************************************************************************/
@@ -172,9 +172,9 @@ ATOMIC_INLINE int32_t atomic_fetch_and_and_int32(int32_t *p, int32_t x)
ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b) ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
{ {
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) #if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
return InterlockedAnd8((char *)p, (char)b); return InterlockedAnd8((char *)p, (char)b);
#else #else
return _InterlockedAnd8((char *)p, (char)b); return _InterlockedAnd8((char *)p, (char)b);
#endif #endif
} }
@@ -182,9 +182,9 @@ ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b) ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
{ {
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) #if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
return InterlockedOr8((char *)p, (char)b); return InterlockedOr8((char *)p, (char)b);
#else #else
return _InterlockedOr8((char *)p, (char)b); return _InterlockedOr8((char *)p, (char)b);
#endif #endif
} }
@@ -193,9 +193,9 @@ ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
ATOMIC_INLINE int8_t atomic_fetch_and_and_int8(int8_t *p, int8_t b) ATOMIC_INLINE int8_t atomic_fetch_and_and_int8(int8_t *p, int8_t b)
{ {
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) #if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
return InterlockedAnd8((char *)p, (char)b); return InterlockedAnd8((char *)p, (char)b);
#else #else
return _InterlockedAnd8((char *)p, (char)b); return _InterlockedAnd8((char *)p, (char)b);
#endif #endif
} }
@@ -203,14 +203,13 @@ ATOMIC_INLINE int8_t atomic_fetch_and_and_int8(int8_t *p, int8_t b)
ATOMIC_INLINE int8_t atomic_fetch_and_or_int8(int8_t *p, int8_t b) ATOMIC_INLINE int8_t atomic_fetch_and_or_int8(int8_t *p, int8_t b)
{ {
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) #if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
return InterlockedOr8((char *)p, (char)b); return InterlockedOr8((char *)p, (char)b);
#else #else
return _InterlockedOr8((char *)p, (char)b); return _InterlockedOr8((char *)p, (char)b);
#endif #endif
} }
#if defined(__clang__)
#if defined (__clang__)
# pragma GCC diagnostic pop # pragma GCC diagnostic pop
#endif #endif

View File

@@ -56,140 +56,128 @@
/* Unsigned */ /* Unsigned */
ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x) ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x)
{ {
return __sync_add_and_fetch(p, x); return __sync_add_and_fetch(p, x);
} }
ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x) ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x)
{ {
return __sync_sub_and_fetch(p, x); return __sync_sub_and_fetch(p, x);
} }
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x) ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
{ {
return __sync_fetch_and_add(p, x); return __sync_fetch_and_add(p, x);
} }
ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x) ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x)
{ {
return __sync_fetch_and_sub(p, x); return __sync_fetch_and_sub(p, x);
} }
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{ {
return __sync_val_compare_and_swap(v, old, _new); return __sync_val_compare_and_swap(v, old, _new);
} }
/* Signed */ /* Signed */
ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x) ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x)
{ {
return __sync_add_and_fetch(p, x); return __sync_add_and_fetch(p, x);
} }
ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x) ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x)
{ {
return __sync_sub_and_fetch(p, x); return __sync_sub_and_fetch(p, x);
} }
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x) ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{ {
return __sync_fetch_and_add(p, x); return __sync_fetch_and_add(p, x);
} }
ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x) ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x)
{ {
return __sync_fetch_and_sub(p, x); return __sync_fetch_and_sub(p, x);
} }
ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new) ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
{ {
return __sync_val_compare_and_swap(v, old, _new); return __sync_val_compare_and_swap(v, old, _new);
} }
# elif (defined(__amd64__) || defined(__x86_64__)) # elif (defined(__amd64__) || defined(__x86_64__))
/* Unsigned */ /* Unsigned */
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x) ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
{ {
asm volatile ( asm volatile("lock; xaddq %0, %1;"
"lock; xaddq %0, %1;" : "+r"(x), "=m"(*p) /* Outputs. */
: "+r" (x), "=m" (*p) /* Outputs. */ : "m"(*p) /* Inputs. */
: "m" (*p) /* Inputs. */ );
); return x;
return x;
} }
ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x) ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x)
{ {
x = (uint64_t)(-(int64_t)x); x = (uint64_t)(-(int64_t)x);
asm volatile ( asm volatile("lock; xaddq %0, %1;"
"lock; xaddq %0, %1;" : "+r"(x), "=m"(*p) /* Outputs. */
: "+r" (x), "=m" (*p) /* Outputs. */ : "m"(*p) /* Inputs. */
: "m" (*p) /* Inputs. */ );
); return x;
return x;
} }
ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x) ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x)
{ {
return atomic_fetch_and_add_uint64(p, x) + x; return atomic_fetch_and_add_uint64(p, x) + x;
} }
ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x) ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x)
{ {
return atomic_fetch_and_sub_uint64(p, x) - x; return atomic_fetch_and_sub_uint64(p, x) - x;
} }
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{ {
uint64_t ret; uint64_t ret;
asm volatile ( asm volatile("lock; cmpxchgq %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
"lock; cmpxchgq %2,%1" return ret;
: "=a" (ret), "+m" (*v)
: "r" (_new), "0" (old)
: "memory");
return ret;
} }
/* Signed */ /* Signed */
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x) ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{ {
asm volatile ( asm volatile("lock; xaddq %0, %1;"
"lock; xaddq %0, %1;" : "+r"(x), "=m"(*p) /* Outputs. */
: "+r" (x), "=m" (*p) /* Outputs. */ : "m"(*p) /* Inputs. */
: "m" (*p) /* Inputs. */ );
); return x;
return x;
} }
ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x) ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x)
{ {
x = -x; x = -x;
asm volatile ( asm volatile("lock; xaddq %0, %1;"
"lock; xaddq %0, %1;" : "+r"(x), "=m"(*p) /* Outputs. */
: "+r" (x), "=m" (*p) /* Outputs. */ : "m"(*p) /* Inputs. */
: "m" (*p) /* Inputs. */ );
); return x;
return x;
} }
ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x) ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x)
{ {
return atomic_fetch_and_add_int64(p, x) + x; return atomic_fetch_and_add_int64(p, x) + x;
} }
ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x) ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x)
{ {
return atomic_fetch_and_sub_int64(p, x) - x; return atomic_fetch_and_sub_int64(p, x) - x;
} }
ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new) ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
{ {
int64_t ret; int64_t ret;
asm volatile ( asm volatile("lock; cmpxchgq %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
"lock; cmpxchgq %2,%1" return ret;
: "=a" (ret), "+m" (*v)
: "r" (_new), "0" (old)
: "memory");
return ret;
} }
# else # else
# error "Missing implementation for 64-bit atomic operations" # error "Missing implementation for 64-bit atomic operations"
@@ -202,102 +190,90 @@ ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
/* Unsigned */ /* Unsigned */
ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x) ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
{ {
return __sync_add_and_fetch(p, x); return __sync_add_and_fetch(p, x);
} }
ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x) ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x)
{ {
return __sync_sub_and_fetch(p, x); return __sync_sub_and_fetch(p, x);
} }
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{ {
return __sync_val_compare_and_swap(v, old, _new); return __sync_val_compare_and_swap(v, old, _new);
} }
/* Signed */ /* Signed */
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x) ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
{ {
return __sync_add_and_fetch(p, x); return __sync_add_and_fetch(p, x);
} }
ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x) ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x)
{ {
return __sync_sub_and_fetch(p, x); return __sync_sub_and_fetch(p, x);
} }
ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new) ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
{ {
return __sync_val_compare_and_swap(v, old, _new); return __sync_val_compare_and_swap(v, old, _new);
} }
#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) #elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
/* Unsigned */ /* Unsigned */
ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x) ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
{ {
uint32_t ret = x; uint32_t ret = x;
asm volatile ( asm volatile("lock; xaddl %0, %1;"
"lock; xaddl %0, %1;" : "+r"(ret), "=m"(*p) /* Outputs. */
: "+r" (ret), "=m" (*p) /* Outputs. */ : "m"(*p) /* Inputs. */
: "m" (*p) /* Inputs. */ );
); return ret + x;
return ret + x;
} }
ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x) ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x)
{ {
uint32_t ret = (uint32_t)(-(int32_t)x); uint32_t ret = (uint32_t)(-(int32_t)x);
asm volatile ( asm volatile("lock; xaddl %0, %1;"
"lock; xaddl %0, %1;" : "+r"(ret), "=m"(*p) /* Outputs. */
: "+r" (ret), "=m" (*p) /* Outputs. */ : "m"(*p) /* Inputs. */
: "m" (*p) /* Inputs. */ );
); return ret - x;
return ret - x;
} }
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{ {
uint32_t ret; uint32_t ret;
asm volatile ( asm volatile("lock; cmpxchgl %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
"lock; cmpxchgl %2,%1" return ret;
: "=a" (ret), "+m" (*v)
: "r" (_new), "0" (old)
: "memory");
return ret;
} }
/* Signed */ /* Signed */
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x) ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
{ {
int32_t ret = x; int32_t ret = x;
asm volatile ( asm volatile("lock; xaddl %0, %1;"
"lock; xaddl %0, %1;" : "+r"(ret), "=m"(*p) /* Outputs. */
: "+r" (ret), "=m" (*p) /* Outputs. */ : "m"(*p) /* Inputs. */
: "m" (*p) /* Inputs. */ );
); return ret + x;
return ret + x;
} }
ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x) ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x)
{ {
int32_t ret = -x; int32_t ret = -x;
asm volatile ( asm volatile("lock; xaddl %0, %1;"
"lock; xaddl %0, %1;" : "+r"(ret), "=m"(*p) /* Outputs. */
: "+r" (ret), "=m" (*p) /* Outputs. */ : "m"(*p) /* Inputs. */
: "m" (*p) /* Inputs. */ );
); return ret - x;
return ret - x;
} }
ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new) ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
{ {
int32_t ret; int32_t ret;
asm volatile ( asm volatile("lock; cmpxchgl %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
"lock; cmpxchgl %2,%1" return ret;
: "=a" (ret), "+m" (*v)
: "r" (_new), "0" (old)
: "memory");
return ret;
} }
#else #else
@@ -308,33 +284,33 @@ ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
/* Unsigned */ /* Unsigned */
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x) ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x)
{ {
return __sync_fetch_and_add(p, x); return __sync_fetch_and_add(p, x);
} }
ATOMIC_INLINE uint32_t atomic_fetch_and_or_uint32(uint32_t *p, uint32_t x) ATOMIC_INLINE uint32_t atomic_fetch_and_or_uint32(uint32_t *p, uint32_t x)
{ {
return __sync_fetch_and_or(p, x); return __sync_fetch_and_or(p, x);
} }
ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x) ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x)
{ {
return __sync_fetch_and_and(p, x); return __sync_fetch_and_and(p, x);
} }
/* Signed */ /* Signed */
ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x) ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x)
{ {
return __sync_fetch_and_add(p, x); return __sync_fetch_and_add(p, x);
} }
ATOMIC_INLINE int32_t atomic_fetch_and_or_int32(int32_t *p, int32_t x) ATOMIC_INLINE int32_t atomic_fetch_and_or_int32(int32_t *p, int32_t x)
{ {
return __sync_fetch_and_or(p, x); return __sync_fetch_and_or(p, x);
} }
ATOMIC_INLINE int32_t atomic_fetch_and_and_int32(int32_t *p, int32_t x) ATOMIC_INLINE int32_t atomic_fetch_and_and_int32(int32_t *p, int32_t x)
{ {
return __sync_fetch_and_and(p, x); return __sync_fetch_and_and(p, x);
} }
#else #else
@@ -347,21 +323,21 @@ ATOMIC_INLINE int32_t atomic_fetch_and_and_int32(int32_t *p, int32_t x)
/* Unsigned */ /* Unsigned */
ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b) ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
{ {
return __sync_fetch_and_and(p, b); return __sync_fetch_and_and(p, b);
} }
ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b) ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
{ {
return __sync_fetch_and_or(p, b); return __sync_fetch_and_or(p, b);
} }
/* Signed */ /* Signed */
ATOMIC_INLINE int8_t atomic_fetch_and_and_int8(int8_t *p, int8_t b) ATOMIC_INLINE int8_t atomic_fetch_and_and_int8(int8_t *p, int8_t b)
{ {
return __sync_fetch_and_and(p, b); return __sync_fetch_and_and(p, b);
} }
ATOMIC_INLINE int8_t atomic_fetch_and_or_int8(int8_t *p, int8_t b) ATOMIC_INLINE int8_t atomic_fetch_and_or_int8(int8_t *p, int8_t b)
{ {
return __sync_fetch_and_or(p, b); return __sync_fetch_and_or(p, b);
} }
#else #else

View File

@@ -62,11 +62,11 @@
#endif #endif
#ifdef __GNUC__ #ifdef __GNUC__
# define _ATOMIC_LIKELY(x) __builtin_expect(!!(x), 1) # define _ATOMIC_LIKELY(x) __builtin_expect(!!(x), 1)
# define _ATOMIC_UNLIKELY(x) __builtin_expect(!!(x), 0) # define _ATOMIC_UNLIKELY(x) __builtin_expect(!!(x), 0)
#else #else
# define _ATOMIC_LIKELY(x) (x) # define _ATOMIC_LIKELY(x) (x)
# define _ATOMIC_UNLIKELY(x) (x) # define _ATOMIC_UNLIKELY(x) (x)
#endif #endif
#if defined(__SIZEOF_POINTER__) #if defined(__SIZEOF_POINTER__)
@@ -77,7 +77,7 @@
# elif (UINTPTR_MAX == 0xFFFFFFFFFFFFFFFF) # elif (UINTPTR_MAX == 0xFFFFFFFFFFFFFFFF)
# define LG_SIZEOF_PTR 8 # define LG_SIZEOF_PTR 8
# endif # endif
#elif defined(__WORDSIZE) /* Fallback for older glibc and cpp */ #elif defined(__WORDSIZE) /* Fallback for older glibc and cpp */
# if (__WORDSIZE == 32) # if (__WORDSIZE == 32)
# define LG_SIZEOF_PTR 4 # define LG_SIZEOF_PTR 4
# elif (__WORDSIZE == 64) # elif (__WORDSIZE == 64)
@@ -100,9 +100,8 @@
/* Copied from BLI_utils... */ /* Copied from BLI_utils... */
/* C++ can't use _Static_assert, expects static_assert() but c++0x only, /* C++ can't use _Static_assert, expects static_assert() but c++0x only,
* Coverity also errors out. */ * Coverity also errors out. */
#if (!defined(__cplusplus)) && \ #if (!defined(__cplusplus)) && (!defined(__COVERITY__)) && \
(!defined(__COVERITY__)) && \ (defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 406)) /* gcc4.6+ only */
(defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 406)) /* gcc4.6+ only */
# define ATOMIC_STATIC_ASSERT(a, msg) __extension__ _Static_assert(a, msg); # define ATOMIC_STATIC_ASSERT(a, msg) __extension__ _Static_assert(a, msg);
#else #else
/* Code adapted from http://www.pixelbeat.org/programming/gcc/static_assert.html */ /* Code adapted from http://www.pixelbeat.org/programming/gcc/static_assert.html */
@@ -110,17 +109,19 @@
* expand __LINE__ with one indirection before doing the actual concatenation. */ * expand __LINE__ with one indirection before doing the actual concatenation. */
# define ATOMIC_ASSERT_CONCAT_(a, b) a##b # define ATOMIC_ASSERT_CONCAT_(a, b) a##b
# define ATOMIC_ASSERT_CONCAT(a, b) ATOMIC_ASSERT_CONCAT_(a, b) # define ATOMIC_ASSERT_CONCAT(a, b) ATOMIC_ASSERT_CONCAT_(a, b)
/* These can't be used after statements in c89. */ /* These can't be used after statements in c89. */
# if defined(__COUNTER__) /* MSVC */ # if defined(__COUNTER__) /* MSVC */
# define ATOMIC_STATIC_ASSERT(a, msg) \ # define ATOMIC_STATIC_ASSERT(a, msg) \
; enum { ATOMIC_ASSERT_CONCAT(static_assert_, __COUNTER__) = 1 / (int)(!!(a)) }; ; \
# else /* older gcc, clang... */ enum { ATOMIC_ASSERT_CONCAT(static_assert_, __COUNTER__) = 1 / (int)(!!(a)) };
/* This can't be used twice on the same line so ensure if using in headers # else /* older gcc, clang... */
/* This can't be used twice on the same line so ensure if using in headers
* that the headers are not included twice (by wrapping in #ifndef...#endif) * that the headers are not included twice (by wrapping in #ifndef...#endif)
* Note it doesn't cause an issue when used on same line of separate modules * Note it doesn't cause an issue when used on same line of separate modules
* compiled with gcc -combine -fwhole-program. */ * compiled with gcc -combine -fwhole-program. */
# define ATOMIC_STATIC_ASSERT(a, msg) \ # define ATOMIC_STATIC_ASSERT(a, msg) \
; enum { ATOMIC_ASSERT_CONCAT(assert_line_, __LINE__) = 1 / (int)(!!(a)) }; ; \
enum { ATOMIC_ASSERT_CONCAT(assert_line_, __LINE__) = 1 / (int)(!!(a)) };
# endif # endif
#endif #endif

View File

@@ -22,46 +22,46 @@
remove_strict_flags() remove_strict_flags()
if(CMAKE_COMPILER_IS_GNUCC) if(CMAKE_COMPILER_IS_GNUCC)
remove_cc_flag("-Wunused-macros") remove_cc_flag("-Wunused-macros")
endif() endif()
set(INC set(INC
. .
) )
set(INC_SYS set(INC_SYS
${AUDASPACE_C_INCLUDE_DIRS} ${AUDASPACE_C_INCLUDE_DIRS}
${AUDASPACE_PY_INCLUDE_DIRS} ${AUDASPACE_PY_INCLUDE_DIRS}
) )
set(SRC set(SRC
intern/AUD_Set.cpp intern/AUD_Set.cpp
intern/AUD_Set.h intern/AUD_Set.h
) )
set(LIB set(LIB
) )
if(NOT WITH_SYSTEM_AUDASPACE) if(NOT WITH_SYSTEM_AUDASPACE)
list(APPEND LIB list(APPEND LIB
audaspace audaspace
) )
endif() endif()
if(WITH_PYTHON) if(WITH_PYTHON)
list(APPEND INC_SYS list(APPEND INC_SYS
${PYTHON_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS}
) )
list(APPEND SRC list(APPEND SRC
intern/AUD_PyInit.cpp intern/AUD_PyInit.cpp
intern/AUD_PyInit.h intern/AUD_PyInit.h
) )
if(NOT WITH_SYSTEM_AUDASPACE) if(NOT WITH_SYSTEM_AUDASPACE)
list(APPEND LIB list(APPEND LIB
audaspace-py audaspace-py
) )
endif() endif()
add_definitions(-DWITH_PYTHON) add_definitions(-DWITH_PYTHON)
endif() endif()
blender_add_lib(bf_intern_audaspace "${SRC}" "${INC}" "${INC_SYS}" "${LIB}") blender_add_lib(bf_intern_audaspace "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")

View File

@@ -34,46 +34,47 @@ extern void *BKE_sound_get_factory(void *sound);
/**
 * Python entry point behind `_sound_from_pointer(pointer)`.
 *
 * The method is registered with METH_O, so \a args is the single argument object itself.
 * It is parsed as a C long, passed as an address to BKE_sound_get_factory(), and a copy of
 * the returned AUD_Sound is stored in a freshly created Sound object.
 *
 * \return The new Sound object; None when the value is zero, when
 * BKE_sound_get_factory() returns NULL, or when Sound_empty() returns NULL;
 * NULL (with the exception raised by PyArg_Parse left set) when the argument cannot be
 * converted to a long.
 */
static PyObject *AUD_getSoundFromPointer(PyObject *self, PyObject *args)
{
  /* NOTE(review): a C long is narrower than a pointer on LLP64 targets (64-bit Windows);
   * presumably this is only relied on where the round-trip is lossless - confirm. */
  long int lptr;

  if (!PyArg_Parse(args, "l:_sound_from_pointer", &lptr)) {
    /* Propagate the pending exception. Returning None here would hand the interpreter a
     * valid result while an error indicator is still set. */
    return NULL;
  }

  if (lptr) {
    AUD_Sound *sound = BKE_sound_get_factory((void *)lptr);

    if (sound) {
      Sound *obj = (Sound *)Sound_empty();
      /* NOTE(review): if Sound_empty() fails it may have set its own exception;
       * the original behavior (return None) is kept - confirm against Sound_empty(). */
      if (obj) {
        obj->sound = AUD_Sound_copy(sound);
        return (PyObject *)obj;
      }
    }
  }

  Py_RETURN_NONE;
}
static PyMethodDef meth_sound_from_pointer[] = { static PyMethodDef meth_sound_from_pointer[] = {
{"_sound_from_pointer", (PyCFunction)AUD_getSoundFromPointer, METH_O, {"_sound_from_pointer",
(PyCFunction)AUD_getSoundFromPointer,
METH_O,
"_sound_from_pointer(pointer)\n\n" "_sound_from_pointer(pointer)\n\n"
"Returns the corresponding :class:`Factory` object.\n\n" "Returns the corresponding :class:`Factory` object.\n\n"
":arg pointer: The pointer to the bSound object as long.\n" ":arg pointer: The pointer to the bSound object as long.\n"
":type pointer: long\n" ":type pointer: long\n"
":return: The corresponding :class:`Factory` object.\n" ":return: The corresponding :class:`Factory` object.\n"
":rtype: :class:`Factory`"} ":rtype: :class:`Factory`"}};
};
PyObject *AUD_initPython(void) PyObject *AUD_initPython(void)
{ {
PyObject *module = PyInit_aud(); PyObject *module = PyInit_aud();
if (module == NULL) { if (module == NULL) {
printf("Unable to initialise audio\n"); printf("Unable to initialise audio\n");
return NULL; return NULL;
} }
PyModule_AddObject(module, "_sound_from_pointer", (PyObject *)PyCFunction_New(meth_sound_from_pointer, NULL)); PyModule_AddObject(
PyDict_SetItemString(PyImport_GetModuleDict(), "aud", module); module, "_sound_from_pointer", (PyObject *)PyCFunction_New(meth_sound_from_pointer, NULL));
PyDict_SetItemString(PyImport_GetModuleDict(), "aud", module);
return module; return module;
} }

View File

@@ -22,26 +22,25 @@
* \ingroup audaspaceintern * \ingroup audaspaceintern
*/ */
#ifndef __AUD_PYINIT_H__ #ifndef __AUD_PYINIT_H__
#define __AUD_PYINIT_H__ #define __AUD_PYINIT_H__
#ifdef WITH_PYTHON #ifdef WITH_PYTHON
#include "Python.h" # include "Python.h"
#ifdef __cplusplus # ifdef __cplusplus
extern "C" { extern "C" {
#endif # endif
/** /**
* Initializes the Python module. * Initializes the Python module.
*/ */
extern PyObject *AUD_initPython(void); extern PyObject *AUD_initPython(void);
#ifdef __cplusplus # ifdef __cplusplus
} }
#endif # endif
#endif #endif
#endif //__AUD_PYINIT_H__ #endif //__AUD_PYINIT_H__

View File

@@ -28,38 +28,38 @@
void *AUD_createSet() void *AUD_createSet()
{ {
return new std::set<void *>(); return new std::set<void *>();
} }
void AUD_destroySet(void *set) void AUD_destroySet(void *set)
{ {
delete reinterpret_cast<std::set<void *>*>(set); delete reinterpret_cast<std::set<void *> *>(set);
} }
char AUD_removeSet(void *set, void *entry) char AUD_removeSet(void *set, void *entry)
{ {
if (set) if (set)
return reinterpret_cast<std::set<void *>*>(set)->erase(entry); return reinterpret_cast<std::set<void *> *>(set)->erase(entry);
return 0; return 0;
} }
/**
 * Insert \a entry into the pointer set \a set (a `std::set<void *>` passed as `void *`).
 *
 * Nothing happens when \a entry is NULL, or when \a set is NULL - the latter matches the
 * NULL handling of AUD_removeSet() and AUD_getSet() instead of dereferencing a NULL set.
 */
void AUD_addSet(void *set, void *entry)
{
  if (!set || !entry) {
    return;
  }
  reinterpret_cast<std::set<void *> *>(set)->insert(entry);
}
void *AUD_getSet(void *set) void *AUD_getSet(void *set)
{ {
if (set) { if (set) {
std::set<void *>* rset = reinterpret_cast<std::set<void *>*>(set); std::set<void *> *rset = reinterpret_cast<std::set<void *> *>(set);
if (!rset->empty()) { if (!rset->empty()) {
std::set<void *>::iterator it = rset->begin(); std::set<void *>::iterator it = rset->begin();
void *result = *it; void *result = *it;
rset->erase(it); rset->erase(it);
return result; return result;
} }
} }
return (void*) 0; return (void *)0;
} }

View File

@@ -21,7 +21,7 @@
/** \file /** \file
* \ingroup audaspace * \ingroup audaspace
*/ */
#ifndef __AUD_SET_H__ #ifndef __AUD_SET_H__
#define __AUD_SET_H__ #define __AUD_SET_H__
@@ -67,4 +67,4 @@ extern void *AUD_getSet(void *set);
} }
#endif #endif
#endif //__AUD_SET_H__ #endif //__AUD_SET_H__

View File

@@ -73,13 +73,14 @@ extern "C" {
#endif /* __cplusplus */ #endif /* __cplusplus */
#ifdef __GNUC__ #ifdef __GNUC__
# define _CLOG_ATTR_NONNULL(args ...) __attribute__((nonnull(args))) # define _CLOG_ATTR_NONNULL(args...) __attribute__((nonnull(args)))
#else #else
# define _CLOG_ATTR_NONNULL(...) # define _CLOG_ATTR_NONNULL(...)
#endif #endif
#ifdef __GNUC__ #ifdef __GNUC__
# define _CLOG_ATTR_PRINTF_FORMAT(format_param, dots_param) __attribute__((format(printf, format_param, dots_param))) # define _CLOG_ATTR_PRINTF_FORMAT(format_param, dots_param) \
__attribute__((format(printf, format_param, dots_param)))
#else #else
# define _CLOG_ATTR_PRINTF_FORMAT(format_param, dots_param) # define _CLOG_ATTR_PRINTF_FORMAT(format_param, dots_param)
#endif #endif
@@ -92,41 +93,44 @@ struct CLogContext;
/* Don't typedef enums. */ /* Don't typedef enums. */
enum CLG_LogFlag { enum CLG_LogFlag {
CLG_FLAG_USE = (1 << 0), CLG_FLAG_USE = (1 << 0),
}; };
enum CLG_Severity { enum CLG_Severity {
CLG_SEVERITY_INFO = 0, CLG_SEVERITY_INFO = 0,
CLG_SEVERITY_WARN, CLG_SEVERITY_WARN,
CLG_SEVERITY_ERROR, CLG_SEVERITY_ERROR,
CLG_SEVERITY_FATAL, CLG_SEVERITY_FATAL,
}; };
#define CLG_SEVERITY_LEN (CLG_SEVERITY_FATAL + 1) #define CLG_SEVERITY_LEN (CLG_SEVERITY_FATAL + 1)
/* Each logger ID has one of these. */ /* Each logger ID has one of these. */
typedef struct CLG_LogType { typedef struct CLG_LogType {
struct CLG_LogType *next; struct CLG_LogType *next;
char identifier[64]; char identifier[64];
/** FILE output. */ /** FILE output. */
struct CLogContext *ctx; struct CLogContext *ctx;
/** Control behavior. */ /** Control behavior. */
int level; int level;
enum CLG_LogFlag flag; enum CLG_LogFlag flag;
} CLG_LogType; } CLG_LogType;
typedef struct CLG_LogRef { typedef struct CLG_LogRef {
const char *identifier; const char *identifier;
CLG_LogType *type; CLG_LogType *type;
} CLG_LogRef; } CLG_LogRef;
void CLG_log_str( void CLG_log_str(CLG_LogType *lg,
CLG_LogType *lg, enum CLG_Severity severity, const char *file_line, const char *fn, enum CLG_Severity severity,
const char *message) const char *file_line,
_CLOG_ATTR_NONNULL(1, 3, 4, 5); const char *fn,
void CLG_logf( const char *message) _CLOG_ATTR_NONNULL(1, 3, 4, 5);
CLG_LogType *lg, enum CLG_Severity severity, const char *file_line, const char *fn, void CLG_logf(CLG_LogType *lg,
const char *format, ...) enum CLG_Severity severity,
_CLOG_ATTR_NONNULL(1, 3, 4, 5) _CLOG_ATTR_PRINTF_FORMAT(5, 6); const char *file_line,
const char *fn,
const char *format,
...) _CLOG_ATTR_NONNULL(1, 3, 4, 5) _CLOG_ATTR_PRINTF_FORMAT(5, 6);
/* Main initializer and destructor (per session, not logger). */ /* Main initializer and destructor (per session, not logger). */
void CLG_init(void); void CLG_init(void);
@@ -147,51 +151,63 @@ void CLG_logref_init(CLG_LogRef *clg_ref);
/** Declare outside function, declare as extern in header. */ /** Declare outside function, declare as extern in header. */
#define CLG_LOGREF_DECLARE_GLOBAL(var, id) \ #define CLG_LOGREF_DECLARE_GLOBAL(var, id) \
static CLG_LogRef _static_ ## var = {id}; \ static CLG_LogRef _static_##var = {id}; \
CLG_LogRef *var = &_static_ ## var CLG_LogRef *var = &_static_##var
/** Initialize struct once. */ /** Initialize struct once. */
#define CLOG_ENSURE(clg_ref) \ #define CLOG_ENSURE(clg_ref) \
((clg_ref)->type ? (clg_ref)->type : (CLG_logref_init(clg_ref), (clg_ref)->type)) ((clg_ref)->type ? (clg_ref)->type : (CLG_logref_init(clg_ref), (clg_ref)->type))
#define CLOG_AT_SEVERITY(clg_ref, severity, verbose_level, ...) { \ #define CLOG_AT_SEVERITY(clg_ref, severity, verbose_level, ...) \
CLG_LogType *_lg_ty = CLOG_ENSURE(clg_ref); \ { \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || (severity >= CLG_SEVERITY_WARN)) { \ CLG_LogType *_lg_ty = CLOG_ENSURE(clg_ref); \
CLG_logf(_lg_ty, severity, __FILE__ ":" STRINGIFY(__LINE__), __func__, __VA_ARGS__); \ if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || \
} \ (severity >= CLG_SEVERITY_WARN)) { \
} ((void)0) CLG_logf(_lg_ty, severity, __FILE__ ":" STRINGIFY(__LINE__), __func__, __VA_ARGS__); \
} \
} \
((void)0)
#define CLOG_STR_AT_SEVERITY(clg_ref, severity, verbose_level, str) { \ #define CLOG_STR_AT_SEVERITY(clg_ref, severity, verbose_level, str) \
CLG_LogType *_lg_ty = CLOG_ENSURE(clg_ref); \ { \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || (severity >= CLG_SEVERITY_WARN)) { \ CLG_LogType *_lg_ty = CLOG_ENSURE(clg_ref); \
CLG_log_str(_lg_ty, severity, __FILE__ ":" STRINGIFY(__LINE__), __func__, str); \ if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || \
} \ (severity >= CLG_SEVERITY_WARN)) { \
} ((void)0) CLG_log_str(_lg_ty, severity, __FILE__ ":" STRINGIFY(__LINE__), __func__, str); \
} \
} \
((void)0)
#define CLOG_STR_AT_SEVERITY_N(clg_ref, severity, verbose_level, str) { \ #define CLOG_STR_AT_SEVERITY_N(clg_ref, severity, verbose_level, str) \
CLG_LogType *_lg_ty = CLOG_ENSURE(clg_ref); \ { \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || (severity >= CLG_SEVERITY_WARN)) { \ CLG_LogType *_lg_ty = CLOG_ENSURE(clg_ref); \
const char *_str = str; \ if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || \
CLG_log_str(_lg_ty, severity, __FILE__ ":" STRINGIFY(__LINE__), __func__, _str); \ (severity >= CLG_SEVERITY_WARN)) { \
MEM_freeN((void *)_str); \ const char *_str = str; \
} \ CLG_log_str(_lg_ty, severity, __FILE__ ":" STRINGIFY(__LINE__), __func__, _str); \
} ((void)0) MEM_freeN((void *)_str); \
} \
} \
((void)0)
#define CLOG_INFO(clg_ref, level, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_INFO, level, __VA_ARGS__) #define CLOG_INFO(clg_ref, level, ...) \
#define CLOG_WARN(clg_ref, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_WARN, 0, __VA_ARGS__) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_INFO, level, __VA_ARGS__)
#define CLOG_ERROR(clg_ref, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_ERROR, 0, __VA_ARGS__) #define CLOG_WARN(clg_ref, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_WARN, 0, __VA_ARGS__)
#define CLOG_FATAL(clg_ref, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_FATAL, 0, __VA_ARGS__) #define CLOG_ERROR(clg_ref, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_ERROR, 0, __VA_ARGS__)
#define CLOG_FATAL(clg_ref, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_FATAL, 0, __VA_ARGS__)
#define CLOG_STR_INFO(clg_ref, level, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_INFO, level, str) #define CLOG_STR_INFO(clg_ref, level, str) \
#define CLOG_STR_WARN(clg_ref, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_WARN, 0, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_INFO, level, str)
#define CLOG_STR_ERROR(clg_ref, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_ERROR, 0, str) #define CLOG_STR_WARN(clg_ref, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_WARN, 0, str)
#define CLOG_STR_FATAL(clg_ref, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_FATAL, 0, str) #define CLOG_STR_ERROR(clg_ref, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_ERROR, 0, str)
#define CLOG_STR_FATAL(clg_ref, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_FATAL, 0, str)
/* Allocated string which is immediately freed. */ /* Allocated string which is immediately freed. */
#define CLOG_STR_INFO_N(clg_ref, level, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_INFO, level, str) #define CLOG_STR_INFO_N(clg_ref, level, str) \
#define CLOG_STR_WARN_N(clg_ref, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_WARN, 0, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_INFO, level, str)
#define CLOG_STR_ERROR_N(clg_ref, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_ERROR, 0, str) #define CLOG_STR_WARN_N(clg_ref, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_WARN, 0, str)
#define CLOG_STR_FATAL_N(clg_ref, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_FATAL, 0, str) #define CLOG_STR_ERROR_N(clg_ref, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_ERROR, 0, str)
#define CLOG_STR_FATAL_N(clg_ref, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_FATAL, 0, str)
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@@ -17,9 +17,9 @@
# ***** END GPL LICENSE BLOCK ***** # ***** END GPL LICENSE BLOCK *****
set(INC set(INC
. .
../atomic ../atomic
../guardedalloc ../guardedalloc
) )
set(INC_SYS set(INC_SYS
@@ -27,9 +27,9 @@ set(INC_SYS
) )
set(SRC set(SRC
clog.c clog.c
CLG_log.h CLG_log.h
) )
set(LIB set(LIB

View File

@@ -46,7 +46,6 @@
#define __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS
#include <inttypes.h> #include <inttypes.h>
/* Only other dependency (could use regular malloc too). */ /* Only other dependency (could use regular malloc too). */
#include "MEM_guardedalloc.h" #include "MEM_guardedalloc.h"
@@ -68,40 +67,40 @@
* \{ */ * \{ */
typedef struct CLG_IDFilter { typedef struct CLG_IDFilter {
struct CLG_IDFilter *next; struct CLG_IDFilter *next;
/** Over alloc. */ /** Over alloc. */
char match[0]; char match[0];
} CLG_IDFilter; } CLG_IDFilter;
typedef struct CLogContext { typedef struct CLogContext {
/** Single linked list of types. */ /** Single linked list of types. */
CLG_LogType *types; CLG_LogType *types;
#ifdef WITH_CLOG_PTHREADS #ifdef WITH_CLOG_PTHREADS
pthread_mutex_t types_lock; pthread_mutex_t types_lock;
#endif #endif
/* exclude, include filters. */ /* exclude, include filters. */
CLG_IDFilter *filters[2]; CLG_IDFilter *filters[2];
bool use_color; bool use_color;
bool use_basename; bool use_basename;
bool use_timestamp; bool use_timestamp;
/** Borrowed, not owned. */ /** Borrowed, not owned. */
int output; int output;
FILE *output_file; FILE *output_file;
/** For timer (use_timestamp). */ /** For timer (use_timestamp). */
uint64_t timestamp_tick_start; uint64_t timestamp_tick_start;
/** For new types. */ /** For new types. */
struct { struct {
int level; int level;
} default_type; } default_type;
struct { struct {
void (*fatal_fn)(void *file_handle); void (*fatal_fn)(void *file_handle);
void (*backtrace_fn)(void *file_handle); void (*backtrace_fn)(void *file_handle);
} callbacks; } callbacks;
} CLogContext; } CLogContext;
/** \} */ /** \} */
@@ -115,92 +114,92 @@ typedef struct CLogContext {
#define CLOG_BUF_LEN_INIT 512 #define CLOG_BUF_LEN_INIT 512
typedef struct CLogStringBuf { typedef struct CLogStringBuf {
char *data; char *data;
uint len; uint len;
uint len_alloc; uint len_alloc;
bool is_alloc; bool is_alloc;
} CLogStringBuf; } CLogStringBuf;
static void clg_str_init(CLogStringBuf *cstr, char *buf_stack, uint buf_stack_len) static void clg_str_init(CLogStringBuf *cstr, char *buf_stack, uint buf_stack_len)
{ {
cstr->data = buf_stack; cstr->data = buf_stack;
cstr->len_alloc = buf_stack_len; cstr->len_alloc = buf_stack_len;
cstr->len = 0; cstr->len = 0;
cstr->is_alloc = false; cstr->is_alloc = false;
} }
static void clg_str_free(CLogStringBuf *cstr) static void clg_str_free(CLogStringBuf *cstr)
{ {
if (cstr->is_alloc) { if (cstr->is_alloc) {
MEM_freeN(cstr->data); MEM_freeN(cstr->data);
} }
} }
/**
 * Make sure \a cstr can hold at least \a len bytes in total.
 *
 * Capacity is doubled, or set to \a len when doubling is still too small.
 * While `is_alloc` is false the data lives in a caller supplied buffer, so the first
 * growth copies the `cstr->len` bytes written so far into a new heap allocation.
 */
static void clg_str_reserve(CLogStringBuf *cstr, const uint len)
{
  if (len <= cstr->len_alloc) {
    return;
  }

  uint len_alloc_next = cstr->len_alloc * 2;
  if (len > len_alloc_next) {
    len_alloc_next = len;
  }

  if (cstr->is_alloc) {
    cstr->data = MEM_reallocN(cstr->data, len_alloc_next);
  }
  else {
    /* Copy the static buffer. */
    char *data = MEM_mallocN(len_alloc_next, __func__);
    memcpy(data, cstr->data, cstr->len);
    cstr->data = data;
    cstr->is_alloc = true;
  }

  /* Record the size that was really allocated. Storing the requested `len` instead would
   * forget the extra capacity and force a reallocation on every later growth. */
  cstr->len_alloc = len_alloc_next;
}
static void clg_str_append_with_len(CLogStringBuf *cstr, const char *str, const uint len) static void clg_str_append_with_len(CLogStringBuf *cstr, const char *str, const uint len)
{ {
uint len_next = cstr->len + len; uint len_next = cstr->len + len;
clg_str_reserve(cstr, len_next); clg_str_reserve(cstr, len_next);
char *str_dst = cstr->data + cstr->len; char *str_dst = cstr->data + cstr->len;
memcpy(str_dst, str, len); memcpy(str_dst, str, len);
#if 0 /* no need. */ #if 0 /* no need. */
str_dst[len] = '\0'; str_dst[len] = '\0';
#endif #endif
cstr->len = len_next; cstr->len = len_next;
} }
static void clg_str_append(CLogStringBuf *cstr, const char *str) static void clg_str_append(CLogStringBuf *cstr, const char *str)
{ {
clg_str_append_with_len(cstr, str, strlen(str)); clg_str_append_with_len(cstr, str, strlen(str));
} }
/**
 * Append printf-style formatted text to \a cstr, growing the buffer as needed.
 *
 * \a args is copied for every attempt, so the caller's va_list is left untouched.
 * The text is silently dropped when the space needed reaches the 65535 byte limit.
 */
static void clg_str_vappendf(CLogStringBuf *cstr, const char *fmt, va_list args)
{
  /* Use limit because windows may use '-1' for a formatting error. */
  const uint len_max = 65535;
  uint len_avail = (cstr->len_alloc - cstr->len);
  if (len_avail == 0) {
    len_avail = CLOG_BUF_LEN_INIT;
    /* Reserve a total size: the bytes already used plus the room to format into. */
    clg_str_reserve(cstr, cstr->len + len_avail);
  }
  while (true) {
    va_list args_cpy;
    va_copy(args_cpy, args);
    int retval = vsnprintf(cstr->data + cstr->len, len_avail, fmt, args_cpy);
    va_end(args_cpy);

    if (retval >= 0 && (uint)retval < len_avail) {
      /* Everything fit, including the terminator vsnprintf writes. */
      cstr->len += (uint)retval;
      break;
    }

    if (retval >= 0) {
      /* C99 behavior: output was truncated and `retval` is the length that is needed,
       * so ask for exactly that (plus the terminator) instead of guessing. */
      len_avail = (uint)retval + 1;
    }
    else {
      /* Negative return (e.g. '-1' on windows): required size unknown, keep doubling. */
      len_avail *= 2;
    }

    if (len_avail >= len_max) {
      break;
    }
    clg_str_reserve(cstr, cstr->len + len_avail);
  }
}
/** \} */ /** \} */
@@ -210,12 +209,12 @@ static void clg_str_vappendf(CLogStringBuf *cstr, const char *fmt, va_list args)
* \{ */ * \{ */
enum eCLogColor { enum eCLogColor {
COLOR_DEFAULT, COLOR_DEFAULT,
COLOR_RED, COLOR_RED,
COLOR_GREEN, COLOR_GREEN,
COLOR_YELLOW, COLOR_YELLOW,
COLOR_RESET, COLOR_RESET,
}; };
#define COLOR_LEN (COLOR_RESET + 1) #define COLOR_LEN (COLOR_RESET + 1)
@@ -223,61 +222,61 @@ static const char *clg_color_table[COLOR_LEN] = {NULL};
static void clg_color_table_init(bool use_color) static void clg_color_table_init(bool use_color)
{ {
for (int i = 0; i < COLOR_LEN; i++) { for (int i = 0; i < COLOR_LEN; i++) {
clg_color_table[i] = ""; clg_color_table[i] = "";
} }
if (use_color) { if (use_color) {
#ifdef _WIN32 #ifdef _WIN32
/* TODO */ /* TODO */
#else #else
clg_color_table[COLOR_DEFAULT] = "\033[1;37m"; clg_color_table[COLOR_DEFAULT] = "\033[1;37m";
clg_color_table[COLOR_RED] = "\033[1;31m"; clg_color_table[COLOR_RED] = "\033[1;31m";
clg_color_table[COLOR_GREEN] = "\033[1;32m"; clg_color_table[COLOR_GREEN] = "\033[1;32m";
clg_color_table[COLOR_YELLOW] = "\033[1;33m"; clg_color_table[COLOR_YELLOW] = "\033[1;33m";
clg_color_table[COLOR_RESET] = "\033[0m"; clg_color_table[COLOR_RESET] = "\033[0m";
#endif #endif
} }
} }
static const char *clg_severity_str[CLG_SEVERITY_LEN] = { static const char *clg_severity_str[CLG_SEVERITY_LEN] = {
[CLG_SEVERITY_INFO] = "INFO", [CLG_SEVERITY_INFO] = "INFO",
[CLG_SEVERITY_WARN] = "WARN", [CLG_SEVERITY_WARN] = "WARN",
[CLG_SEVERITY_ERROR] = "ERROR", [CLG_SEVERITY_ERROR] = "ERROR",
[CLG_SEVERITY_FATAL] = "FATAL", [CLG_SEVERITY_FATAL] = "FATAL",
}; };
static const char *clg_severity_as_text(enum CLG_Severity severity) static const char *clg_severity_as_text(enum CLG_Severity severity)
{ {
bool ok = (unsigned int)severity < CLG_SEVERITY_LEN; bool ok = (unsigned int)severity < CLG_SEVERITY_LEN;
assert(ok); assert(ok);
if (ok) { if (ok) {
return clg_severity_str[severity]; return clg_severity_str[severity];
} }
else { else {
return "INVALID_SEVERITY"; return "INVALID_SEVERITY";
} }
} }
static enum eCLogColor clg_severity_to_color(enum CLG_Severity severity) static enum eCLogColor clg_severity_to_color(enum CLG_Severity severity)
{ {
assert((unsigned int)severity < CLG_SEVERITY_LEN); assert((unsigned int)severity < CLG_SEVERITY_LEN);
enum eCLogColor color = COLOR_DEFAULT; enum eCLogColor color = COLOR_DEFAULT;
switch (severity) { switch (severity) {
case CLG_SEVERITY_INFO: case CLG_SEVERITY_INFO:
color = COLOR_DEFAULT; color = COLOR_DEFAULT;
break; break;
case CLG_SEVERITY_WARN: case CLG_SEVERITY_WARN:
color = COLOR_YELLOW; color = COLOR_YELLOW;
break; break;
case CLG_SEVERITY_ERROR: case CLG_SEVERITY_ERROR:
case CLG_SEVERITY_FATAL: case CLG_SEVERITY_FATAL:
color = COLOR_RED; color = COLOR_RED;
break; break;
default: default:
/* should never get here. */ /* should never get here. */
assert(false); assert(false);
} }
return color; return color;
} }
/** \} */ /** \} */
@@ -295,27 +294,24 @@ static enum eCLogColor clg_severity_to_color(enum CLG_Severity severity)
*/ */
static bool clg_ctx_filter_check(CLogContext *ctx, const char *identifier) static bool clg_ctx_filter_check(CLogContext *ctx, const char *identifier)
{ {
const int identifier_len = strlen(identifier); const int identifier_len = strlen(identifier);
for (uint i = 0; i < 2; i++) { for (uint i = 0; i < 2; i++) {
const CLG_IDFilter *flt = ctx->filters[i]; const CLG_IDFilter *flt = ctx->filters[i];
while (flt != NULL) { while (flt != NULL) {
const int len = strlen(flt->match); const int len = strlen(flt->match);
if (STREQ(flt->match, "*") || if (STREQ(flt->match, "*") || ((len == identifier_len) && (STREQ(identifier, flt->match)))) {
((len == identifier_len) && (STREQ(identifier, flt->match)))) return (bool)i;
{ }
return (bool)i; if ((len >= 2) && (STREQLEN(".*", &flt->match[len - 2], 2))) {
} if (((identifier_len == len - 2) && STREQLEN(identifier, flt->match, len - 2)) ||
if ((len >= 2) && (STREQLEN(".*", &flt->match[len - 2], 2))) { ((identifier_len >= len - 1) && STREQLEN(identifier, flt->match, len - 1))) {
if (((identifier_len == len - 2) && STREQLEN(identifier, flt->match, len - 2)) || return (bool)i;
((identifier_len >= len - 1) && STREQLEN(identifier, flt->match, len - 1))) }
{ }
return (bool)i; flt = flt->next;
} }
} }
flt = flt->next; return false;
}
}
return false;
} }
/** /**
@@ -324,58 +320,58 @@ static bool clg_ctx_filter_check(CLogContext *ctx, const char *identifier)
*/ */
static CLG_LogType *clg_ctx_type_find_by_name(CLogContext *ctx, const char *identifier) static CLG_LogType *clg_ctx_type_find_by_name(CLogContext *ctx, const char *identifier)
{ {
for (CLG_LogType *ty = ctx->types; ty; ty = ty->next) { for (CLG_LogType *ty = ctx->types; ty; ty = ty->next) {
if (STREQ(identifier, ty->identifier)) { if (STREQ(identifier, ty->identifier)) {
return ty; return ty;
} }
} }
return NULL; return NULL;
} }
static CLG_LogType *clg_ctx_type_register(CLogContext *ctx, const char *identifier) static CLG_LogType *clg_ctx_type_register(CLogContext *ctx, const char *identifier)
{ {
assert(clg_ctx_type_find_by_name(ctx, identifier) == NULL); assert(clg_ctx_type_find_by_name(ctx, identifier) == NULL);
CLG_LogType *ty = MEM_callocN(sizeof(*ty), __func__); CLG_LogType *ty = MEM_callocN(sizeof(*ty), __func__);
ty->next = ctx->types; ty->next = ctx->types;
ctx->types = ty; ctx->types = ty;
strncpy(ty->identifier, identifier, sizeof(ty->identifier) - 1); strncpy(ty->identifier, identifier, sizeof(ty->identifier) - 1);
ty->ctx = ctx; ty->ctx = ctx;
ty->level = ctx->default_type.level; ty->level = ctx->default_type.level;
if (clg_ctx_filter_check(ctx, ty->identifier)) { if (clg_ctx_filter_check(ctx, ty->identifier)) {
ty->flag |= CLG_FLAG_USE; ty->flag |= CLG_FLAG_USE;
} }
return ty; return ty;
} }
static void clg_ctx_fatal_action(CLogContext *ctx) static void clg_ctx_fatal_action(CLogContext *ctx)
{ {
if (ctx->callbacks.fatal_fn != NULL) { if (ctx->callbacks.fatal_fn != NULL) {
ctx->callbacks.fatal_fn(ctx->output_file); ctx->callbacks.fatal_fn(ctx->output_file);
} }
fflush(ctx->output_file); fflush(ctx->output_file);
abort(); abort();
} }
/**
 * Run the backtrace callback of \a ctx on its output file, then flush that file.
 * The callback is skipped when none is set, mirroring the NULL check that
 * clg_ctx_fatal_action() performs on its own callback.
 */
static void clg_ctx_backtrace(CLogContext *ctx)
{
  /* Note: we avoid writing to 'FILE', for backtrace we make an exception,
   * if necessary we could have a version of the callback that writes to file descriptor all at once. */
  if (ctx->callbacks.backtrace_fn != NULL) {
    ctx->callbacks.backtrace_fn(ctx->output_file);
  }
  fflush(ctx->output_file);
}
/**
 * Return the current tick count in milliseconds.
 *
 * With MSVC this is GetTickCount64(); elsewhere it is built from gettimeofday().
 * Only differences between two returned values are comparable across both variants.
 */
static uint64_t clg_timestamp_ticks_get(void)
{
  uint64_t tick;
#if defined(_MSC_VER)
  tick = GetTickCount64();
#else
  struct timeval tv;
  gettimeofday(&tv, NULL);
  /* Widen before multiplying: `tv_sec * 1000` would otherwise be evaluated in the
   * width of `time_t`, which overflows where that type is 32 bits. */
  tick = (uint64_t)tv.tv_sec * 1000 + (uint64_t)tv.tv_usec / 1000;
#endif
  return tick;
}
/** \} */ /** \} */
@@ -386,131 +382,140 @@ static uint64_t clg_timestamp_ticks_get(void)
static void write_timestamp(CLogStringBuf *cstr, const uint64_t timestamp_tick_start) static void write_timestamp(CLogStringBuf *cstr, const uint64_t timestamp_tick_start)
{ {
char timestamp_str[64]; char timestamp_str[64];
const uint64_t timestamp = clg_timestamp_ticks_get() - timestamp_tick_start; const uint64_t timestamp = clg_timestamp_ticks_get() - timestamp_tick_start;
const uint timestamp_len = snprintf( const uint timestamp_len = snprintf(timestamp_str,
timestamp_str, sizeof(timestamp_str), "%" PRIu64 ".%03u ", sizeof(timestamp_str),
timestamp / 1000, (uint)(timestamp % 1000)); "%" PRIu64 ".%03u ",
clg_str_append_with_len(cstr, timestamp_str, timestamp_len); timestamp / 1000,
(uint)(timestamp % 1000));
clg_str_append_with_len(cstr, timestamp_str, timestamp_len);
} }
static void write_severity(CLogStringBuf *cstr, enum CLG_Severity severity, bool use_color) static void write_severity(CLogStringBuf *cstr, enum CLG_Severity severity, bool use_color)
{ {
assert((unsigned int)severity < CLG_SEVERITY_LEN); assert((unsigned int)severity < CLG_SEVERITY_LEN);
if (use_color) { if (use_color) {
enum eCLogColor color = clg_severity_to_color(severity); enum eCLogColor color = clg_severity_to_color(severity);
clg_str_append(cstr, clg_color_table[color]); clg_str_append(cstr, clg_color_table[color]);
clg_str_append(cstr, clg_severity_as_text(severity)); clg_str_append(cstr, clg_severity_as_text(severity));
clg_str_append(cstr, clg_color_table[COLOR_RESET]); clg_str_append(cstr, clg_color_table[COLOR_RESET]);
} }
else { else {
clg_str_append(cstr, clg_severity_as_text(severity)); clg_str_append(cstr, clg_severity_as_text(severity));
} }
} }
static void write_type(CLogStringBuf *cstr, CLG_LogType *lg) static void write_type(CLogStringBuf *cstr, CLG_LogType *lg)
{ {
clg_str_append(cstr, " ("); clg_str_append(cstr, " (");
clg_str_append(cstr, lg->identifier); clg_str_append(cstr, lg->identifier);
clg_str_append(cstr, "): "); clg_str_append(cstr, "): ");
} }
static void write_file_line_fn(CLogStringBuf *cstr, const char *file_line, const char *fn, const bool use_basename) static void write_file_line_fn(CLogStringBuf *cstr,
const char *file_line,
const char *fn,
const bool use_basename)
{ {
uint file_line_len = strlen(file_line); uint file_line_len = strlen(file_line);
if (use_basename) { if (use_basename) {
uint file_line_offset = file_line_len; uint file_line_offset = file_line_len;
while (file_line_offset-- > 0) { while (file_line_offset-- > 0) {
if (file_line[file_line_offset] == PATHSEP_CHAR) { if (file_line[file_line_offset] == PATHSEP_CHAR) {
file_line_offset++; file_line_offset++;
break; break;
} }
} }
file_line += file_line_offset; file_line += file_line_offset;
file_line_len -= file_line_offset; file_line_len -= file_line_offset;
} }
clg_str_append_with_len(cstr, file_line, file_line_len); clg_str_append_with_len(cstr, file_line, file_line_len);
clg_str_append(cstr, " ");
clg_str_append(cstr, " "); clg_str_append(cstr, fn);
clg_str_append(cstr, fn); clg_str_append(cstr, ": ");
clg_str_append(cstr, ": ");
} }
void CLG_log_str( void CLG_log_str(CLG_LogType *lg,
CLG_LogType *lg, enum CLG_Severity severity, const char *file_line, const char *fn, enum CLG_Severity severity,
const char *message) const char *file_line,
const char *fn,
const char *message)
{ {
CLogStringBuf cstr; CLogStringBuf cstr;
char cstr_stack_buf[CLOG_BUF_LEN_INIT]; char cstr_stack_buf[CLOG_BUF_LEN_INIT];
clg_str_init(&cstr, cstr_stack_buf, sizeof(cstr_stack_buf)); clg_str_init(&cstr, cstr_stack_buf, sizeof(cstr_stack_buf));
if (lg->ctx->use_timestamp) { if (lg->ctx->use_timestamp) {
write_timestamp(&cstr, lg->ctx->timestamp_tick_start); write_timestamp(&cstr, lg->ctx->timestamp_tick_start);
} }
write_severity(&cstr, severity, lg->ctx->use_color); write_severity(&cstr, severity, lg->ctx->use_color);
write_type(&cstr, lg); write_type(&cstr, lg);
{ {
write_file_line_fn(&cstr, file_line, fn, lg->ctx->use_basename); write_file_line_fn(&cstr, file_line, fn, lg->ctx->use_basename);
clg_str_append(&cstr, message); clg_str_append(&cstr, message);
} }
clg_str_append(&cstr, "\n"); clg_str_append(&cstr, "\n");
/* could be optional */ /* could be optional */
int bytes_written = write(lg->ctx->output, cstr.data, cstr.len); int bytes_written = write(lg->ctx->output, cstr.data, cstr.len);
(void)bytes_written; (void)bytes_written;
clg_str_free(&cstr); clg_str_free(&cstr);
if (lg->ctx->callbacks.backtrace_fn) { if (lg->ctx->callbacks.backtrace_fn) {
clg_ctx_backtrace(lg->ctx); clg_ctx_backtrace(lg->ctx);
} }
if (severity == CLG_SEVERITY_FATAL) { if (severity == CLG_SEVERITY_FATAL) {
clg_ctx_fatal_action(lg->ctx); clg_ctx_fatal_action(lg->ctx);
} }
} }
void CLG_logf( void CLG_logf(CLG_LogType *lg,
CLG_LogType *lg, enum CLG_Severity severity, const char *file_line, const char *fn, enum CLG_Severity severity,
const char *fmt, ...) const char *file_line,
const char *fn,
const char *fmt,
...)
{ {
CLogStringBuf cstr; CLogStringBuf cstr;
char cstr_stack_buf[CLOG_BUF_LEN_INIT]; char cstr_stack_buf[CLOG_BUF_LEN_INIT];
clg_str_init(&cstr, cstr_stack_buf, sizeof(cstr_stack_buf)); clg_str_init(&cstr, cstr_stack_buf, sizeof(cstr_stack_buf));
if (lg->ctx->use_timestamp) { if (lg->ctx->use_timestamp) {
write_timestamp(&cstr, lg->ctx->timestamp_tick_start); write_timestamp(&cstr, lg->ctx->timestamp_tick_start);
} }
write_severity(&cstr, severity, lg->ctx->use_color); write_severity(&cstr, severity, lg->ctx->use_color);
write_type(&cstr, lg); write_type(&cstr, lg);
{ {
write_file_line_fn(&cstr, file_line, fn, lg->ctx->use_basename); write_file_line_fn(&cstr, file_line, fn, lg->ctx->use_basename);
va_list ap; va_list ap;
va_start(ap, fmt); va_start(ap, fmt);
clg_str_vappendf(&cstr, fmt, ap); clg_str_vappendf(&cstr, fmt, ap);
va_end(ap); va_end(ap);
} }
clg_str_append(&cstr, "\n"); clg_str_append(&cstr, "\n");
/* could be optional */ /* could be optional */
int bytes_written = write(lg->ctx->output, cstr.data, cstr.len); int bytes_written = write(lg->ctx->output, cstr.data, cstr.len);
(void)bytes_written; (void)bytes_written;
clg_str_free(&cstr); clg_str_free(&cstr);
if (lg->ctx->callbacks.backtrace_fn) { if (lg->ctx->callbacks.backtrace_fn) {
clg_ctx_backtrace(lg->ctx); clg_ctx_backtrace(lg->ctx);
} }
if (severity == CLG_SEVERITY_FATAL) { if (severity == CLG_SEVERITY_FATAL) {
clg_ctx_fatal_action(lg->ctx); clg_ctx_fatal_action(lg->ctx);
} }
} }
/** \} */ /** \} */
@@ -521,99 +526,105 @@ void CLG_logf(
static void CLG_ctx_output_set(CLogContext *ctx, void *file_handle) static void CLG_ctx_output_set(CLogContext *ctx, void *file_handle)
{ {
ctx->output_file = file_handle; ctx->output_file = file_handle;
ctx->output = fileno(ctx->output_file); ctx->output = fileno(ctx->output_file);
#if defined(__unix__) || defined(__APPLE__) #if defined(__unix__) || defined(__APPLE__)
ctx->use_color = isatty(ctx->output); ctx->use_color = isatty(ctx->output);
#endif #endif
} }
static void CLG_ctx_output_use_basename_set(CLogContext *ctx, int value) static void CLG_ctx_output_use_basename_set(CLogContext *ctx, int value)
{ {
ctx->use_basename = (bool)value; ctx->use_basename = (bool)value;
} }
static void CLG_ctx_output_use_timestamp_set(CLogContext *ctx, int value) static void CLG_ctx_output_use_timestamp_set(CLogContext *ctx, int value)
{ {
ctx->use_timestamp = (bool)value; ctx->use_timestamp = (bool)value;
if (ctx->use_timestamp) { if (ctx->use_timestamp) {
ctx->timestamp_tick_start = clg_timestamp_ticks_get(); ctx->timestamp_tick_start = clg_timestamp_ticks_get();
} }
} }
/** Action on fatal severity. */ /** Action on fatal severity. */
static void CLG_ctx_fatal_fn_set(CLogContext *ctx, void (*fatal_fn)(void *file_handle)) static void CLG_ctx_fatal_fn_set(CLogContext *ctx, void (*fatal_fn)(void *file_handle))
{ {
ctx->callbacks.fatal_fn = fatal_fn; ctx->callbacks.fatal_fn = fatal_fn;
} }
static void CLG_ctx_backtrace_fn_set(CLogContext *ctx, void (*backtrace_fn)(void *file_handle)) static void CLG_ctx_backtrace_fn_set(CLogContext *ctx, void (*backtrace_fn)(void *file_handle))
{ {
ctx->callbacks.backtrace_fn = backtrace_fn; ctx->callbacks.backtrace_fn = backtrace_fn;
} }
static void clg_ctx_type_filter_append(CLG_IDFilter **flt_list, const char *type_match, int type_match_len) static void clg_ctx_type_filter_append(CLG_IDFilter **flt_list,
const char *type_match,
int type_match_len)
{ {
if (type_match_len == 0) { if (type_match_len == 0) {
return; return;
} }
CLG_IDFilter *flt = MEM_callocN(sizeof(*flt) + (type_match_len + 1), __func__); CLG_IDFilter *flt = MEM_callocN(sizeof(*flt) + (type_match_len + 1), __func__);
flt->next = *flt_list; flt->next = *flt_list;
*flt_list = flt; *flt_list = flt;
memcpy(flt->match, type_match, type_match_len); memcpy(flt->match, type_match, type_match_len);
/* no need to null terminate since we calloc'd */ /* no need to null terminate since we calloc'd */
} }
static void CLG_ctx_type_filter_exclude(CLogContext *ctx, const char *type_match, int type_match_len) static void CLG_ctx_type_filter_exclude(CLogContext *ctx,
const char *type_match,
int type_match_len)
{ {
clg_ctx_type_filter_append(&ctx->filters[0], type_match, type_match_len); clg_ctx_type_filter_append(&ctx->filters[0], type_match, type_match_len);
} }
static void CLG_ctx_type_filter_include(CLogContext *ctx, const char *type_match, int type_match_len) static void CLG_ctx_type_filter_include(CLogContext *ctx,
const char *type_match,
int type_match_len)
{ {
clg_ctx_type_filter_append(&ctx->filters[1], type_match, type_match_len); clg_ctx_type_filter_append(&ctx->filters[1], type_match, type_match_len);
} }
static void CLG_ctx_level_set(CLogContext *ctx, int level) static void CLG_ctx_level_set(CLogContext *ctx, int level)
{ {
ctx->default_type.level = level; ctx->default_type.level = level;
for (CLG_LogType *ty = ctx->types; ty; ty = ty->next) { for (CLG_LogType *ty = ctx->types; ty; ty = ty->next) {
ty->level = level; ty->level = level;
} }
} }
static CLogContext *CLG_ctx_init(void) static CLogContext *CLG_ctx_init(void)
{ {
CLogContext *ctx = MEM_callocN(sizeof(*ctx), __func__); CLogContext *ctx = MEM_callocN(sizeof(*ctx), __func__);
#ifdef WITH_CLOG_PTHREADS #ifdef WITH_CLOG_PTHREADS
pthread_mutex_init(&ctx->types_lock, NULL); pthread_mutex_init(&ctx->types_lock, NULL);
#endif #endif
ctx->use_color = true; ctx->use_color = true;
ctx->default_type.level = 1; ctx->default_type.level = 1;
CLG_ctx_output_set(ctx, stdout); CLG_ctx_output_set(ctx, stdout);
return ctx; return ctx;
} }
static void CLG_ctx_free(CLogContext *ctx) static void CLG_ctx_free(CLogContext *ctx)
{ {
while (ctx->types != NULL) { while (ctx->types != NULL) {
CLG_LogType *item = ctx->types; CLG_LogType *item = ctx->types;
ctx->types = item->next; ctx->types = item->next;
MEM_freeN(item); MEM_freeN(item);
} }
for (uint i = 0; i < 2; i++) { for (uint i = 0; i < 2; i++) {
while (ctx->filters[i] != NULL) { while (ctx->filters[i] != NULL) {
CLG_IDFilter *item = ctx->filters[i]; CLG_IDFilter *item = ctx->filters[i];
ctx->filters[i] = item->next; ctx->filters[i] = item->next;
MEM_freeN(item); MEM_freeN(item);
} }
} }
#ifdef WITH_CLOG_PTHREADS #ifdef WITH_CLOG_PTHREADS
pthread_mutex_destroy(&ctx->types_lock); pthread_mutex_destroy(&ctx->types_lock);
#endif #endif
MEM_freeN(ctx); MEM_freeN(ctx);
} }
/** \} */ /** \} */
@@ -629,57 +640,56 @@ static struct CLogContext *g_ctx = NULL;
void CLG_init(void) void CLG_init(void)
{ {
g_ctx = CLG_ctx_init(); g_ctx = CLG_ctx_init();
clg_color_table_init(g_ctx->use_color); clg_color_table_init(g_ctx->use_color);
} }
void CLG_exit(void) void CLG_exit(void)
{ {
CLG_ctx_free(g_ctx); CLG_ctx_free(g_ctx);
} }
void CLG_output_set(void *file_handle) void CLG_output_set(void *file_handle)
{ {
CLG_ctx_output_set(g_ctx, file_handle); CLG_ctx_output_set(g_ctx, file_handle);
} }
void CLG_output_use_basename_set(int value) void CLG_output_use_basename_set(int value)
{ {
CLG_ctx_output_use_basename_set(g_ctx, value); CLG_ctx_output_use_basename_set(g_ctx, value);
} }
void CLG_output_use_timestamp_set(int value) void CLG_output_use_timestamp_set(int value)
{ {
CLG_ctx_output_use_timestamp_set(g_ctx, value); CLG_ctx_output_use_timestamp_set(g_ctx, value);
} }
void CLG_fatal_fn_set(void (*fatal_fn)(void *file_handle)) void CLG_fatal_fn_set(void (*fatal_fn)(void *file_handle))
{ {
CLG_ctx_fatal_fn_set(g_ctx, fatal_fn); CLG_ctx_fatal_fn_set(g_ctx, fatal_fn);
} }
void CLG_backtrace_fn_set(void (*fatal_fn)(void *file_handle)) void CLG_backtrace_fn_set(void (*fatal_fn)(void *file_handle))
{ {
CLG_ctx_backtrace_fn_set(g_ctx, fatal_fn); CLG_ctx_backtrace_fn_set(g_ctx, fatal_fn);
} }
void CLG_type_filter_exclude(const char *type_match, int type_match_len) void CLG_type_filter_exclude(const char *type_match, int type_match_len)
{ {
CLG_ctx_type_filter_exclude(g_ctx, type_match, type_match_len); CLG_ctx_type_filter_exclude(g_ctx, type_match, type_match_len);
} }
void CLG_type_filter_include(const char *type_match, int type_match_len) void CLG_type_filter_include(const char *type_match, int type_match_len)
{ {
CLG_ctx_type_filter_include(g_ctx, type_match, type_match_len); CLG_ctx_type_filter_include(g_ctx, type_match, type_match_len);
} }
void CLG_level_set(int level) void CLG_level_set(int level)
{ {
CLG_ctx_level_set(g_ctx, level); CLG_ctx_level_set(g_ctx, level);
} }
/** \} */ /** \} */
/* -------------------------------------------------------------------- */ /* -------------------------------------------------------------------- */
@@ -690,22 +700,22 @@ void CLG_level_set(int level)
void CLG_logref_init(CLG_LogRef *clg_ref) void CLG_logref_init(CLG_LogRef *clg_ref)
{ {
#ifdef WITH_CLOG_PTHREADS #ifdef WITH_CLOG_PTHREADS
/* Only runs once when initializing a static type in most cases. */ /* Only runs once when initializing a static type in most cases. */
pthread_mutex_lock(&g_ctx->types_lock); pthread_mutex_lock(&g_ctx->types_lock);
#endif #endif
if (clg_ref->type == NULL) { if (clg_ref->type == NULL) {
CLG_LogType *clg_ty = clg_ctx_type_find_by_name(g_ctx, clg_ref->identifier); CLG_LogType *clg_ty = clg_ctx_type_find_by_name(g_ctx, clg_ref->identifier);
if (clg_ty == NULL) { if (clg_ty == NULL) {
clg_ty = clg_ctx_type_register(g_ctx, clg_ref->identifier); clg_ty = clg_ctx_type_register(g_ctx, clg_ref->identifier);
} }
#ifdef WITH_CLOG_PTHREADS #ifdef WITH_CLOG_PTHREADS
atomic_cas_ptr((void **)&clg_ref->type, clg_ref->type, clg_ty); atomic_cas_ptr((void **)&clg_ref->type, clg_ref->type, clg_ty);
#else #else
clg_ref->type = clg_ty; clg_ref->type = clg_ty;
#endif #endif
} }
#ifdef WITH_CLOG_PTHREADS #ifdef WITH_CLOG_PTHREADS
pthread_mutex_unlock(&g_ctx->types_lock); pthread_mutex_unlock(&g_ctx->types_lock);
#endif #endif
} }

View File

@@ -1,16 +1,16 @@
# Standalone or with Blender # Standalone or with Blender
if(NOT WITH_BLENDER AND WITH_CYCLES_STANDALONE) if(NOT WITH_BLENDER AND WITH_CYCLES_STANDALONE)
set(CYCLES_INSTALL_PATH "") set(CYCLES_INSTALL_PATH "")
else() else()
set(WITH_CYCLES_BLENDER ON) set(WITH_CYCLES_BLENDER ON)
# WINDOWS_PYTHON_DEBUG needs to write into the user addons folder since it will # WINDOWS_PYTHON_DEBUG needs to write into the user addons folder since it will
# be started with --env-system-scripts pointing to the release folder, which will # be started with --env-system-scripts pointing to the release folder, which will
# lack the cycles addon, and we don't want to write into it. # lack the cycles addon, and we don't want to write into it.
if(NOT WINDOWS_PYTHON_DEBUG) if(NOT WINDOWS_PYTHON_DEBUG)
set(CYCLES_INSTALL_PATH "scripts/addons/cycles") set(CYCLES_INSTALL_PATH "scripts/addons/cycles")
else() else()
set(CYCLES_INSTALL_PATH "$ENV{appdata}/blender foundation/blender/${BLENDER_VERSION}/scripts/addons/cycles") set(CYCLES_INSTALL_PATH "$ENV{appdata}/blender foundation/blender/${BLENDER_VERSION}/scripts/addons/cycles")
endif() endif()
endif() endif()
# External Libraries # External Libraries
@@ -23,329 +23,329 @@ include(cmake/macros.cmake)
# note: CXX_HAS_SSE is needed in case passing SSE flags fails altogether (gcc-arm) # note: CXX_HAS_SSE is needed in case passing SSE flags fails altogether (gcc-arm)
if(WITH_CYCLES_NATIVE_ONLY) if(WITH_CYCLES_NATIVE_ONLY)
set(CXX_HAS_SSE FALSE) set(CXX_HAS_SSE FALSE)
set(CXX_HAS_AVX FALSE) set(CXX_HAS_AVX FALSE)
set(CXX_HAS_AVX2 FALSE) set(CXX_HAS_AVX2 FALSE)
add_definitions( add_definitions(
-DWITH_KERNEL_NATIVE -DWITH_KERNEL_NATIVE
) )
if(NOT MSVC) if(NOT MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
set(CYCLES_KERNEL_FLAGS "-march=native") set(CYCLES_KERNEL_FLAGS "-march=native")
endif() endif()
elseif(NOT WITH_CPU_SSE) elseif(NOT WITH_CPU_SSE)
set(CXX_HAS_SSE FALSE) set(CXX_HAS_SSE FALSE)
set(CXX_HAS_AVX FALSE) set(CXX_HAS_AVX FALSE)
set(CXX_HAS_AVX2 FALSE) set(CXX_HAS_AVX2 FALSE)
elseif(WIN32 AND MSVC AND NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") elseif(WIN32 AND MSVC AND NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CXX_HAS_SSE TRUE) set(CXX_HAS_SSE TRUE)
set(CXX_HAS_AVX TRUE) set(CXX_HAS_AVX TRUE)
set(CXX_HAS_AVX2 TRUE) set(CXX_HAS_AVX2 TRUE)
# /arch:AVX for VC2012 and above # /arch:AVX for VC2012 and above
if(NOT MSVC_VERSION LESS 1700) if(NOT MSVC_VERSION LESS 1700)
set(CYCLES_AVX_ARCH_FLAGS "/arch:AVX") set(CYCLES_AVX_ARCH_FLAGS "/arch:AVX")
set(CYCLES_AVX2_ARCH_FLAGS "/arch:AVX /arch:AVX2") set(CYCLES_AVX2_ARCH_FLAGS "/arch:AVX /arch:AVX2")
elseif(NOT CMAKE_CL_64) elseif(NOT CMAKE_CL_64)
set(CYCLES_AVX_ARCH_FLAGS "/arch:SSE2") set(CYCLES_AVX_ARCH_FLAGS "/arch:SSE2")
set(CYCLES_AVX2_ARCH_FLAGS "/arch:SSE2") set(CYCLES_AVX2_ARCH_FLAGS "/arch:SSE2")
endif() endif()
# Unlike GCC/clang we still use fast math, because there is no fine # Unlike GCC/clang we still use fast math, because there is no fine
# grained control and the speedup we get here is too big to ignore. # grained control and the speedup we get here is too big to ignore.
set(CYCLES_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") set(CYCLES_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-")
# there is no /arch:SSE3, but intrinsics are available anyway # there is no /arch:SSE3, but intrinsics are available anyway
if(CMAKE_CL_64) if(CMAKE_CL_64)
set(CYCLES_SSE2_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS}") set(CYCLES_SSE2_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS}")
set(CYCLES_SSE3_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS}") set(CYCLES_SSE3_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS}")
set(CYCLES_SSE41_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS}") set(CYCLES_SSE41_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS}")
set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}") set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_AVX2_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}") set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_AVX2_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
else() else()
set(CYCLES_SSE2_KERNEL_FLAGS "/arch:SSE2 ${CYCLES_KERNEL_FLAGS}") set(CYCLES_SSE2_KERNEL_FLAGS "/arch:SSE2 ${CYCLES_KERNEL_FLAGS}")
set(CYCLES_SSE3_KERNEL_FLAGS "/arch:SSE2 ${CYCLES_KERNEL_FLAGS}") set(CYCLES_SSE3_KERNEL_FLAGS "/arch:SSE2 ${CYCLES_KERNEL_FLAGS}")
set(CYCLES_SSE41_KERNEL_FLAGS "/arch:SSE2 ${CYCLES_KERNEL_FLAGS}") set(CYCLES_SSE41_KERNEL_FLAGS "/arch:SSE2 ${CYCLES_KERNEL_FLAGS}")
set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}") set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_AVX2_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}") set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_AVX2_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
endif() endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CYCLES_KERNEL_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CYCLES_KERNEL_FLAGS}")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Ox") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Ox")
set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /Ox") set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /Ox")
elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
check_cxx_compiler_flag(-msse CXX_HAS_SSE) check_cxx_compiler_flag(-msse CXX_HAS_SSE)
check_cxx_compiler_flag(-mavx CXX_HAS_AVX) check_cxx_compiler_flag(-mavx CXX_HAS_AVX)
check_cxx_compiler_flag(-mavx2 CXX_HAS_AVX2) check_cxx_compiler_flag(-mavx2 CXX_HAS_AVX2)
# Assume no signal trapping for better code generation. # Assume no signal trapping for better code generation.
set(CYCLES_KERNEL_FLAGS "-fno-trapping-math") set(CYCLES_KERNEL_FLAGS "-fno-trapping-math")
# Avoid overhead of setting errno for NaNs. # Avoid overhead of setting errno for NaNs.
set(CYCLES_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -fno-math-errno") set(CYCLES_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -fno-math-errno")
# Let compiler optimize 0.0 - x without worrying about signed zeros. # Let compiler optimize 0.0 - x without worrying about signed zeros.
set(CYCLES_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -fno-signed-zeros") set(CYCLES_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -fno-signed-zeros")
if(CMAKE_COMPILER_IS_GNUCC) if(CMAKE_COMPILER_IS_GNUCC)
# Assume no signal trapping for better code generation. # Assume no signal trapping for better code generation.
set(CYCLES_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -fno-signaling-nans") set(CYCLES_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -fno-signaling-nans")
# Assume a fixed rounding mode for better constant folding. # Assume a fixed rounding mode for better constant folding.
set(CYCLES_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -fno-rounding-math") set(CYCLES_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -fno-rounding-math")
endif() endif()
if(CXX_HAS_SSE) if(CXX_HAS_SSE)
if(CMAKE_COMPILER_IS_GNUCC) if(CMAKE_COMPILER_IS_GNUCC)
set(CYCLES_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -mfpmath=sse") set(CYCLES_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -mfpmath=sse")
endif() endif()
set(CYCLES_SSE2_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -msse -msse2") set(CYCLES_SSE2_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -msse -msse2")
set(CYCLES_SSE3_KERNEL_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS} -msse3 -mssse3") set(CYCLES_SSE3_KERNEL_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS} -msse3 -mssse3")
set(CYCLES_SSE41_KERNEL_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS} -msse4.1") set(CYCLES_SSE41_KERNEL_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS} -msse4.1")
if(CXX_HAS_AVX) if(CXX_HAS_AVX)
set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS} -mavx") set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS} -mavx")
endif() endif()
if(CXX_HAS_AVX2) if(CXX_HAS_AVX2)
set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS} -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c") set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS} -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c")
endif() endif()
endif() endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CYCLES_KERNEL_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CYCLES_KERNEL_FLAGS}")
elseif(WIN32 AND CMAKE_CXX_COMPILER_ID MATCHES "Intel") elseif(WIN32 AND CMAKE_CXX_COMPILER_ID MATCHES "Intel")
check_cxx_compiler_flag(/QxSSE2 CXX_HAS_SSE) check_cxx_compiler_flag(/QxSSE2 CXX_HAS_SSE)
check_cxx_compiler_flag(/arch:AVX CXX_HAS_AVX) check_cxx_compiler_flag(/arch:AVX CXX_HAS_AVX)
check_cxx_compiler_flag(/QxCORE-AVX2 CXX_HAS_AVX2) check_cxx_compiler_flag(/QxCORE-AVX2 CXX_HAS_AVX2)
if(CXX_HAS_SSE) if(CXX_HAS_SSE)
set(CYCLES_SSE2_KERNEL_FLAGS "/QxSSE2") set(CYCLES_SSE2_KERNEL_FLAGS "/QxSSE2")
set(CYCLES_SSE3_KERNEL_FLAGS "/QxSSSE3") set(CYCLES_SSE3_KERNEL_FLAGS "/QxSSSE3")
set(CYCLES_SSE41_KERNEL_FLAGS "/QxSSE4.1") set(CYCLES_SSE41_KERNEL_FLAGS "/QxSSE4.1")
if(CXX_HAS_AVX) if(CXX_HAS_AVX)
set(CYCLES_AVX_KERNEL_FLAGS "/arch:AVX") set(CYCLES_AVX_KERNEL_FLAGS "/arch:AVX")
endif() endif()
if(CXX_HAS_AVX2) if(CXX_HAS_AVX2)
set(CYCLES_AVX2_KERNEL_FLAGS "/QxCORE-AVX2") set(CYCLES_AVX2_KERNEL_FLAGS "/QxCORE-AVX2")
endif() endif()
endif() endif()
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
if(APPLE) if(APPLE)
# ICC does not support SSE2 flag on MacOSX # ICC does not support SSE2 flag on MacOSX
check_cxx_compiler_flag(-xssse3 CXX_HAS_SSE) check_cxx_compiler_flag(-xssse3 CXX_HAS_SSE)
else() else()
check_cxx_compiler_flag(-xsse2 CXX_HAS_SSE) check_cxx_compiler_flag(-xsse2 CXX_HAS_SSE)
endif() endif()
check_cxx_compiler_flag(-xavx CXX_HAS_AVX) check_cxx_compiler_flag(-xavx CXX_HAS_AVX)
check_cxx_compiler_flag(-xcore-avx2 CXX_HAS_AVX2) check_cxx_compiler_flag(-xcore-avx2 CXX_HAS_AVX2)
if(CXX_HAS_SSE) if(CXX_HAS_SSE)
if(APPLE) if(APPLE)
# ICC does not support SSE2 flag on MacOSX # ICC does not support SSE2 flag on MacOSX
set(CYCLES_SSE2_KERNEL_FLAGS "-xssse3") set(CYCLES_SSE2_KERNEL_FLAGS "-xssse3")
else() else()
set(CYCLES_SSE2_KERNEL_FLAGS "-xsse2") set(CYCLES_SSE2_KERNEL_FLAGS "-xsse2")
endif() endif()
set(CYCLES_SSE3_KERNEL_FLAGS "-xssse3") set(CYCLES_SSE3_KERNEL_FLAGS "-xssse3")
set(CYCLES_SSE41_KERNEL_FLAGS "-xsse4.1") set(CYCLES_SSE41_KERNEL_FLAGS "-xsse4.1")
if(CXX_HAS_AVX) if(CXX_HAS_AVX)
set(CYCLES_AVX_KERNEL_FLAGS "-xavx") set(CYCLES_AVX_KERNEL_FLAGS "-xavx")
endif() endif()
if(CXX_HAS_AVX2) if(CXX_HAS_AVX2)
set(CYCLES_AVX2_KERNEL_FLAGS "-xcore-avx2") set(CYCLES_AVX2_KERNEL_FLAGS "-xcore-avx2")
endif() endif()
endif() endif()
endif() endif()
if(CXX_HAS_SSE) if(CXX_HAS_SSE)
add_definitions( add_definitions(
-DWITH_KERNEL_SSE2 -DWITH_KERNEL_SSE2
-DWITH_KERNEL_SSE3 -DWITH_KERNEL_SSE3
-DWITH_KERNEL_SSE41 -DWITH_KERNEL_SSE41
) )
endif() endif()
if(CXX_HAS_AVX) if(CXX_HAS_AVX)
add_definitions(-DWITH_KERNEL_AVX) add_definitions(-DWITH_KERNEL_AVX)
endif() endif()
if(CXX_HAS_AVX2) if(CXX_HAS_AVX2)
add_definitions(-DWITH_KERNEL_AVX2) add_definitions(-DWITH_KERNEL_AVX2)
endif() endif()
if(WITH_CYCLES_OSL) if(WITH_CYCLES_OSL)
if(WIN32 AND MSVC) if(WIN32 AND MSVC)
set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID") set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang")) elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang"))
set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID") set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
endif() endif()
endif() endif()
# Definitions and Includes # Definitions and Includes
add_definitions( add_definitions(
${BOOST_DEFINITIONS} ${BOOST_DEFINITIONS}
${OPENIMAGEIO_DEFINITIONS} ${OPENIMAGEIO_DEFINITIONS}
) )
add_definitions( add_definitions(
-DCCL_NAMESPACE_BEGIN=namespace\ ccl\ { -DCCL_NAMESPACE_BEGIN=namespace\ ccl\ {
-DCCL_NAMESPACE_END=} -DCCL_NAMESPACE_END=}
) )
if(WITH_CYCLES_STANDALONE_GUI) if(WITH_CYCLES_STANDALONE_GUI)
add_definitions(-DWITH_CYCLES_STANDALONE_GUI) add_definitions(-DWITH_CYCLES_STANDALONE_GUI)
endif() endif()
if(WITH_CYCLES_PTEX) if(WITH_CYCLES_PTEX)
add_definitions(-DWITH_PTEX) add_definitions(-DWITH_PTEX)
endif() endif()
if(WITH_CYCLES_OSL) if(WITH_CYCLES_OSL)
add_definitions(-DWITH_OSL) add_definitions(-DWITH_OSL)
#osl 1.9.x #osl 1.9.x
add_definitions(-DOSL_STATIC_BUILD) add_definitions(-DOSL_STATIC_BUILD)
#pre 1.9 #pre 1.9
add_definitions(-DOSL_STATIC_LIBRARY) add_definitions(-DOSL_STATIC_LIBRARY)
include_directories( include_directories(
SYSTEM SYSTEM
${OSL_INCLUDE_DIR} ${OSL_INCLUDE_DIR}
) )
endif() endif()
if(WITH_CYCLES_EMBREE) if(WITH_CYCLES_EMBREE)
add_definitions(-DWITH_EMBREE) add_definitions(-DWITH_EMBREE)
add_definitions(-DEMBREE_STATIC_LIB) add_definitions(-DEMBREE_STATIC_LIB)
include_directories( include_directories(
SYSTEM SYSTEM
${EMBREE_INCLUDE_DIRS} ${EMBREE_INCLUDE_DIRS}
) )
endif() endif()
if(WITH_OPENSUBDIV) if(WITH_OPENSUBDIV)
add_definitions(-DWITH_OPENSUBDIV) add_definitions(-DWITH_OPENSUBDIV)
include_directories( include_directories(
SYSTEM SYSTEM
${OPENSUBDIV_INCLUDE_DIR} ${OPENSUBDIV_INCLUDE_DIR}
) )
endif() endif()
if(WITH_CYCLES_STANDALONE) if(WITH_CYCLES_STANDALONE)
set(WITH_CYCLES_DEVICE_OPENCL TRUE) set(WITH_CYCLES_DEVICE_OPENCL TRUE)
set(WITH_CYCLES_DEVICE_CUDA TRUE) set(WITH_CYCLES_DEVICE_CUDA TRUE)
# Experimental and unfinished. # Experimental and unfinished.
set(WITH_CYCLES_NETWORK FALSE) set(WITH_CYCLES_NETWORK FALSE)
endif() endif()
# TODO(sergey): Consider removing it, only causes confusion in interface. # TODO(sergey): Consider removing it, only causes confusion in interface.
set(WITH_CYCLES_DEVICE_MULTI TRUE) set(WITH_CYCLES_DEVICE_MULTI TRUE)
# Logging capabilities using GLog library. # Logging capabilities using GLog library.
if(WITH_CYCLES_LOGGING) if(WITH_CYCLES_LOGGING)
add_definitions(-DWITH_CYCLES_LOGGING) add_definitions(-DWITH_CYCLES_LOGGING)
add_definitions(${GLOG_DEFINES}) add_definitions(${GLOG_DEFINES})
add_definitions(-DCYCLES_GFLAGS_NAMESPACE=${GFLAGS_NAMESPACE}) add_definitions(-DCYCLES_GFLAGS_NAMESPACE=${GFLAGS_NAMESPACE})
include_directories( include_directories(
SYSTEM SYSTEM
${GLOG_INCLUDE_DIRS} ${GLOG_INCLUDE_DIRS}
${GFLAGS_INCLUDE_DIRS} ${GFLAGS_INCLUDE_DIRS}
) )
endif() endif()
# Debugging capabilities (debug passes etc). # Debugging capabilities (debug passes etc).
if(WITH_CYCLES_DEBUG) if(WITH_CYCLES_DEBUG)
add_definitions(-DWITH_CYCLES_DEBUG) add_definitions(-DWITH_CYCLES_DEBUG)
endif() endif()
if(NOT OPENIMAGEIO_PUGIXML_FOUND) if(NOT OPENIMAGEIO_PUGIXML_FOUND)
add_definitions(-DWITH_SYSTEM_PUGIXML) add_definitions(-DWITH_SYSTEM_PUGIXML)
endif() endif()
include_directories( include_directories(
SYSTEM SYSTEM
${BOOST_INCLUDE_DIR} ${BOOST_INCLUDE_DIR}
${OPENIMAGEIO_INCLUDE_DIRS} ${OPENIMAGEIO_INCLUDE_DIRS}
${OPENIMAGEIO_INCLUDE_DIRS}/OpenImageIO ${OPENIMAGEIO_INCLUDE_DIRS}/OpenImageIO
${OPENEXR_INCLUDE_DIR} ${OPENEXR_INCLUDE_DIR}
${OPENEXR_INCLUDE_DIRS} ${OPENEXR_INCLUDE_DIRS}
${PUGIXML_INCLUDE_DIR} ${PUGIXML_INCLUDE_DIR}
) )
if(CYCLES_STANDALONE_REPOSITORY) if(CYCLES_STANDALONE_REPOSITORY)
include_directories(../third_party/atomic) include_directories(../third_party/atomic)
else() else()
include_directories(../atomic) include_directories(../atomic)
endif() endif()
# Warnings # Warnings
if(CMAKE_COMPILER_IS_GNUCXX) if(CMAKE_COMPILER_IS_GNUCXX)
ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_cxxflag_float_conversion "-Werror=float-conversion") ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_cxxflag_float_conversion "-Werror=float-conversion")
ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_cxxflag_double_promotion "-Werror=double-promotion") ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_cxxflag_double_promotion "-Werror=double-promotion")
ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_no_error_unused_macros "-Wno-error=unused-macros") ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_no_error_unused_macros "-Wno-error=unused-macros")
unset(_has_cxxflag_float_conversion) unset(_has_cxxflag_float_conversion)
unset(_has_cxxflag_double_promotion) unset(_has_cxxflag_double_promotion)
unset(_has_no_error_unused_macros) unset(_has_no_error_unused_macros)
endif() endif()
if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER)) if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER))
if(MSVC) if(MSVC)
set(MAX_MSVC 1800) set(MAX_MSVC 1800)
if(${CUDA_VERSION} EQUAL "8.0") if(${CUDA_VERSION} EQUAL "8.0")
set(MAX_MSVC 1900) set(MAX_MSVC 1900)
elseif(${CUDA_VERSION} EQUAL "9.0") elseif(${CUDA_VERSION} EQUAL "9.0")
set(MAX_MSVC 1910) set(MAX_MSVC 1910)
elseif(${CUDA_VERSION} EQUAL "9.1") elseif(${CUDA_VERSION} EQUAL "9.1")
set(MAX_MSVC 1911) set(MAX_MSVC 1911)
elseif(${CUDA_VERSION} EQUAL "10.0") elseif(${CUDA_VERSION} EQUAL "10.0")
set(MAX_MSVC 1999) set(MAX_MSVC 1999)
elseif(${CUDA_VERSION} EQUAL "10.1") elseif(${CUDA_VERSION} EQUAL "10.1")
set(MAX_MSVC 1999) set(MAX_MSVC 1999)
endif() endif()
if(NOT MSVC_VERSION LESS ${MAX_MSVC} OR CMAKE_C_COMPILER_ID MATCHES "Clang") if(NOT MSVC_VERSION LESS ${MAX_MSVC} OR CMAKE_C_COMPILER_ID MATCHES "Clang")
message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.") message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.")
set(WITH_CYCLES_CUBIN_COMPILER ON) set(WITH_CYCLES_CUBIN_COMPILER ON)
endif() endif()
unset(MAX_MSVC) unset(MAX_MSVC)
elseif(APPLE) elseif(APPLE)
if(NOT (${XCODE_VERSION} VERSION_LESS 10.0)) if(NOT (${XCODE_VERSION} VERSION_LESS 10.0))
message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.") message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.")
set(WITH_CYCLES_CUBIN_COMPILER ON) set(WITH_CYCLES_CUBIN_COMPILER ON)
endif() endif()
endif() endif()
endif() endif()
# NVRTC gives wrong rendering result in CUDA 10.0, so we must use NVCC. # NVRTC gives wrong rendering result in CUDA 10.0, so we must use NVCC.
if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER) if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER)
if(NOT (${CUDA_VERSION} VERSION_LESS 10.0)) if(NOT (${CUDA_VERSION} VERSION_LESS 10.0))
message(STATUS "cycles_cubin_cc not supported for CUDA 10.0+, using nvcc instead.") message(STATUS "cycles_cubin_cc not supported for CUDA 10.0+, using nvcc instead.")
set(WITH_CYCLES_CUBIN_COMPILER OFF) set(WITH_CYCLES_CUBIN_COMPILER OFF)
endif() endif()
endif() endif()
# Subdirectories # Subdirectories
if(WITH_CYCLES_BLENDER) if(WITH_CYCLES_BLENDER)
add_definitions(-DWITH_BLENDER_GUARDEDALLOC) add_definitions(-DWITH_BLENDER_GUARDEDALLOC)
add_subdirectory(blender) add_subdirectory(blender)
endif() endif()
if(WITH_CYCLES_NETWORK) if(WITH_CYCLES_NETWORK)
add_definitions(-DWITH_NETWORK) add_definitions(-DWITH_NETWORK)
endif() endif()
if(WITH_OPENCOLORIO) if(WITH_OPENCOLORIO)
add_definitions(-DWITH_OCIO) add_definitions(-DWITH_OCIO)
include_directories( include_directories(
SYSTEM SYSTEM
${OPENCOLORIO_INCLUDE_DIRS} ${OPENCOLORIO_INCLUDE_DIRS}
) )
endif() endif()
if(WITH_CYCLES_STANDALONE OR WITH_CYCLES_NETWORK OR WITH_CYCLES_CUBIN_COMPILER) if(WITH_CYCLES_STANDALONE OR WITH_CYCLES_NETWORK OR WITH_CYCLES_CUBIN_COMPILER)
add_subdirectory(app) add_subdirectory(app)
endif() endif()
add_subdirectory(bvh) add_subdirectory(bvh)
@@ -359,9 +359,9 @@ add_subdirectory(util)
# TODO(sergey): Make this to work with standalone repository.
if(WITH_GTESTS)
  add_subdirectory(test)
endif()

# Standalone builds made outside of Blender hand the bin/ directory of the
# build tree to delayed_do_install().
if(WITH_CYCLES_STANDALONE AND NOT WITH_BLENDER)
  delayed_do_install(${CMAKE_BINARY_DIR}/bin)
endif()

View File

@@ -1,6 +1,6 @@
# Include directories: the parent directory, and no system directories.
set(INC ..)
set(INC_SYS)
@@ -8,46 +8,46 @@ set(INC_SYS
# NOTE: LIBRARIES contains all the libraries which are common
# across release and debug build types, stored in a linking order.
# Entries must therefore not be sorted or regrouped.
set(LIBRARIES
  cycles_device
  cycles_kernel
  cycles_render
  cycles_bvh
  cycles_subd
  cycles_graph
  cycles_util
  ${BLENDER_GL_LIBRARIES}
  ${CYCLES_APP_GLEW_LIBRARY}
  ${PNG_LIBRARIES}
  ${JPEG_LIBRARIES}
  ${ZLIB_LIBRARIES}
  ${TIFF_LIBRARY}
  ${PTHREADS_LIBRARIES}
  extern_clew
)

# CUDA: extern_cuew when WITH_CUDA_DYNLOAD is set, the CUDA library otherwise.
if(WITH_CUDA_DYNLOAD)
  list(APPEND LIBRARIES extern_cuew)
else()
  list(APPEND LIBRARIES ${CUDA_CUDA_LIBRARY})
endif()

if(WITH_CYCLES_OSL)
  list(APPEND LIBRARIES cycles_kernel_osl)
endif()

# Blender-internal libraries, skipped for the standalone repository.
if(NOT CYCLES_STANDALONE_REPOSITORY)
  list(APPEND LIBRARIES bf_intern_glew_mx bf_intern_guardedalloc bf_intern_numaapi)
endif()

if(WITH_CYCLES_LOGGING)
  list(APPEND LIBRARIES ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES})
endif()

# GLUT is only linked for the standalone application built with its GUI.
if(WITH_CYCLES_STANDALONE AND WITH_CYCLES_STANDALONE_GUI)
  list(APPEND LIBRARIES ${GLUT_LIBRARIES})
endif()
# Common configuration. # Common configuration.
@@ -62,7 +62,7 @@ link_directories(${OPENIMAGEIO_LIBPATH}
${OPENJPEG_LIBPATH}) ${OPENJPEG_LIBPATH})
# Library search path for OpenColorIO, only when it is enabled.
if(WITH_OPENCOLORIO)
  link_directories(${OPENCOLORIO_LIBPATH})
endif()

# Definitions carried by GL_DEFINITIONS apply to every target in this directory.
add_definitions(${GL_DEFINITIONS})
@@ -78,90 +78,90 @@ include_directories(SYSTEM ${INC_SYS})
# #
# TODO(sergey): Think of a better place for this? # TODO(sergey): Think of a better place for this?
# cycles_target_link_libraries(<target>)
#
# Links <target> against the common LIBRARIES list, then the optional
# feature libraries, then the image/XML/Boost/platform libraries.
# The calls are kept in this exact sequence because each one appends to the
# target's link line.
macro(cycles_target_link_libraries target)
  target_link_libraries(${target} ${LIBRARIES})

  # Optional features.
  if(WITH_CYCLES_OSL)
    target_link_libraries(${target} ${OSL_LIBRARIES} ${LLVM_LIBRARIES})
  endif()
  if(WITH_CYCLES_EMBREE)
    target_link_libraries(${target} ${EMBREE_LIBRARIES})
  endif()
  if(WITH_OPENSUBDIV)
    target_link_libraries(${target} ${OPENSUBDIV_LIBRARIES})
  endif()
  if(WITH_OPENCOLORIO)
    target_link_libraries(${target} ${OPENCOLORIO_LIBRARIES})
  endif()

  # Libraries every application target needs.
  target_link_libraries(${target}
    ${OPENIMAGEIO_LIBRARIES}
    ${OPENEXR_LIBRARIES}
    ${OPENJPEG_LIBRARIES}
    ${PUGIXML_LIBRARIES}
    ${BOOST_LIBRARIES}
    ${CMAKE_DL_LIBS}
    ${PLATFORM_LINKLIBS}
  )
endmacro()
# Application build targets # Application build targets
# Standalone renderer executable.
if(WITH_CYCLES_STANDALONE)
  set(SRC
    cycles_standalone.cpp
    cycles_xml.cpp
    cycles_xml.h
  )

  add_executable(cycles ${SRC})
  cycles_target_link_libraries(cycles)

  # On non-Apple UNIX the installed binary looks for shared libraries in a
  # lib/ directory next to itself.
  if(UNIX AND NOT APPLE)
    set_target_properties(cycles PROPERTIES INSTALL_RPATH "$ORIGIN/lib")
  endif()

  unset(SRC)
endif()
# Network render server executable.
if(WITH_CYCLES_NETWORK)
  set(SRC cycles_server.cpp)

  add_executable(cycles_server ${SRC})
  cycles_target_link_libraries(cycles_server)

  # On non-Apple UNIX the installed binary looks for shared libraries in a
  # lib/ directory next to itself.
  if(UNIX AND NOT APPLE)
    set_target_properties(cycles_server PROPERTIES INSTALL_RPATH "$ORIGIN/lib")
  endif()

  unset(SRC)
endif()
# NVRTC based cubin compiler executable.
if(WITH_CYCLES_CUBIN_COMPILER)
  # 32 bit windows is special: nvrtc is not supported on x86, so a 32 bit
  # blender build would need a 64 bit cubin_cc to compile the cubins.
  # That is not attempted here; CUDA binaries are forced off for such builds.
  if(MSVC AND NOT CMAKE_CL_64)
    message("Building with CUDA not supported on 32 bit, skipped")
    set(WITH_CYCLES_CUDA_BINARIES OFF CACHE BOOL "" FORCE)
  else()
    set(SRC cycles_cubin_cc.cpp)
    set(INC ../../../extern/cuew/include)

    add_executable(cycles_cubin_cc ${SRC})
    include_directories(${INC})

    target_link_libraries(cycles_cubin_cc
      extern_cuew
      ${OPENIMAGEIO_LIBRARIES}
      ${OPENEXR_LIBRARIES}
      ${OPENJPEG_LIBRARIES}
      ${PUGIXML_LIBRARIES}
      ${BOOST_LIBRARIES}
      ${PLATFORM_LINKLIBS}
    )

    # Blender's guarded allocator is only linked when building inside Blender.
    if(NOT CYCLES_STANDALONE_REPOSITORY)
      target_link_libraries(cycles_cubin_cc bf_intern_guardedalloc)
    endif()

    unset(SRC)
    unset(INC)
  endif()
endif()

View File

@@ -26,272 +26,286 @@
#include "cuew.h" #include "cuew.h"
#ifdef _MSC_VER #ifdef _MSC_VER
# include <Windows.h> # include <Windows.h>
#endif #endif
using std::string; using std::string;
using std::vector; using std::vector;
namespace std {

/* Format any value that supports `operator<<` on an output stream as a string.
 *
 * NOTE(review): adding declarations to namespace std is formally undefined behavior and
 * C++11 standard libraries already ship std::to_string overloads for arithmetic types.
 * Presumably this exists for an older toolchain -- confirm before removing, since the
 * rest of this file calls std::to_string(). */
template<typename T> std::string to_string(const T &value)
{
  std::ostringstream stream;
  stream << value;
  return stream.str();
}

}  // namespace std

/* Settings for a single cubin compilation, filled in from the command line. */
class CompilationSettings {
 public:
  /* Defaults: no target architecture chosen yet, 64 bit, quiet, precise math. */
  CompilationSettings()
      : target_arch(0),
        bits(64),
        verbose(false),
        fast_math(false)
  {
  }

  /* CUDA toolkit location (-cuda-toolkit-dir). */
  std::string cuda_toolkit_dir;
  /* CUDA source to compile (-i). */
  std::string input_file;
  /* Resulting cubin (-o). */
  std::string output_file;
  /* Intermediate PTX file, set by the compile step. */
  std::string ptx_file;
  /* Preprocessor defines (-D). */
  std::vector<std::string> defines;
  /* Include search paths (-I). */
  std::vector<std::string> includes;
  /* Target shader model (-target); 0 means "not set". */
  int target_arch;
  /* CUDA architecture bits (-m). */
  int bits;
  /* Verbose logging (-v). */
  bool verbose;
  /* Fast math (--use_fast_math). */
  bool fast_math;
};
/* Compile the CUDA source named by settings.input_file to PTX with NVRTC.
 *
 * On success the PTX is written to a uniquely named file in the temp directory, its path is
 * stored in settings.ptx_file and true is returned. Every failure is reported on stderr and
 * returns false. */
static bool compile_cuda(CompilationSettings &settings)
{
  /* Names and (empty) contents of the in-memory headers handed to NVRTC. */
  const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h"};
  const char *header_content[] = {"\n", "\n", "\n", "\n"};

  printf("Building %s\n", settings.input_file.c_str());

  string code;
  if (!OIIO::Filesystem::read_text_file(settings.input_file, code)) {
    fprintf(stderr, "Error: unable to read %s\n", settings.input_file.c_str());
    return false;
  }

  /* Assemble the compiler command line. */
  vector<string> options;
  for (size_t i = 0; i < settings.includes.size(); i++) {
    options.push_back("-I" + settings.includes[i]);
  }
  for (size_t i = 0; i < settings.defines.size(); i++) {
    options.push_back("-D" + settings.defines[i]);
  }
  options.push_back("-D__KERNEL_CUDA_VERSION__=" + std::to_string(cuewNvrtcVersion()));
  options.push_back("-arch=compute_" + std::to_string(settings.target_arch));
  options.push_back("--device-as-default-execution-space");
  if (settings.fast_math) {
    options.push_back("--use_fast_math");
  }

  nvrtcProgram prog;
  nvrtcResult result = nvrtcCreateProgram(&prog,
                                          code.c_str(),                          // buffer
                                          NULL,                                  // name
                                          sizeof(headers) / sizeof(headers[0]),  // numHeaders
                                          header_content,                        // headers
                                          headers);                              // includeNames
  if (result != NVRTC_SUCCESS) {
    fprintf(stderr, "Error: nvrtcCreateProgram failed (%d)\n\n", (int)result);
    return false;
  }

  /* Transfer options to a classic C array. The vector is never empty here, the options
   * above are always added. */
  vector<const char *> opts(options.size());
  for (size_t i = 0; i < options.size(); i++) {
    opts[i] = options[i].c_str();
  }

  result = nvrtcCompileProgram(prog, options.size(), &opts[0]);
  if (result != NVRTC_SUCCESS) {
    fprintf(stderr, "Error: nvrtcCompileProgram failed (%d)\n\n", (int)result);

    /* Only print the log when its size could actually be queried; the extra zeroed byte
     * guarantees termination for the %s below. */
    size_t log_size = 0;
    if (nvrtcGetProgramLogSize(prog, &log_size) == NVRTC_SUCCESS && log_size > 0) {
      vector<char> log(log_size + 1);
      if (nvrtcGetProgramLog(prog, &log[0]) == NVRTC_SUCCESS) {
        fprintf(stderr, "%s\n", &log[0]);
      }
    }
    return false;
  }

  /* Retrieve the ptx code. */
  size_t ptx_size = 0;
  result = nvrtcGetPTXSize(prog, &ptx_size);
  if (result != NVRTC_SUCCESS) {
    fprintf(stderr, "Error: nvrtcGetPTXSize failed (%d)\n\n", (int)result);
    return false;
  }
  if (ptx_size == 0) {
    /* Nothing to write, and indexing an empty vector below would be invalid. */
    fprintf(stderr, "Error: nvrtcGetPTXSize reported an empty PTX\n\n");
    return false;
  }

  vector<char> ptx_code(ptx_size);
  result = nvrtcGetPTX(prog, &ptx_code[0]);
  if (result != NVRTC_SUCCESS) {
    fprintf(stderr, "Error: nvrtcGetPTX failed (%d)\n\n", (int)result);
    return false;
  }

  /* Write a file in the temp folder with the ptx code. */
  settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" +
                      OIIO::Filesystem::unique_path();
  FILE *f = fopen(settings.ptx_file.c_str(), "wb");
  if (f == NULL) {
    fprintf(stderr, "Error: unable to open %s for writing\n", settings.ptx_file.c_str());
    return false;
  }
  const size_t written = fwrite(&ptx_code[0], 1, ptx_size, f);
  /* fclose() flushes buffered data, so its result is part of the write's success. */
  const bool closed = (fclose(f) == 0);
  if (written != ptx_size || !closed) {
    fprintf(stderr, "Error: unable to write %s\n", settings.ptx_file.c_str());
    return false;
  }

  return true;
}
/* Turn the intermediate PTX file into the final cubin by running `ptxas`.
 *
 * Returns false when ptxas exits with a non-zero status. Failing to delete the temporary
 * PTX file afterwards is reported but does not fail the step. */
static bool link_ptxas(CompilationSettings &settings)
{
  /* Take ptxas from the toolkit's bin directory when one was given, otherwise use the
   * bare executable name. */
  string ptxas_dir;
  if (!settings.cuda_toolkit_dir.empty()) {
    ptxas_dir = settings.cuda_toolkit_dir + "/bin/";
  }

  string command = "\"" + ptxas_dir + "ptxas\" ";
  command += settings.ptx_file;
  command += " -o " + settings.output_file;
  command += " --gpu-name sm_" + std::to_string(settings.target_arch);
  command += " -m" + std::to_string(settings.bits);

  if (settings.verbose) {
    command += " --verbose";
    printf("%s\n", command.c_str());
  }

  const int exit_status = system(command.c_str());
  if (exit_status != 0) {
    fprintf(stderr, "Error: ptxas failed (%d)\n\n", exit_status);
    return false;
  }

  /* Best effort cleanup of the temporary file. */
  const bool removed = OIIO::Filesystem::remove(settings.ptx_file);
  if (!removed) {
    fprintf(stderr, "Error: removing %s\n\n", settings.ptx_file.c_str());
  }

  return true;
}
/* Load NVRTC through cuew and verify that it is usable.
 *
 * Checks that cuew initializes, that NVRTC is at least version 80 (CUDA 8) and that every
 * NVRTC entry point this tool calls was resolved. Returns false, after printing the reason
 * to stderr, on the first failed check. */
static bool init(CompilationSettings &settings)
{
#ifdef _MSC_VER
  /* Let the DLL loader search the toolkit's bin directory. */
  if (settings.cuda_toolkit_dir.size()) {
    SetDllDirectory((settings.cuda_toolkit_dir + "/bin").c_str());
  }
#else
  (void)settings;
#endif

  const int cuewresult = cuewInit(CUEW_INIT_NVRTC);
  if (cuewresult != CUEW_SUCCESS) {
    /* The code is printed as a plain decimal; the former "0x" prefix mislabelled it as hex. */
    fprintf(stderr, "Error: cuew init failed (%d)\n\n", cuewresult);
    return false;
  }

  /* Report the same version that is being tested. */
  const int nvrtc_version = cuewNvrtcVersion();
  if (nvrtc_version < 80) {
    fprintf(stderr, "Error: only cuda 8 and higher is supported, %d\n\n", nvrtc_version);
    return false;
  }

  /* Entry points used by this tool, checked in the order they are called. */
  const struct {
    const char *name;
    bool resolved;
  } required[] = {
      {"nvrtcCreateProgram", nvrtcCreateProgram != NULL},
      {"nvrtcCompileProgram", nvrtcCompileProgram != NULL},
      {"nvrtcGetProgramLogSize", nvrtcGetProgramLogSize != NULL},
      {"nvrtcGetProgramLog", nvrtcGetProgramLog != NULL},
      {"nvrtcGetPTXSize", nvrtcGetPTXSize != NULL},
      {"nvrtcGetPTX", nvrtcGetPTX != NULL},
  };
  for (size_t i = 0; i < sizeof(required) / sizeof(required[0]); i++) {
    if (!required[i].resolved) {
      fprintf(stderr, "Error: %s not resolved\n", required[i].name);
      return false;
    }
  }

  return true;
}
static bool parse_parameters(int argc, const char **argv, CompilationSettings &settings) static bool parse_parameters(int argc, const char **argv, CompilationSettings &settings)
{ {
OIIO::ArgParse ap; OIIO::ArgParse ap;
ap.options("Usage: cycles_cubin_cc [options]", ap.options("Usage: cycles_cubin_cc [options]",
"-target %d", &settings.target_arch, "target shader model", "-target %d",
"-m %d", &settings.bits, "Cuda architecture bits", &settings.target_arch,
"-i %s", &settings.input_file, "Input source filename", "target shader model",
"-o %s", &settings.output_file, "Output cubin filename", "-m %d",
"-I %L", &settings.includes, "Add additional includepath", &settings.bits,
"-D %L", &settings.defines, "Add additional defines", "Cuda architecture bits",
"-v", &settings.verbose, "Use verbose logging", "-i %s",
"--use_fast_math", &settings.fast_math, "Use fast math", &settings.input_file,
"-cuda-toolkit-dir %s", &settings.cuda_toolkit_dir, "path to the cuda toolkit binary directory", "Input source filename",
NULL); "-o %s",
&settings.output_file,
"Output cubin filename",
"-I %L",
&settings.includes,
"Add additional includepath",
"-D %L",
&settings.defines,
"Add additional defines",
"-v",
&settings.verbose,
"Use verbose logging",
"--use_fast_math",
&settings.fast_math,
"Use fast math",
"-cuda-toolkit-dir %s",
&settings.cuda_toolkit_dir,
"path to the cuda toolkit binary directory",
NULL);
if(ap.parse(argc, argv) < 0) { if (ap.parse(argc, argv) < 0) {
fprintf(stderr, "%s\n", ap.geterror().c_str()); fprintf(stderr, "%s\n", ap.geterror().c_str());
ap.usage(); ap.usage();
return false; return false;
} }
if(!settings.output_file.size()) { if (!settings.output_file.size()) {
fprintf(stderr, "Error: Output file not set(-o), required\n\n"); fprintf(stderr, "Error: Output file not set(-o), required\n\n");
return false; return false;
} }
if(!settings.input_file.size()) { if (!settings.input_file.size()) {
fprintf(stderr, "Error: Input file not set(-i, required\n\n"); fprintf(stderr, "Error: Input file not set(-i, required\n\n");
return false; return false;
} }
if(!settings.target_arch) { if (!settings.target_arch) {
fprintf(stderr, "Error: target shader model not set (-target), required\n\n"); fprintf(stderr, "Error: target shader model not set (-target), required\n\n");
return false; return false;
} }
return true; return true;
} }
int main(int argc, const char **argv) int main(int argc, const char **argv)
{ {
CompilationSettings settings; CompilationSettings settings;
if(!parse_parameters(argc, argv, settings)) { if (!parse_parameters(argc, argv, settings)) {
fprintf(stderr, "Error: invalid parameters, exiting\n"); fprintf(stderr, "Error: invalid parameters, exiting\n");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
if(!init(settings)) { if (!init(settings)) {
fprintf(stderr, "Error: initialization error, exiting\n"); fprintf(stderr, "Error: initialization error, exiting\n");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
if(!compile_cuda(settings)) { if (!compile_cuda(settings)) {
fprintf(stderr, "Error: compilation error, exiting\n"); fprintf(stderr, "Error: compilation error, exiting\n");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
if(!link_ptxas(settings)) { if (!link_ptxas(settings)) {
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
return 0; return 0;
} }

View File

@@ -30,85 +30,93 @@ using namespace ccl;
int main(int argc, const char **argv) int main(int argc, const char **argv)
{ {
util_logging_init(argv[0]); util_logging_init(argv[0]);
path_init(); path_init();
/* device types */ /* device types */
string devicelist = ""; string devicelist = "";
string devicename = "cpu"; string devicename = "cpu";
bool list = false, debug = false; bool list = false, debug = false;
int threads = 0, verbosity = 1; int threads = 0, verbosity = 1;
vector<DeviceType>& types = Device::available_types(); vector<DeviceType> &types = Device::available_types();
foreach(DeviceType type, types) { foreach (DeviceType type, types) {
if(devicelist != "") if (devicelist != "")
devicelist += ", "; devicelist += ", ";
devicelist += Device::string_from_type(type); devicelist += Device::string_from_type(type);
} }
/* parse options */ /* parse options */
ArgParse ap; ArgParse ap;
ap.options ("Usage: cycles_server [options]", ap.options("Usage: cycles_server [options]",
"--device %s", &devicename, ("Devices to use: " + devicelist).c_str(), "--device %s",
"--list-devices", &list, "List information about all available devices", &devicename,
"--threads %d", &threads, "Number of threads to use for CPU device", ("Devices to use: " + devicelist).c_str(),
"--list-devices",
&list,
"List information about all available devices",
"--threads %d",
&threads,
"Number of threads to use for CPU device",
#ifdef WITH_CYCLES_LOGGING #ifdef WITH_CYCLES_LOGGING
"--debug", &debug, "Enable debug logging", "--debug",
"--verbose %d", &verbosity, "Set verbosity of the logger", &debug,
"Enable debug logging",
"--verbose %d",
&verbosity,
"Set verbosity of the logger",
#endif #endif
NULL); NULL);
if(ap.parse(argc, argv) < 0) { if (ap.parse(argc, argv) < 0) {
fprintf(stderr, "%s\n", ap.geterror().c_str()); fprintf(stderr, "%s\n", ap.geterror().c_str());
ap.usage(); ap.usage();
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
if(debug) { if (debug) {
util_logging_start(); util_logging_start();
util_logging_verbosity_set(verbosity); util_logging_verbosity_set(verbosity);
} }
if(list) { if (list) {
vector<DeviceInfo>& devices = Device::available_devices(); vector<DeviceInfo> &devices = Device::available_devices();
printf("Devices:\n"); printf("Devices:\n");
foreach(DeviceInfo& info, devices) { foreach (DeviceInfo &info, devices) {
printf(" %s%s\n", printf(" %s%s\n", info.description.c_str(), (info.display_device) ? " (display)" : "");
info.description.c_str(), }
(info.display_device)? " (display)": "");
}
exit(EXIT_SUCCESS); exit(EXIT_SUCCESS);
} }
/* find matching device */ /* find matching device */
DeviceType device_type = Device::type_from_string(devicename.c_str()); DeviceType device_type = Device::type_from_string(devicename.c_str());
vector<DeviceInfo>& devices = Device::available_devices(); vector<DeviceInfo> &devices = Device::available_devices();
DeviceInfo device_info; DeviceInfo device_info;
foreach(DeviceInfo& device, devices) { foreach (DeviceInfo &device, devices) {
if(device_type == device.type) { if (device_type == device.type) {
device_info = device; device_info = device;
break; break;
} }
} }
TaskScheduler::init(threads); TaskScheduler::init(threads);
while(1) { while (1) {
Stats stats; Stats stats;
Device *device = Device::create(device_info, stats, true); Device *device = Device::create(device_info, stats, true);
printf("Cycles Server with device: %s\n", device->info.description.c_str()); printf("Cycles Server with device: %s\n", device->info.description.c_str());
device->server_run(); device->server_run();
delete device; delete device;
} }
TaskScheduler::exit(); TaskScheduler::exit();
return 0; return 0;
} }

View File

@@ -36,7 +36,7 @@
#include "util/util_version.h" #include "util/util_version.h"
#ifdef WITH_CYCLES_STANDALONE_GUI #ifdef WITH_CYCLES_STANDALONE_GUI
#include "util/util_view.h" # include "util/util_view.h"
#endif #endif
#include "app/cycles_xml.h" #include "app/cycles_xml.h"
@@ -44,447 +44,494 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
struct Options { struct Options {
Session *session; Session *session;
Scene *scene; Scene *scene;
string filepath; string filepath;
int width, height; int width, height;
SceneParams scene_params; SceneParams scene_params;
SessionParams session_params; SessionParams session_params;
bool quiet; bool quiet;
bool show_help, interactive, pause; bool show_help, interactive, pause;
string output_path; string output_path;
} options; } options;
static void session_print(const string& str) static void session_print(const string &str)
{ {
/* print with carriage return to overwrite previous */ /* print with carriage return to overwrite previous */
printf("\r%s", str.c_str()); printf("\r%s", str.c_str());
/* add spaces to overwrite longer previous print */ /* add spaces to overwrite longer previous print */
static int maxlen = 0; static int maxlen = 0;
int len = str.size(); int len = str.size();
maxlen = max(len, maxlen); maxlen = max(len, maxlen);
for(int i = len; i < maxlen; i++) for (int i = len; i < maxlen; i++)
printf(" "); printf(" ");
/* flush because we don't write an end of line */ /* flush because we don't write an end of line */
fflush(stdout); fflush(stdout);
} }
static void session_print_status() static void session_print_status()
{ {
string status, substatus; string status, substatus;
/* get status */ /* get status */
float progress = options.session->progress.get_progress(); float progress = options.session->progress.get_progress();
options.session->progress.get_status(status, substatus); options.session->progress.get_status(status, substatus);
if(substatus != "") if (substatus != "")
status += ": " + substatus; status += ": " + substatus;
/* print status */ /* print status */
status = string_printf("Progress %05.2f %s", (double) progress*100, status.c_str()); status = string_printf("Progress %05.2f %s", (double)progress * 100, status.c_str());
session_print(status); session_print(status);
} }
static bool write_render(const uchar *pixels, int w, int h, int channels) static bool write_render(const uchar *pixels, int w, int h, int channels)
{ {
string msg = string_printf("Writing image %s", options.output_path.c_str()); string msg = string_printf("Writing image %s", options.output_path.c_str());
session_print(msg); session_print(msg);
unique_ptr<ImageOutput> out = unique_ptr<ImageOutput>(ImageOutput::create(options.output_path)); unique_ptr<ImageOutput> out = unique_ptr<ImageOutput>(ImageOutput::create(options.output_path));
if(!out) { if (!out) {
return false; return false;
} }
ImageSpec spec(w, h, channels, TypeDesc::UINT8); ImageSpec spec(w, h, channels, TypeDesc::UINT8);
if(!out->open(options.output_path, spec)) { if (!out->open(options.output_path, spec)) {
return false; return false;
} }
/* conversion for different top/bottom convention */ /* conversion for different top/bottom convention */
out->write_image(TypeDesc::UINT8, out->write_image(
pixels + (h - 1) * w * channels, TypeDesc::UINT8, pixels + (h - 1) * w * channels, AutoStride, -w * channels, AutoStride);
AutoStride,
-w * channels,
AutoStride);
out->close(); out->close();
return true; return true;
} }
/* Buffer parameters matching the current render resolution.
 * Returns a reference to one static object that is refreshed on every call. */
static BufferParams &session_buffer_params()
{
  static BufferParams buffer_params;

  /* The full size and the visible size are both the whole image. */
  buffer_params.width = buffer_params.full_width = options.width;
  buffer_params.height = buffer_params.full_height = options.height;

  return buffer_params;
}
static void scene_init() static void scene_init()
{ {
options.scene = new Scene(options.scene_params, options.session->device); options.scene = new Scene(options.scene_params, options.session->device);
/* Read XML */ /* Read XML */
xml_read_file(options.scene, options.filepath.c_str()); xml_read_file(options.scene, options.filepath.c_str());
/* Camera width/height override? */ /* Camera width/height override? */
if(!(options.width == 0 || options.height == 0)) { if (!(options.width == 0 || options.height == 0)) {
options.scene->camera->width = options.width; options.scene->camera->width = options.width;
options.scene->camera->height = options.height; options.scene->camera->height = options.height;
} }
else { else {
options.width = options.scene->camera->width; options.width = options.scene->camera->width;
options.height = options.scene->camera->height; options.height = options.scene->camera->height;
} }
/* Calculate Viewplane */ /* Calculate Viewplane */
options.scene->camera->compute_auto_viewplane(); options.scene->camera->compute_auto_viewplane();
} }
static void session_init() static void session_init()
{ {
options.session_params.write_render_cb = write_render; options.session_params.write_render_cb = write_render;
options.session = new Session(options.session_params); options.session = new Session(options.session_params);
if(options.session_params.background && !options.quiet) if (options.session_params.background && !options.quiet)
options.session->progress.set_update_callback(function_bind(&session_print_status)); options.session->progress.set_update_callback(function_bind(&session_print_status));
#ifdef WITH_CYCLES_STANDALONE_GUI #ifdef WITH_CYCLES_STANDALONE_GUI
else else
options.session->progress.set_update_callback(function_bind(&view_redraw)); options.session->progress.set_update_callback(function_bind(&view_redraw));
#endif #endif
/* load scene */ /* load scene */
scene_init(); scene_init();
options.session->scene = options.scene; options.session->scene = options.scene;
options.session->reset(session_buffer_params(), options.session_params.samples); options.session->reset(session_buffer_params(), options.session_params.samples);
options.session->start(); options.session->start();
} }
static void session_exit() static void session_exit()
{ {
if(options.session) { if (options.session) {
delete options.session; delete options.session;
options.session = NULL; options.session = NULL;
} }
if(options.session_params.background && !options.quiet) { if (options.session_params.background && !options.quiet) {
session_print("Finished Rendering."); session_print("Finished Rendering.");
printf("\n"); printf("\n");
} }
} }
#ifdef WITH_CYCLES_STANDALONE_GUI #ifdef WITH_CYCLES_STANDALONE_GUI
/* Show a one line summary of the render state in the viewer, plus the help overlay when it
 * is enabled. */
static void display_info(Progress &progress)
{
  /* Time of the previous call; the latency is the time between two calls. */
  static double last = 0;
  const double now = time_dt();
  const double latency = now - last;
  last = now;

  double total_time, sample_time;
  string status, substatus;
  progress.get_time(total_time, sample_time);
  progress.get_status(status, substatus);
  const float progress_val = progress.get_progress();

  if (!substatus.empty()) {
    status += ": " + substatus;
  }

  const string interactive = options.interactive ? "On" : "Off";

  const string info = string_printf(
      "%s"
      " Time: %.2f"
      " Latency: %.4f"
      " Progress: %05.2f"
      " Average: %.4f"
      " Interactive: %s",
      status.c_str(),
      total_time,
      latency,
      (double)progress_val * 100,
      sample_time,
      interactive.c_str());

  view_display_info(info.c_str());

  if (options.show_help) {
    view_display_help();
  }
}
static void display() static void display()
{ {
static DeviceDrawParams draw_params = DeviceDrawParams(); static DeviceDrawParams draw_params = DeviceDrawParams();
options.session->draw(session_buffer_params(), draw_params); options.session->draw(session_buffer_params(), draw_params);
display_info(options.session->progress); display_info(options.session->progress);
} }
static void motion(int x, int y, int button) static void motion(int x, int y, int button)
{ {
if(options.interactive) { if (options.interactive) {
Transform matrix = options.session->scene->camera->matrix; Transform matrix = options.session->scene->camera->matrix;
/* Translate */ /* Translate */
if(button == 0) { if (button == 0) {
float3 translate = make_float3(x * 0.01f, -(y * 0.01f), 0.0f); float3 translate = make_float3(x * 0.01f, -(y * 0.01f), 0.0f);
matrix = matrix * transform_translate(translate); matrix = matrix * transform_translate(translate);
} }
/* Rotate */ /* Rotate */
else if(button == 2) { else if (button == 2) {
float4 r1 = make_float4((float)x * 0.1f, 0.0f, 1.0f, 0.0f); float4 r1 = make_float4((float)x * 0.1f, 0.0f, 1.0f, 0.0f);
matrix = matrix * transform_rotate(DEG2RADF(r1.x), make_float3(r1.y, r1.z, r1.w)); matrix = matrix * transform_rotate(DEG2RADF(r1.x), make_float3(r1.y, r1.z, r1.w));
float4 r2 = make_float4(y * 0.1f, 1.0f, 0.0f, 0.0f); float4 r2 = make_float4(y * 0.1f, 1.0f, 0.0f, 0.0f);
matrix = matrix * transform_rotate(DEG2RADF(r2.x), make_float3(r2.y, r2.z, r2.w)); matrix = matrix * transform_rotate(DEG2RADF(r2.x), make_float3(r2.y, r2.z, r2.w));
} }
/* Update and Reset */ /* Update and Reset */
options.session->scene->camera->matrix = matrix; options.session->scene->camera->matrix = matrix;
options.session->scene->camera->need_update = true; options.session->scene->camera->need_update = true;
options.session->scene->camera->need_device_update = true; options.session->scene->camera->need_device_update = true;
options.session->reset(session_buffer_params(), options.session_params.samples); options.session->reset(session_buffer_params(), options.session_params.samples);
} }
} }
static void resize(int width, int height) static void resize(int width, int height)
{ {
options.width = width; options.width = width;
options.height = height; options.height = height;
if(options.session) { if (options.session) {
/* Update camera */ /* Update camera */
options.session->scene->camera->width = width; options.session->scene->camera->width = width;
options.session->scene->camera->height = height; options.session->scene->camera->height = height;
options.session->scene->camera->compute_auto_viewplane(); options.session->scene->camera->compute_auto_viewplane();
options.session->scene->camera->need_update = true; options.session->scene->camera->need_update = true;
options.session->scene->camera->need_device_update = true; options.session->scene->camera->need_device_update = true;
options.session->reset(session_buffer_params(), options.session_params.samples); options.session->reset(session_buffer_params(), options.session_params.samples);
} }
} }
static void keyboard(unsigned char key) static void keyboard(unsigned char key)
{ {
/* Toggle help */ /* Toggle help */
if(key == 'h') if (key == 'h')
options.show_help = !(options.show_help); options.show_help = !(options.show_help);
/* Reset */ /* Reset */
else if(key == 'r') else if (key == 'r')
options.session->reset(session_buffer_params(), options.session_params.samples); options.session->reset(session_buffer_params(), options.session_params.samples);
/* Cancel */ /* Cancel */
else if(key == 27) // escape else if (key == 27) // escape
options.session->progress.set_cancel("Canceled"); options.session->progress.set_cancel("Canceled");
/* Pause */ /* Pause */
else if(key == 'p') { else if (key == 'p') {
options.pause = !options.pause; options.pause = !options.pause;
options.session->set_pause(options.pause); options.session->set_pause(options.pause);
} }
/* Interactive Mode */ /* Interactive Mode */
else if(key == 'i') else if (key == 'i')
options.interactive = !(options.interactive); options.interactive = !(options.interactive);
/* Navigation */ /* Navigation */
else if(options.interactive && (key == 'w' || key == 'a' || key == 's' || key == 'd')) { else if (options.interactive && (key == 'w' || key == 'a' || key == 's' || key == 'd')) {
Transform matrix = options.session->scene->camera->matrix; Transform matrix = options.session->scene->camera->matrix;
float3 translate; float3 translate;
if(key == 'w') if (key == 'w')
translate = make_float3(0.0f, 0.0f, 0.1f); translate = make_float3(0.0f, 0.0f, 0.1f);
else if(key == 's') else if (key == 's')
translate = make_float3(0.0f, 0.0f, -0.1f); translate = make_float3(0.0f, 0.0f, -0.1f);
else if(key == 'a') else if (key == 'a')
translate = make_float3(-0.1f, 0.0f, 0.0f); translate = make_float3(-0.1f, 0.0f, 0.0f);
else if(key == 'd') else if (key == 'd')
translate = make_float3(0.1f, 0.0f, 0.0f); translate = make_float3(0.1f, 0.0f, 0.0f);
matrix = matrix * transform_translate(translate); matrix = matrix * transform_translate(translate);
/* Update and Reset */ /* Update and Reset */
options.session->scene->camera->matrix = matrix; options.session->scene->camera->matrix = matrix;
options.session->scene->camera->need_update = true; options.session->scene->camera->need_update = true;
options.session->scene->camera->need_device_update = true; options.session->scene->camera->need_device_update = true;
options.session->reset(session_buffer_params(), options.session_params.samples); options.session->reset(session_buffer_params(), options.session_params.samples);
} }
/* Set Max Bounces */ /* Set Max Bounces */
else if(options.interactive && (key == '0' || key == '1' || key == '2' || key == '3')) { else if (options.interactive && (key == '0' || key == '1' || key == '2' || key == '3')) {
int bounce; int bounce;
switch(key) { switch (key) {
case '0': bounce = 0; break; case '0':
case '1': bounce = 1; break; bounce = 0;
case '2': bounce = 2; break; break;
case '3': bounce = 3; break; case '1':
default: bounce = 0; break; bounce = 1;
} break;
case '2':
bounce = 2;
break;
case '3':
bounce = 3;
break;
default:
bounce = 0;
break;
}
options.session->scene->integrator->max_bounce = bounce; options.session->scene->integrator->max_bounce = bounce;
/* Update and Reset */ /* Update and Reset */
options.session->scene->integrator->need_update = true; options.session->scene->integrator->need_update = true;
options.session->reset(session_buffer_params(), options.session_params.samples); options.session->reset(session_buffer_params(), options.session_params.samples);
} }
} }
#endif #endif
static int files_parse(int argc, const char *argv[]) static int files_parse(int argc, const char *argv[])
{ {
if(argc > 0) if (argc > 0)
options.filepath = argv[0]; options.filepath = argv[0];
return 0; return 0;
} }
static void options_parse(int argc, const char **argv) static void options_parse(int argc, const char **argv)
{ {
options.width = 0; options.width = 0;
options.height = 0; options.height = 0;
options.filepath = ""; options.filepath = "";
options.session = NULL; options.session = NULL;
options.quiet = false; options.quiet = false;
/* device names */ /* device names */
string device_names = ""; string device_names = "";
string devicename = "CPU"; string devicename = "CPU";
bool list = false; bool list = false;
/* List devices for which support is compiled in. */ /* List devices for which support is compiled in. */
vector<DeviceType> types = Device::available_types(); vector<DeviceType> types = Device::available_types();
foreach(DeviceType type, types) { foreach (DeviceType type, types) {
if(device_names != "") if (device_names != "")
device_names += ", "; device_names += ", ";
device_names += Device::string_from_type(type); device_names += Device::string_from_type(type);
} }
/* shading system */ /* shading system */
string ssname = "svm"; string ssname = "svm";
/* parse options */ /* parse options */
ArgParse ap; ArgParse ap;
bool help = false, debug = false, version = false; bool help = false, debug = false, version = false;
int verbosity = 1; int verbosity = 1;
ap.options ("Usage: cycles [options] file.xml", ap.options("Usage: cycles [options] file.xml",
"%*", files_parse, "", "%*",
"--device %s", &devicename, ("Devices to use: " + device_names).c_str(), files_parse,
"",
"--device %s",
&devicename,
("Devices to use: " + device_names).c_str(),
#ifdef WITH_OSL #ifdef WITH_OSL
"--shadingsys %s", &ssname, "Shading system to use: svm, osl", "--shadingsys %s",
&ssname,
"Shading system to use: svm, osl",
#endif #endif
"--background", &options.session_params.background, "Render in background, without user interface", "--background",
"--quiet", &options.quiet, "In background mode, don't print progress messages", &options.session_params.background,
"--samples %d", &options.session_params.samples, "Number of samples to render", "Render in background, without user interface",
"--output %s", &options.output_path, "File path to write output image", "--quiet",
"--threads %d", &options.session_params.threads, "CPU Rendering Threads", &options.quiet,
"--width %d", &options.width, "Window width in pixel", "In background mode, don't print progress messages",
"--height %d", &options.height, "Window height in pixel", "--samples %d",
"--tile-width %d", &options.session_params.tile_size.x, "Tile width in pixels", &options.session_params.samples,
"--tile-height %d", &options.session_params.tile_size.y, "Tile height in pixels", "Number of samples to render",
"--list-devices", &list, "List information about all available devices", "--output %s",
&options.output_path,
"File path to write output image",
"--threads %d",
&options.session_params.threads,
"CPU Rendering Threads",
"--width %d",
&options.width,
"Window width in pixel",
"--height %d",
&options.height,
"Window height in pixel",
"--tile-width %d",
&options.session_params.tile_size.x,
"Tile width in pixels",
"--tile-height %d",
&options.session_params.tile_size.y,
"Tile height in pixels",
"--list-devices",
&list,
"List information about all available devices",
#ifdef WITH_CYCLES_LOGGING #ifdef WITH_CYCLES_LOGGING
"--debug", &debug, "Enable debug logging", "--debug",
"--verbose %d", &verbosity, "Set verbosity of the logger", &debug,
"Enable debug logging",
"--verbose %d",
&verbosity,
"Set verbosity of the logger",
#endif #endif
"--help", &help, "Print help message", "--help",
"--version", &version, "Print version number", &help,
NULL); "Print help message",
"--version",
&version,
"Print version number",
NULL);
if(ap.parse(argc, argv) < 0) { if (ap.parse(argc, argv) < 0) {
fprintf(stderr, "%s\n", ap.geterror().c_str()); fprintf(stderr, "%s\n", ap.geterror().c_str());
ap.usage(); ap.usage();
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
if(debug) { if (debug) {
util_logging_start(); util_logging_start();
util_logging_verbosity_set(verbosity); util_logging_verbosity_set(verbosity);
} }
if(list) { if (list) {
vector<DeviceInfo> devices = Device::available_devices(); vector<DeviceInfo> devices = Device::available_devices();
printf("Devices:\n"); printf("Devices:\n");
foreach(DeviceInfo& info, devices) { foreach (DeviceInfo &info, devices) {
printf(" %-10s%s%s\n", printf(" %-10s%s%s\n",
Device::string_from_type(info.type).c_str(), Device::string_from_type(info.type).c_str(),
info.description.c_str(), info.description.c_str(),
(info.display_device)? " (display)": ""); (info.display_device) ? " (display)" : "");
} }
exit(EXIT_SUCCESS); exit(EXIT_SUCCESS);
} }
else if(version) { else if (version) {
printf("%s\n", CYCLES_VERSION_STRING); printf("%s\n", CYCLES_VERSION_STRING);
exit(EXIT_SUCCESS); exit(EXIT_SUCCESS);
} }
else if(help || options.filepath == "") { else if (help || options.filepath == "") {
ap.usage(); ap.usage();
exit(EXIT_SUCCESS); exit(EXIT_SUCCESS);
} }
if(ssname == "osl") if (ssname == "osl")
options.scene_params.shadingsystem = SHADINGSYSTEM_OSL; options.scene_params.shadingsystem = SHADINGSYSTEM_OSL;
else if(ssname == "svm") else if (ssname == "svm")
options.scene_params.shadingsystem = SHADINGSYSTEM_SVM; options.scene_params.shadingsystem = SHADINGSYSTEM_SVM;
#ifndef WITH_CYCLES_STANDALONE_GUI #ifndef WITH_CYCLES_STANDALONE_GUI
options.session_params.background = true; options.session_params.background = true;
#endif #endif
/* Use progressive rendering */ /* Use progressive rendering */
options.session_params.progressive = true; options.session_params.progressive = true;
/* find matching device */ /* find matching device */
DeviceType device_type = Device::type_from_string(devicename.c_str()); DeviceType device_type = Device::type_from_string(devicename.c_str());
vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK(device_type)); vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK(device_type));
bool device_available = false; bool device_available = false;
if (!devices.empty()) { if (!devices.empty()) {
options.session_params.device = devices.front(); options.session_params.device = devices.front();
device_available = true; device_available = true;
} }
/* handle invalid configurations */ /* handle invalid configurations */
if(options.session_params.device.type == DEVICE_NONE || !device_available) { if (options.session_params.device.type == DEVICE_NONE || !device_available) {
fprintf(stderr, "Unknown device: %s\n", devicename.c_str()); fprintf(stderr, "Unknown device: %s\n", devicename.c_str());
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
#ifdef WITH_OSL #ifdef WITH_OSL
else if(!(ssname == "osl" || ssname == "svm")) { else if (!(ssname == "osl" || ssname == "svm")) {
fprintf(stderr, "Unknown shading system: %s\n", ssname.c_str()); fprintf(stderr, "Unknown shading system: %s\n", ssname.c_str());
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
else if(options.scene_params.shadingsystem == SHADINGSYSTEM_OSL && options.session_params.device.type != DEVICE_CPU) { else if (options.scene_params.shadingsystem == SHADINGSYSTEM_OSL &&
fprintf(stderr, "OSL shading system only works with CPU device\n"); options.session_params.device.type != DEVICE_CPU) {
exit(EXIT_FAILURE); fprintf(stderr, "OSL shading system only works with CPU device\n");
} exit(EXIT_FAILURE);
}
#endif #endif
else if(options.session_params.samples < 0) { else if (options.session_params.samples < 0) {
fprintf(stderr, "Invalid number of samples: %d\n", options.session_params.samples); fprintf(stderr, "Invalid number of samples: %d\n", options.session_params.samples);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
else if(options.filepath == "") { else if (options.filepath == "") {
fprintf(stderr, "No file path specified\n"); fprintf(stderr, "No file path specified\n");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
/* For smoother Viewport */ /* For smoother Viewport */
options.session_params.start_resolution = 64; options.session_params.start_resolution = 64;
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END
@@ -493,26 +540,33 @@ using namespace ccl;
int main(int argc, const char **argv) int main(int argc, const char **argv)
{ {
util_logging_init(argv[0]); util_logging_init(argv[0]);
path_init(); path_init();
options_parse(argc, argv); options_parse(argc, argv);
#ifdef WITH_CYCLES_STANDALONE_GUI #ifdef WITH_CYCLES_STANDALONE_GUI
if(options.session_params.background) { if (options.session_params.background) {
#endif #endif
session_init(); session_init();
options.session->wait(); options.session->wait();
session_exit(); session_exit();
#ifdef WITH_CYCLES_STANDALONE_GUI #ifdef WITH_CYCLES_STANDALONE_GUI
} }
else { else {
string title = "Cycles: " + path_filename(options.filepath); string title = "Cycles: " + path_filename(options.filepath);
/* init/exit are callback so they run while GL is initialized */ /* init/exit are callback so they run while GL is initialized */
view_main_loop(title.c_str(), options.width, options.height, view_main_loop(title.c_str(),
session_init, session_exit, resize, display, keyboard, motion); options.width,
} options.height,
session_init,
session_exit,
resize,
display,
keyboard,
motion);
}
#endif #endif
return 0; return 0;
} }

File diff suppressed because it is too large Load Diff

View File

@@ -29,4 +29,4 @@ void xml_read_file(Scene *scene, const char *filepath);
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __CYCLES_XML_H__ */ #endif /* __CYCLES_XML_H__ */

View File

@@ -33,4 +33,4 @@ void CCL_logging_verbosity_set(int verbosity);
} }
#endif #endif
#endif /* __CCL_API_H__ */ #endif /* __CCL_API_H__ */

View File

@@ -1,68 +1,68 @@
set(INC set(INC
.. ..
../../glew-mx ../../glew-mx
../../guardedalloc ../../guardedalloc
../../mikktspace ../../mikktspace
../../../source/blender/makesdna ../../../source/blender/makesdna
../../../source/blender/makesrna ../../../source/blender/makesrna
../../../source/blender/blenlib ../../../source/blender/blenlib
${CMAKE_BINARY_DIR}/source/blender/makesrna/intern ${CMAKE_BINARY_DIR}/source/blender/makesrna/intern
) )
set(INC_SYS set(INC_SYS
${PYTHON_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS}
${GLEW_INCLUDE_DIR} ${GLEW_INCLUDE_DIR}
) )
set(SRC set(SRC
blender_camera.cpp blender_camera.cpp
blender_device.cpp blender_device.cpp
blender_mesh.cpp blender_mesh.cpp
blender_object.cpp blender_object.cpp
blender_object_cull.cpp blender_object_cull.cpp
blender_particles.cpp blender_particles.cpp
blender_curves.cpp blender_curves.cpp
blender_logging.cpp blender_logging.cpp
blender_python.cpp blender_python.cpp
blender_session.cpp blender_session.cpp
blender_shader.cpp blender_shader.cpp
blender_sync.cpp blender_sync.cpp
blender_texture.cpp blender_texture.cpp
CCL_api.h CCL_api.h
blender_object_cull.h blender_object_cull.h
blender_sync.h blender_sync.h
blender_session.h blender_session.h
blender_texture.h blender_texture.h
blender_util.h blender_util.h
) )
set(LIB set(LIB
cycles_bvh cycles_bvh
cycles_device cycles_device
cycles_graph cycles_graph
cycles_kernel cycles_kernel
cycles_render cycles_render
cycles_subd cycles_subd
cycles_util cycles_util
) )
if(WITH_CYCLES_LOGGING) if(WITH_CYCLES_LOGGING)
list(APPEND LIB list(APPEND LIB
extern_glog extern_glog
) )
endif() endif()
set(ADDON_FILES set(ADDON_FILES
addon/__init__.py addon/__init__.py
addon/engine.py addon/engine.py
addon/operators.py addon/operators.py
addon/osl.py addon/osl.py
addon/presets.py addon/presets.py
addon/properties.py addon/properties.py
addon/ui.py addon/ui.py
addon/version_update.py addon/version_update.py
) )
add_definitions(${GL_DEFINITIONS}) add_definitions(${GL_DEFINITIONS})
@@ -72,14 +72,14 @@ if(WITH_CYCLES_DEVICE_OPENCL)
endif() endif()
if(WITH_CYCLES_NETWORK) if(WITH_CYCLES_NETWORK)
add_definitions(-DWITH_NETWORK) add_definitions(-DWITH_NETWORK)
endif() endif()
blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}" "${LIB}") blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
# avoid link failure with clang 3.4 debug # avoid link failure with clang 3.4 debug
if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND NOT ${CMAKE_C_COMPILER_VERSION} VERSION_LESS '3.4') if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND NOT ${CMAKE_C_COMPILER_VERSION} VERSION_LESS '3.4')
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -gline-tables-only") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -gline-tables-only")
endif() endif()
add_dependencies(bf_intern_cycles bf_rna) add_dependencies(bf_intern_cycles bf_rna)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -19,91 +19,89 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
int blender_device_threads(BL::Scene& b_scene) int blender_device_threads(BL::Scene &b_scene)
{ {
BL::RenderSettings b_r = b_scene.render(); BL::RenderSettings b_r = b_scene.render();
if(b_r.threads_mode() == BL::RenderSettings::threads_mode_FIXED) if (b_r.threads_mode() == BL::RenderSettings::threads_mode_FIXED)
return b_r.threads(); return b_r.threads();
else else
return 0; return 0;
} }
DeviceInfo blender_device_info(BL::Preferences& b_preferences, BL::Scene& b_scene, bool background) DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scene, bool background)
{ {
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
/* Default to CPU device. */ /* Default to CPU device. */
DeviceInfo device = Device::available_devices(DEVICE_MASK_CPU).front(); DeviceInfo device = Device::available_devices(DEVICE_MASK_CPU).front();
if(get_enum(cscene, "device") == 2) { if (get_enum(cscene, "device") == 2) {
/* Find network device. */ /* Find network device. */
vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK_NETWORK); vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK_NETWORK);
if(!devices.empty()) { if (!devices.empty()) {
device = devices.front(); device = devices.front();
} }
} }
else if(get_enum(cscene, "device") == 1) { else if (get_enum(cscene, "device") == 1) {
/* Find cycles preferences. */ /* Find cycles preferences. */
PointerRNA cpreferences; PointerRNA cpreferences;
BL::Preferences::addons_iterator b_addon_iter; BL::Preferences::addons_iterator b_addon_iter;
for(b_preferences.addons.begin(b_addon_iter); b_addon_iter != b_preferences.addons.end(); ++b_addon_iter) { for (b_preferences.addons.begin(b_addon_iter); b_addon_iter != b_preferences.addons.end();
if(b_addon_iter->module() == "cycles") { ++b_addon_iter) {
cpreferences = b_addon_iter->preferences().ptr; if (b_addon_iter->module() == "cycles") {
break; cpreferences = b_addon_iter->preferences().ptr;
} break;
} }
}
/* Test if we are using GPU devices. */ /* Test if we are using GPU devices. */
enum ComputeDevice { enum ComputeDevice {
COMPUTE_DEVICE_CPU = 0, COMPUTE_DEVICE_CPU = 0,
COMPUTE_DEVICE_CUDA = 1, COMPUTE_DEVICE_CUDA = 1,
COMPUTE_DEVICE_OPENCL = 2, COMPUTE_DEVICE_OPENCL = 2,
COMPUTE_DEVICE_NUM = 3, COMPUTE_DEVICE_NUM = 3,
}; };
ComputeDevice compute_device = (ComputeDevice)get_enum(cpreferences, ComputeDevice compute_device = (ComputeDevice)get_enum(
"compute_device_type", cpreferences, "compute_device_type", COMPUTE_DEVICE_NUM, COMPUTE_DEVICE_CPU);
COMPUTE_DEVICE_NUM,
COMPUTE_DEVICE_CPU);
if(compute_device != COMPUTE_DEVICE_CPU) { if (compute_device != COMPUTE_DEVICE_CPU) {
/* Query GPU devices with matching types. */ /* Query GPU devices with matching types. */
uint mask = DEVICE_MASK_CPU; uint mask = DEVICE_MASK_CPU;
if(compute_device == COMPUTE_DEVICE_CUDA) { if (compute_device == COMPUTE_DEVICE_CUDA) {
mask |= DEVICE_MASK_CUDA; mask |= DEVICE_MASK_CUDA;
} }
else if(compute_device == COMPUTE_DEVICE_OPENCL) { else if (compute_device == COMPUTE_DEVICE_OPENCL) {
mask |= DEVICE_MASK_OPENCL; mask |= DEVICE_MASK_OPENCL;
} }
vector<DeviceInfo> devices = Device::available_devices(mask); vector<DeviceInfo> devices = Device::available_devices(mask);
/* Match device preferences and available devices. */ /* Match device preferences and available devices. */
vector<DeviceInfo> used_devices; vector<DeviceInfo> used_devices;
RNA_BEGIN(&cpreferences, device, "devices") { RNA_BEGIN (&cpreferences, device, "devices") {
if(get_boolean(device, "use")) { if (get_boolean(device, "use")) {
string id = get_string(device, "id"); string id = get_string(device, "id");
foreach(DeviceInfo& info, devices) { foreach (DeviceInfo &info, devices) {
if(info.id == id) { if (info.id == id) {
used_devices.push_back(info); used_devices.push_back(info);
break; break;
} }
} }
} }
} RNA_END; }
RNA_END;
if(!used_devices.empty()) { if (!used_devices.empty()) {
int threads = blender_device_threads(b_scene); int threads = blender_device_threads(b_scene);
device = Device::get_multi_device(used_devices, device = Device::get_multi_device(used_devices, threads, background);
threads, }
background); /* Else keep using the CPU device that was set before. */
} }
/* Else keep using the CPU device that was set before. */ }
}
}
return device; return device;
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -27,11 +27,13 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
/* Get number of threads to use for rendering. */ /* Get number of threads to use for rendering. */
int blender_device_threads(BL::Scene& b_scene); int blender_device_threads(BL::Scene &b_scene);
/* Convert Blender settings to device specification. */ /* Convert Blender settings to device specification. */
DeviceInfo blender_device_info(BL::Preferences& b_preferences, BL::Scene& b_scene, bool background); DeviceInfo blender_device_info(BL::Preferences &b_preferences,
BL::Scene &b_scene,
bool background);
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __BLENDER_DEVICE_H__ */ #endif /* __BLENDER_DEVICE_H__ */

View File

@@ -19,15 +19,15 @@
void CCL_init_logging(const char *argv0) void CCL_init_logging(const char *argv0)
{ {
ccl::util_logging_init(argv0); ccl::util_logging_init(argv0);
} }
void CCL_start_debug_logging() void CCL_start_debug_logging()
{ {
ccl::util_logging_start(); ccl::util_logging_start();
} }
void CCL_logging_verbosity_set(int verbosity) void CCL_logging_verbosity_set(int verbosity)
{ {
ccl::util_logging_verbosity_set(verbosity); ccl::util_logging_verbosity_set(verbosity);
} }

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -22,72 +22,69 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
BlenderObjectCulling::BlenderObjectCulling(Scene *scene, BL::Scene& b_scene) BlenderObjectCulling::BlenderObjectCulling(Scene *scene, BL::Scene &b_scene)
: use_scene_camera_cull_(false), : use_scene_camera_cull_(false),
use_camera_cull_(false), use_camera_cull_(false),
camera_cull_margin_(0.0f), camera_cull_margin_(0.0f),
use_scene_distance_cull_(false), use_scene_distance_cull_(false),
use_distance_cull_(false), use_distance_cull_(false),
distance_cull_margin_(0.0f) distance_cull_margin_(0.0f)
{ {
if(b_scene.render().use_simplify()) { if (b_scene.render().use_simplify()) {
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
use_scene_camera_cull_ = scene->camera->type != CAMERA_PANORAMA && use_scene_camera_cull_ = scene->camera->type != CAMERA_PANORAMA &&
!b_scene.render().use_multiview() && !b_scene.render().use_multiview() &&
get_boolean(cscene, "use_camera_cull"); get_boolean(cscene, "use_camera_cull");
use_scene_distance_cull_ = scene->camera->type != CAMERA_PANORAMA && use_scene_distance_cull_ = scene->camera->type != CAMERA_PANORAMA &&
!b_scene.render().use_multiview() && !b_scene.render().use_multiview() &&
get_boolean(cscene, "use_distance_cull"); get_boolean(cscene, "use_distance_cull");
camera_cull_margin_ = get_float(cscene, "camera_cull_margin"); camera_cull_margin_ = get_float(cscene, "camera_cull_margin");
distance_cull_margin_ = get_float(cscene, "distance_cull_margin"); distance_cull_margin_ = get_float(cscene, "distance_cull_margin");
if(distance_cull_margin_ == 0.0f) { if (distance_cull_margin_ == 0.0f) {
use_scene_distance_cull_ = false; use_scene_distance_cull_ = false;
} }
} }
} }
void BlenderObjectCulling::init_object(Scene *scene, BL::Object& b_ob) void BlenderObjectCulling::init_object(Scene *scene, BL::Object &b_ob)
{ {
if(!use_scene_camera_cull_ && !use_scene_distance_cull_) { if (!use_scene_camera_cull_ && !use_scene_distance_cull_) {
return; return;
} }
PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles"); PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
use_camera_cull_ = use_scene_camera_cull_ && get_boolean(cobject, "use_camera_cull"); use_camera_cull_ = use_scene_camera_cull_ && get_boolean(cobject, "use_camera_cull");
use_distance_cull_ = use_scene_distance_cull_ && get_boolean(cobject, "use_distance_cull"); use_distance_cull_ = use_scene_distance_cull_ && get_boolean(cobject, "use_distance_cull");
if(use_camera_cull_ || use_distance_cull_) { if (use_camera_cull_ || use_distance_cull_) {
/* Need to have proper projection matrix. */ /* Need to have proper projection matrix. */
scene->camera->update(scene); scene->camera->update(scene);
} }
} }
bool BlenderObjectCulling::test(Scene *scene, BL::Object& b_ob, Transform& tfm) bool BlenderObjectCulling::test(Scene *scene, BL::Object &b_ob, Transform &tfm)
{ {
if(!use_camera_cull_ && !use_distance_cull_) { if (!use_camera_cull_ && !use_distance_cull_) {
return false; return false;
} }
/* Compute world space bounding box corners. */ /* Compute world space bounding box corners. */
float3 bb[8]; float3 bb[8];
BL::Array<float, 24> boundbox = b_ob.bound_box(); BL::Array<float, 24> boundbox = b_ob.bound_box();
for(int i = 0; i < 8; ++i) { for (int i = 0; i < 8; ++i) {
float3 p = make_float3(boundbox[3 * i + 0], float3 p = make_float3(boundbox[3 * i + 0], boundbox[3 * i + 1], boundbox[3 * i + 2]);
boundbox[3 * i + 1], bb[i] = transform_point(&tfm, p);
boundbox[3 * i + 2]); }
bb[i] = transform_point(&tfm, p);
}
bool camera_culled = use_camera_cull_ && test_camera(scene, bb); bool camera_culled = use_camera_cull_ && test_camera(scene, bb);
bool distance_culled = use_distance_cull_ && test_distance(scene, bb); bool distance_culled = use_distance_cull_ && test_distance(scene, bb);
return ((camera_culled && distance_culled) || return ((camera_culled && distance_culled) || (camera_culled && !use_distance_cull_) ||
(camera_culled && !use_distance_cull_) || (distance_culled && !use_camera_cull_));
(distance_culled && !use_camera_cull_));
} }
/* TODO(sergey): Not really optimal, consider approaches based on k-DOP in order /* TODO(sergey): Not really optimal, consider approaches based on k-DOP in order
@@ -95,54 +92,50 @@ bool BlenderObjectCulling::test(Scene *scene, BL::Object& b_ob, Transform& tfm)
*/ */
bool BlenderObjectCulling::test_camera(Scene *scene, float3 bb[8]) bool BlenderObjectCulling::test_camera(Scene *scene, float3 bb[8])
{ {
Camera *cam = scene->camera; Camera *cam = scene->camera;
const ProjectionTransform& worldtondc = cam->worldtondc; const ProjectionTransform &worldtondc = cam->worldtondc;
float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX), float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX); bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
bool all_behind = true; bool all_behind = true;
for(int i = 0; i < 8; ++i) { for (int i = 0; i < 8; ++i) {
float3 p = bb[i]; float3 p = bb[i];
float4 b = make_float4(p.x, p.y, p.z, 1.0f); float4 b = make_float4(p.x, p.y, p.z, 1.0f);
float4 c = make_float4(dot(worldtondc.x, b), float4 c = make_float4(
dot(worldtondc.y, b), dot(worldtondc.x, b), dot(worldtondc.y, b), dot(worldtondc.z, b), dot(worldtondc.w, b));
dot(worldtondc.z, b), p = float4_to_float3(c / c.w);
dot(worldtondc.w, b)); if (c.z < 0.0f) {
p = float4_to_float3(c / c.w); p.x = 1.0f - p.x;
if(c.z < 0.0f) { p.y = 1.0f - p.y;
p.x = 1.0f - p.x; }
p.y = 1.0f - p.y; if (c.z >= -camera_cull_margin_) {
} all_behind = false;
if(c.z >= -camera_cull_margin_) { }
all_behind = false; bb_min = min(bb_min, p);
} bb_max = max(bb_max, p);
bb_min = min(bb_min, p); }
bb_max = max(bb_max, p); if (all_behind) {
} return true;
if(all_behind) { }
return true; return (bb_min.x >= 1.0f + camera_cull_margin_ || bb_min.y >= 1.0f + camera_cull_margin_ ||
} bb_max.x <= -camera_cull_margin_ || bb_max.y <= -camera_cull_margin_);
return (bb_min.x >= 1.0f + camera_cull_margin_ ||
bb_min.y >= 1.0f + camera_cull_margin_ ||
bb_max.x <= -camera_cull_margin_ ||
bb_max.y <= -camera_cull_margin_);
} }
bool BlenderObjectCulling::test_distance(Scene *scene, float3 bb[8]) bool BlenderObjectCulling::test_distance(Scene *scene, float3 bb[8])
{ {
float3 camera_position = transform_get_column(&scene->camera->matrix, 3); float3 camera_position = transform_get_column(&scene->camera->matrix, 3);
float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX), float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX); bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
/* Find min & max points for x & y & z on bounding box */ /* Find min & max points for x & y & z on bounding box */
for(int i = 0; i < 8; ++i) { for (int i = 0; i < 8; ++i) {
float3 p = bb[i]; float3 p = bb[i];
bb_min = min(bb_min, p); bb_min = min(bb_min, p);
bb_max = max(bb_max, p); bb_max = max(bb_max, p);
} }
float3 closest_point = max(min(bb_max,camera_position),bb_min); float3 closest_point = max(min(bb_max, camera_position), bb_min);
return (len_squared(camera_position - closest_point) > return (len_squared(camera_position - closest_point) >
distance_cull_margin_ * distance_cull_margin_); distance_cull_margin_ * distance_cull_margin_);
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -24,26 +24,25 @@ CCL_NAMESPACE_BEGIN
class Scene; class Scene;
class BlenderObjectCulling class BlenderObjectCulling {
{ public:
public: BlenderObjectCulling(Scene *scene, BL::Scene &b_scene);
BlenderObjectCulling(Scene *scene, BL::Scene& b_scene);
void init_object(Scene *scene, BL::Object& b_ob); void init_object(Scene *scene, BL::Object &b_ob);
bool test(Scene *scene, BL::Object& b_ob, Transform& tfm); bool test(Scene *scene, BL::Object &b_ob, Transform &tfm);
private: private:
bool test_camera(Scene *scene, float3 bb[8]); bool test_camera(Scene *scene, float3 bb[8]);
bool test_distance(Scene *scene, float3 bb[8]); bool test_distance(Scene *scene, float3 bb[8]);
bool use_scene_camera_cull_; bool use_scene_camera_cull_;
bool use_camera_cull_; bool use_camera_cull_;
float camera_cull_margin_; float camera_cull_margin_;
bool use_scene_distance_cull_; bool use_scene_distance_cull_;
bool use_distance_cull_; bool use_distance_cull_;
float distance_cull_margin_; float distance_cull_margin_;
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __BLENDER_OBJECT_CULL_H__ */ #endif /* __BLENDER_OBJECT_CULL_H__ */

View File

@@ -27,66 +27,66 @@ CCL_NAMESPACE_BEGIN
/* Utilities */ /* Utilities */
bool BlenderSync::sync_dupli_particle(BL::Object& b_ob, bool BlenderSync::sync_dupli_particle(BL::Object &b_ob,
BL::DepsgraphObjectInstance& b_instance, BL::DepsgraphObjectInstance &b_instance,
Object *object) Object *object)
{ {
/* test if this dupli was generated from a particle sytem */ /* test if this dupli was generated from a particle sytem */
BL::ParticleSystem b_psys = b_instance.particle_system(); BL::ParticleSystem b_psys = b_instance.particle_system();
if(!b_psys) if (!b_psys)
return false; return false;
object->hide_on_missing_motion = true; object->hide_on_missing_motion = true;
/* test if we need particle data */ /* test if we need particle data */
if(!object->mesh->need_attribute(scene, ATTR_STD_PARTICLE)) if (!object->mesh->need_attribute(scene, ATTR_STD_PARTICLE))
return false; return false;
/* don't handle child particles yet */ /* don't handle child particles yet */
BL::Array<int, OBJECT_PERSISTENT_ID_SIZE> persistent_id = b_instance.persistent_id(); BL::Array<int, OBJECT_PERSISTENT_ID_SIZE> persistent_id = b_instance.persistent_id();
if(persistent_id[0] >= b_psys.particles.length()) if (persistent_id[0] >= b_psys.particles.length())
return false; return false;
/* find particle system */ /* find particle system */
ParticleSystemKey key(b_ob, persistent_id); ParticleSystemKey key(b_ob, persistent_id);
ParticleSystem *psys; ParticleSystem *psys;
bool first_use = !particle_system_map.is_used(key); bool first_use = !particle_system_map.is_used(key);
bool need_update = particle_system_map.sync(&psys, b_ob, b_instance.object(), key); bool need_update = particle_system_map.sync(&psys, b_ob, b_instance.object(), key);
/* no update needed? */ /* no update needed? */
if(!need_update && !object->mesh->need_update && !scene->object_manager->need_update) if (!need_update && !object->mesh->need_update && !scene->object_manager->need_update)
return true; return true;
/* first time used in this sync loop? clear and tag update */ /* first time used in this sync loop? clear and tag update */
if(first_use) { if (first_use) {
psys->particles.clear(); psys->particles.clear();
psys->tag_update(scene); psys->tag_update(scene);
} }
/* add particle */ /* add particle */
BL::Particle b_pa = b_psys.particles[persistent_id[0]]; BL::Particle b_pa = b_psys.particles[persistent_id[0]];
Particle pa; Particle pa;
pa.index = persistent_id[0]; pa.index = persistent_id[0];
pa.age = b_scene.frame_current() - b_pa.birth_time(); pa.age = b_scene.frame_current() - b_pa.birth_time();
pa.lifetime = b_pa.lifetime(); pa.lifetime = b_pa.lifetime();
pa.location = get_float3(b_pa.location()); pa.location = get_float3(b_pa.location());
pa.rotation = get_float4(b_pa.rotation()); pa.rotation = get_float4(b_pa.rotation());
pa.size = b_pa.size(); pa.size = b_pa.size();
pa.velocity = get_float3(b_pa.velocity()); pa.velocity = get_float3(b_pa.velocity());
pa.angular_velocity = get_float3(b_pa.angular_velocity()); pa.angular_velocity = get_float3(b_pa.angular_velocity());
psys->particles.push_back_slow(pa); psys->particles.push_back_slow(pa);
if(object->particle_index != psys->particles.size() - 1) if (object->particle_index != psys->particles.size() - 1)
scene->object_manager->tag_update(scene); scene->object_manager->tag_update(scene);
object->particle_system = psys; object->particle_system = psys;
object->particle_index = psys->particles.size() - 1; object->particle_index = psys->particles.size() - 1;
/* return that this object has particle data */ /* return that this object has particle data */
return true; return true;
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -33,159 +33,153 @@ class RenderBuffers;
class RenderTile; class RenderTile;
class BlenderSession { class BlenderSession {
public: public:
BlenderSession(BL::RenderEngine& b_engine, BlenderSession(BL::RenderEngine &b_engine,
BL::Preferences& b_userpref, BL::Preferences &b_userpref,
BL::BlendData& b_data, BL::BlendData &b_data,
bool preview_osl); bool preview_osl);
BlenderSession(BL::RenderEngine& b_engine, BlenderSession(BL::RenderEngine &b_engine,
BL::Preferences& b_userpref, BL::Preferences &b_userpref,
BL::BlendData& b_data, BL::BlendData &b_data,
BL::SpaceView3D& b_v3d, BL::SpaceView3D &b_v3d,
BL::RegionView3D& b_rv3d, BL::RegionView3D &b_rv3d,
int width, int height); int width,
int height);
~BlenderSession(); ~BlenderSession();
void create(); void create();
/* session */ /* session */
void create_session(); void create_session();
void free_session(); void free_session();
void reset_session(BL::BlendData& b_data, void reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsgraph);
BL::Depsgraph& b_depsgraph);
/* offline render */ /* offline render */
void render(BL::Depsgraph& b_depsgraph); void render(BL::Depsgraph &b_depsgraph);
void bake(BL::Depsgraph& b_depsgrah, void bake(BL::Depsgraph &b_depsgrah,
BL::Object& b_object, BL::Object &b_object,
const string& pass_type, const string &pass_type,
const int custom_flag, const int custom_flag,
const int object_id, const int object_id,
BL::BakePixel& pixel_array, BL::BakePixel &pixel_array,
const size_t num_pixels, const size_t num_pixels,
const int depth, const int depth,
float pixels[]); float pixels[]);
void write_render_result(BL::RenderResult& b_rr, void write_render_result(BL::RenderResult &b_rr, BL::RenderLayer &b_rlay, RenderTile &rtile);
BL::RenderLayer& b_rlay, void write_render_tile(RenderTile &rtile);
RenderTile& rtile);
void write_render_tile(RenderTile& rtile);
/* update functions are used to update display buffer only after sample was rendered /* update functions are used to update display buffer only after sample was rendered
* only needed for better visual feedback */ * only needed for better visual feedback */
void update_render_result(BL::RenderResult& b_rr, void update_render_result(BL::RenderResult &b_rr, BL::RenderLayer &b_rlay, RenderTile &rtile);
BL::RenderLayer& b_rlay, void update_render_tile(RenderTile &rtile, bool highlight);
RenderTile& rtile);
void update_render_tile(RenderTile& rtile, bool highlight);
/* interactive updates */ /* interactive updates */
void synchronize(BL::Depsgraph& b_depsgraph); void synchronize(BL::Depsgraph &b_depsgraph);
/* drawing */ /* drawing */
bool draw(int w, int h); bool draw(int w, int h);
void tag_redraw(); void tag_redraw();
void tag_update(); void tag_update();
void get_status(string& status, string& substatus); void get_status(string &status, string &substatus);
void get_kernel_status(string& kernel_status); void get_kernel_status(string &kernel_status);
void get_progress(float& progress, double& total_time, double& render_time); void get_progress(float &progress, double &total_time, double &render_time);
void test_cancel(); void test_cancel();
void update_status_progress(); void update_status_progress();
void update_bake_progress(); void update_bake_progress();
bool background; bool background;
Session *session; Session *session;
Scene *scene; Scene *scene;
BlenderSync *sync; BlenderSync *sync;
double last_redraw_time; double last_redraw_time;
BL::RenderEngine b_engine; BL::RenderEngine b_engine;
BL::Preferences b_userpref; BL::Preferences b_userpref;
BL::BlendData b_data; BL::BlendData b_data;
BL::RenderSettings b_render; BL::RenderSettings b_render;
BL::Depsgraph b_depsgraph; BL::Depsgraph b_depsgraph;
/* NOTE: Blender's scene might become invalid after call /* NOTE: Blender's scene might become invalid after call
* free_blender_memory_if_possible(). * free_blender_memory_if_possible().
*/ */
BL::Scene b_scene; BL::Scene b_scene;
BL::SpaceView3D b_v3d; BL::SpaceView3D b_v3d;
BL::RegionView3D b_rv3d; BL::RegionView3D b_rv3d;
string b_rlay_name; string b_rlay_name;
string b_rview_name; string b_rview_name;
string last_status; string last_status;
string last_error; string last_error;
float last_progress; float last_progress;
double last_status_time; double last_status_time;
int width, height; int width, height;
bool preview_osl; bool preview_osl;
double start_resize_time; double start_resize_time;
void *python_thread_state; void *python_thread_state;
/* Global state which is common for all render sessions created from Blender. /* Global state which is common for all render sessions created from Blender.
* Usually denotes command line arguments. * Usually denotes command line arguments.
*/ */
/* Blender is running from the command line, no windows are shown and some /* Blender is running from the command line, no windows are shown and some
* extra render optimization is possible (possible to free draw-only data and * extra render optimization is possible (possible to free draw-only data and
* so on. * so on.
*/ */
static bool headless; static bool headless;
/* ** Resumable render ** */ /* ** Resumable render ** */
/* Overall number of chunks in which the sample range is to be devided. */ /* Overall number of chunks in which the sample range is to be devided. */
static int num_resumable_chunks; static int num_resumable_chunks;
/* Current resumable chunk index to render. */ /* Current resumable chunk index to render. */
static int current_resumable_chunk; static int current_resumable_chunk;
/* Alternative to single-chunk rendering to render a range of chunks. */ /* Alternative to single-chunk rendering to render a range of chunks. */
static int start_resumable_chunk; static int start_resumable_chunk;
static int end_resumable_chunk; static int end_resumable_chunk;
static bool print_render_stats; static bool print_render_stats;
protected: protected:
void stamp_view_layer_metadata(Scene *scene, const string& view_layer_name); void stamp_view_layer_metadata(Scene *scene, const string &view_layer_name);
void do_write_update_render_result(BL::RenderResult& b_rr, void do_write_update_render_result(BL::RenderResult &b_rr,
BL::RenderLayer& b_rlay, BL::RenderLayer &b_rlay,
RenderTile& rtile, RenderTile &rtile,
bool do_update_only); bool do_update_only);
void do_write_update_render_tile(RenderTile& rtile, bool do_update_only, bool highlight); void do_write_update_render_tile(RenderTile &rtile, bool do_update_only, bool highlight);
int builtin_image_frame(const string &builtin_name); int builtin_image_frame(const string &builtin_name);
void builtin_image_info(const string &builtin_name, void builtin_image_info(const string &builtin_name, void *builtin_data, ImageMetaData &metadata);
void *builtin_data, bool builtin_image_pixels(const string &builtin_name,
ImageMetaData& metadata); void *builtin_data,
bool builtin_image_pixels(const string &builtin_name, unsigned char *pixels,
void *builtin_data, const size_t pixels_size,
unsigned char *pixels, const bool free_cache);
const size_t pixels_size, bool builtin_image_float_pixels(const string &builtin_name,
const bool free_cache); void *builtin_data,
bool builtin_image_float_pixels(const string &builtin_name, float *pixels,
void *builtin_data, const size_t pixels_size,
float *pixels, const bool free_cache);
const size_t pixels_size, void builtin_images_load();
const bool free_cache);
void builtin_images_load();
/* Update tile manager to reflect resumable render settings. */ /* Update tile manager to reflect resumable render settings. */
void update_resumable_tile_manager(int num_samples); void update_resumable_tile_manager(int num_samples);
/* Is used after each render layer synchronization is done with the goal /* Is used after each render layer synchronization is done with the goal
* of freeing render engine data which is held from Blender side (for * of freeing render engine data which is held from Blender side (for
* example, dependency graph). * example, dependency graph).
*/ */
void free_blender_memory_if_possible(); void free_blender_memory_if_possible();
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __BLENDER_SESSION_H__ */ #endif /* __BLENDER_SESSION_H__ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -49,171 +49,173 @@ class ShaderGraph;
class ShaderNode; class ShaderNode;
class BlenderSync { class BlenderSync {
public: public:
BlenderSync(BL::RenderEngine& b_engine, BlenderSync(BL::RenderEngine &b_engine,
BL::BlendData& b_data, BL::BlendData &b_data,
BL::Scene& b_scene, BL::Scene &b_scene,
Scene *scene, Scene *scene,
bool preview, bool preview,
Progress &progress); Progress &progress);
~BlenderSync(); ~BlenderSync();
/* sync */ /* sync */
void sync_recalc(BL::Depsgraph& b_depsgraph); void sync_recalc(BL::Depsgraph &b_depsgraph);
void sync_data(BL::RenderSettings& b_render, void sync_data(BL::RenderSettings &b_render,
BL::Depsgraph& b_depsgraph, BL::Depsgraph &b_depsgraph,
BL::SpaceView3D& b_v3d, BL::SpaceView3D &b_v3d,
BL::Object& b_override, BL::Object &b_override,
int width, int height, int width,
void **python_thread_state); int height,
void sync_view_layer(BL::SpaceView3D& b_v3d, BL::ViewLayer& b_view_layer); void **python_thread_state);
vector<Pass> sync_render_passes(BL::RenderLayer& b_render_layer, void sync_view_layer(BL::SpaceView3D &b_v3d, BL::ViewLayer &b_view_layer);
BL::ViewLayer& b_view_layer); vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer, BL::ViewLayer &b_view_layer);
void sync_integrator(); void sync_integrator();
void sync_camera(BL::RenderSettings& b_render, void sync_camera(BL::RenderSettings &b_render,
BL::Object& b_override, BL::Object &b_override,
int width, int height, int width,
const char *viewname); int height,
void sync_view(BL::SpaceView3D& b_v3d, const char *viewname);
BL::RegionView3D& b_rv3d, void sync_view(BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, int width, int height);
int width, int height); inline int get_layer_samples()
inline int get_layer_samples() { return view_layer.samples; } {
inline int get_layer_bound_samples() { return view_layer.bound_samples; } return view_layer.samples;
}
inline int get_layer_bound_samples()
{
return view_layer.bound_samples;
}
/* get parameters */ /* get parameters */
static SceneParams get_scene_params(BL::Scene& b_scene, static SceneParams get_scene_params(BL::Scene &b_scene, bool background);
bool background); static SessionParams get_session_params(BL::RenderEngine &b_engine,
static SessionParams get_session_params(BL::RenderEngine& b_engine, BL::Preferences &b_userpref,
BL::Preferences& b_userpref, BL::Scene &b_scene,
BL::Scene& b_scene, bool background);
bool background); static bool get_session_pause(BL::Scene &b_scene, bool background);
static bool get_session_pause(BL::Scene& b_scene, bool background); static BufferParams get_buffer_params(BL::RenderSettings &b_render,
static BufferParams get_buffer_params(BL::RenderSettings& b_render, BL::SpaceView3D &b_v3d,
BL::SpaceView3D& b_v3d, BL::RegionView3D &b_rv3d,
BL::RegionView3D& b_rv3d, Camera *cam,
Camera *cam, int width,
int width, int height); int height);
static PassType get_pass_type(BL::RenderPass& b_pass); static PassType get_pass_type(BL::RenderPass &b_pass);
static int get_denoising_pass(BL::RenderPass& b_pass); static int get_denoising_pass(BL::RenderPass &b_pass);
private: private:
/* sync */ /* sync */
void sync_lights(BL::Depsgraph& b_depsgraph, bool update_all); void sync_lights(BL::Depsgraph &b_depsgraph, bool update_all);
void sync_materials(BL::Depsgraph& b_depsgraph, bool update_all); void sync_materials(BL::Depsgraph &b_depsgraph, bool update_all);
void sync_objects(BL::Depsgraph& b_depsgraph, float motion_time = 0.0f); void sync_objects(BL::Depsgraph &b_depsgraph, float motion_time = 0.0f);
void sync_motion(BL::RenderSettings& b_render, void sync_motion(BL::RenderSettings &b_render,
BL::Depsgraph& b_depsgraph, BL::Depsgraph &b_depsgraph,
BL::Object& b_override, BL::Object &b_override,
int width, int height, int width,
void **python_thread_state); int height,
void sync_film(); void **python_thread_state);
void sync_view(); void sync_film();
void sync_world(BL::Depsgraph& b_depsgraph, bool update_all); void sync_view();
void sync_shaders(BL::Depsgraph& b_depsgraph); void sync_world(BL::Depsgraph &b_depsgraph, bool update_all);
void sync_curve_settings(); void sync_shaders(BL::Depsgraph &b_depsgraph);
void sync_curve_settings();
void sync_nodes(Shader *shader, BL::ShaderNodeTree& b_ntree); void sync_nodes(Shader *shader, BL::ShaderNodeTree &b_ntree);
Mesh *sync_mesh(BL::Depsgraph& b_depsgrpah, Mesh *sync_mesh(BL::Depsgraph &b_depsgrpah,
BL::Object& b_ob, BL::Object &b_ob,
BL::Object& b_ob_instance, BL::Object &b_ob_instance,
bool object_updated, bool object_updated,
bool show_self, bool show_self,
bool show_particles); bool show_particles);
void sync_curves(Mesh *mesh, void sync_curves(
BL::Mesh& b_mesh, Mesh *mesh, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step = 0);
BL::Object& b_ob, Object *sync_object(BL::Depsgraph &b_depsgraph,
bool motion, BL::ViewLayer &b_view_layer,
int motion_step = 0); BL::DepsgraphObjectInstance &b_instance,
Object *sync_object(BL::Depsgraph& b_depsgraph, float motion_time,
BL::ViewLayer& b_view_layer, bool show_self,
BL::DepsgraphObjectInstance& b_instance, bool show_particles,
float motion_time, BlenderObjectCulling &culling,
bool show_self, bool *use_portal);
bool show_particles, void sync_light(BL::Object &b_parent,
BlenderObjectCulling& culling, int persistent_id[OBJECT_PERSISTENT_ID_SIZE],
bool *use_portal); BL::Object &b_ob,
void sync_light(BL::Object& b_parent, BL::Object &b_ob_instance,
int persistent_id[OBJECT_PERSISTENT_ID_SIZE], int random_id,
BL::Object& b_ob, Transform &tfm,
BL::Object& b_ob_instance, bool *use_portal);
int random_id, void sync_background_light(bool use_portal);
Transform& tfm, void sync_mesh_motion(BL::Depsgraph &b_depsgraph,
bool *use_portal); BL::Object &b_ob,
void sync_background_light(bool use_portal); Object *object,
void sync_mesh_motion(BL::Depsgraph& b_depsgraph, float motion_time);
BL::Object& b_ob, void sync_camera_motion(
Object *object, BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time);
float motion_time);
void sync_camera_motion(BL::RenderSettings& b_render,
BL::Object& b_ob,
int width, int height,
float motion_time);
/* particles */ /* particles */
bool sync_dupli_particle(BL::Object& b_ob, bool sync_dupli_particle(BL::Object &b_ob,
BL::DepsgraphObjectInstance& b_instance, BL::DepsgraphObjectInstance &b_instance,
Object *object); Object *object);
/* Images. */ /* Images. */
void sync_images(); void sync_images();
/* Early data free. */ /* Early data free. */
void free_data_after_sync(BL::Depsgraph& b_depsgraph); void free_data_after_sync(BL::Depsgraph &b_depsgraph);
/* util */ /* util */
void find_shader(BL::ID& id, vector<Shader*>& used_shaders, Shader *default_shader); void find_shader(BL::ID &id, vector<Shader *> &used_shaders, Shader *default_shader);
bool BKE_object_is_modified(BL::Object& b_ob); bool BKE_object_is_modified(BL::Object &b_ob);
bool object_is_mesh(BL::Object& b_ob); bool object_is_mesh(BL::Object &b_ob);
bool object_is_light(BL::Object& b_ob); bool object_is_light(BL::Object &b_ob);
/* variables */ /* variables */
BL::RenderEngine b_engine; BL::RenderEngine b_engine;
BL::BlendData b_data; BL::BlendData b_data;
BL::Scene b_scene; BL::Scene b_scene;
id_map<void*, Shader> shader_map; id_map<void *, Shader> shader_map;
id_map<ObjectKey, Object> object_map; id_map<ObjectKey, Object> object_map;
id_map<void*, Mesh> mesh_map; id_map<void *, Mesh> mesh_map;
id_map<ObjectKey, Light> light_map; id_map<ObjectKey, Light> light_map;
id_map<ParticleSystemKey, ParticleSystem> particle_system_map; id_map<ParticleSystemKey, ParticleSystem> particle_system_map;
set<Mesh*> mesh_synced; set<Mesh *> mesh_synced;
set<Mesh*> mesh_motion_synced; set<Mesh *> mesh_motion_synced;
set<float> motion_times; set<float> motion_times;
void *world_map; void *world_map;
bool world_recalc; bool world_recalc;
Scene *scene; Scene *scene;
bool preview; bool preview;
bool experimental; bool experimental;
float dicing_rate; float dicing_rate;
int max_subdivisions; int max_subdivisions;
struct RenderLayerInfo { struct RenderLayerInfo {
RenderLayerInfo() RenderLayerInfo()
: material_override(PointerRNA_NULL), : material_override(PointerRNA_NULL),
use_background_shader(true), use_background_shader(true),
use_background_ao(true), use_background_ao(true),
use_surfaces(true), use_surfaces(true),
use_hair(true), use_hair(true),
samples(0), samples(0),
bound_samples(false) bound_samples(false)
{} {
}
string name; string name;
BL::Material material_override; BL::Material material_override;
bool use_background_shader; bool use_background_shader;
bool use_background_ao; bool use_background_ao;
bool use_surfaces; bool use_surfaces;
bool use_hair; bool use_hair;
int samples; int samples;
bool bound_samples; bool bound_samples;
} view_layer; } view_layer;
Progress &progress; Progress &progress;
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __BLENDER_SYNC_H__ */ #endif /* __BLENDER_SYNC_H__ */

View File

@@ -22,36 +22,36 @@ namespace {
/* Point density helpers. */ /* Point density helpers. */
void density_texture_space_invert(float3& loc, void density_texture_space_invert(float3 &loc, float3 &size)
float3& size)
{ {
if(size.x != 0.0f) size.x = 0.5f/size.x; if (size.x != 0.0f)
if(size.y != 0.0f) size.y = 0.5f/size.y; size.x = 0.5f / size.x;
if(size.z != 0.0f) size.z = 0.5f/size.z; if (size.y != 0.0f)
size.y = 0.5f / size.y;
if (size.z != 0.0f)
size.z = 0.5f / size.z;
loc = loc*size - make_float3(0.5f, 0.5f, 0.5f); loc = loc * size - make_float3(0.5f, 0.5f, 0.5f);
} }
} /* namespace */ } /* namespace */
void point_density_texture_space(BL::Depsgraph& b_depsgraph, void point_density_texture_space(BL::Depsgraph &b_depsgraph,
BL::ShaderNodeTexPointDensity& b_point_density_node, BL::ShaderNodeTexPointDensity &b_point_density_node,
float3& loc, float3 &loc,
float3& size) float3 &size)
{ {
BL::Object b_ob(b_point_density_node.object()); BL::Object b_ob(b_point_density_node.object());
if(!b_ob) { if (!b_ob) {
loc = make_float3(0.0f, 0.0f, 0.0f); loc = make_float3(0.0f, 0.0f, 0.0f);
size = make_float3(0.0f, 0.0f, 0.0f); size = make_float3(0.0f, 0.0f, 0.0f);
return; return;
} }
float3 min, max; float3 min, max;
b_point_density_node.calc_point_density_minmax(b_depsgraph, b_point_density_node.calc_point_density_minmax(b_depsgraph, &min[0], &max[0]);
&min[0], loc = (min + max) * 0.5f;
&max[0]); size = (max - min) * 0.5f;
loc = (min + max) * 0.5f; density_texture_space_invert(loc, size);
size = (max - min) * 0.5f;
density_texture_space_invert(loc, size);
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -22,11 +22,11 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
void point_density_texture_space(BL::Depsgraph& b_depsgraph, void point_density_texture_space(BL::Depsgraph &b_depsgraph,
BL::ShaderNodeTexPointDensity& b_point_density_node, BL::ShaderNodeTexPointDensity &b_point_density_node,
float3& loc, float3 &loc,
float3& size); float3 &size);
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __BLENDER_TEXTURE_H__ */ #endif /* __BLENDER_TEXTURE_H__ */

File diff suppressed because it is too large Load Diff

View File

@@ -1,42 +1,42 @@
set(INC set(INC
.. ..
) )
set(INC_SYS set(INC_SYS
) )
set(SRC set(SRC
bvh.cpp bvh.cpp
bvh2.cpp bvh2.cpp
bvh4.cpp bvh4.cpp
bvh8.cpp bvh8.cpp
bvh_binning.cpp bvh_binning.cpp
bvh_build.cpp bvh_build.cpp
bvh_embree.cpp bvh_embree.cpp
bvh_node.cpp bvh_node.cpp
bvh_sort.cpp bvh_sort.cpp
bvh_split.cpp bvh_split.cpp
bvh_unaligned.cpp bvh_unaligned.cpp
) )
set(SRC_HEADERS set(SRC_HEADERS
bvh.h bvh.h
bvh2.h bvh2.h
bvh4.h bvh4.h
bvh8.h bvh8.h
bvh_binning.h bvh_binning.h
bvh_build.h bvh_build.h
bvh_embree.h bvh_embree.h
bvh_node.h bvh_node.h
bvh_params.h bvh_params.h
bvh_sort.h bvh_sort.h
bvh_split.h bvh_split.h
bvh_unaligned.h bvh_unaligned.h
) )
set(LIB set(LIB
cycles_render cycles_render
) )
include_directories(${INC}) include_directories(${INC})

View File

@@ -27,7 +27,7 @@
#include "bvh/bvh_node.h" #include "bvh/bvh_node.h"
#ifdef WITH_EMBREE #ifdef WITH_EMBREE
#include "bvh/bvh_embree.h" # include "bvh/bvh_embree.h"
#endif #endif
#include "util/util_foreach.h" #include "util/util_foreach.h"
@@ -40,533 +40,529 @@ CCL_NAMESPACE_BEGIN
const char *bvh_layout_name(BVHLayout layout) const char *bvh_layout_name(BVHLayout layout)
{ {
switch(layout) { switch (layout) {
case BVH_LAYOUT_BVH2: return "BVH2"; case BVH_LAYOUT_BVH2:
case BVH_LAYOUT_BVH4: return "BVH4"; return "BVH2";
case BVH_LAYOUT_BVH8: return "BVH8"; case BVH_LAYOUT_BVH4:
case BVH_LAYOUT_NONE: return "NONE"; return "BVH4";
case BVH_LAYOUT_EMBREE: return "EMBREE"; case BVH_LAYOUT_BVH8:
case BVH_LAYOUT_ALL: return "ALL"; return "BVH8";
} case BVH_LAYOUT_NONE:
LOG(DFATAL) << "Unsupported BVH layout was passed."; return "NONE";
return ""; case BVH_LAYOUT_EMBREE:
return "EMBREE";
case BVH_LAYOUT_ALL:
return "ALL";
}
LOG(DFATAL) << "Unsupported BVH layout was passed.";
return "";
} }
BVHLayout BVHParams::best_bvh_layout(BVHLayout requested_layout, BVHLayout BVHParams::best_bvh_layout(BVHLayout requested_layout, BVHLayoutMask supported_layouts)
BVHLayoutMask supported_layouts)
{ {
const BVHLayoutMask requested_layout_mask = (BVHLayoutMask)requested_layout; const BVHLayoutMask requested_layout_mask = (BVHLayoutMask)requested_layout;
/* Check whether requested layout is supported, if so -- no need to do /* Check whether requested layout is supported, if so -- no need to do
* any extra computation. * any extra computation.
*/ */
if(supported_layouts & requested_layout_mask) { if (supported_layouts & requested_layout_mask) {
return requested_layout; return requested_layout;
} }
/* Some bit magic to get widest supported BVH layout. */ /* Some bit magic to get widest supported BVH layout. */
/* This is a mask of supported BVH layouts which are narrower than the /* This is a mask of supported BVH layouts which are narrower than the
* requested one. * requested one.
*/ */
const BVHLayoutMask allowed_layouts_mask = const BVHLayoutMask allowed_layouts_mask = (supported_layouts & (requested_layout_mask - 1));
(supported_layouts & (requested_layout_mask - 1)); /* We get widest from allowed ones and convert mask to actual layout. */
/* We get widest from allowed ones and convert mask to actual layout. */ const BVHLayoutMask widest_allowed_layout_mask = __bsr(allowed_layouts_mask);
const BVHLayoutMask widest_allowed_layout_mask = __bsr(allowed_layouts_mask); return (BVHLayout)(1 << widest_allowed_layout_mask);
return (BVHLayout)(1 << widest_allowed_layout_mask);
} }
/* Pack Utility */ /* Pack Utility */
BVHStackEntry::BVHStackEntry(const BVHNode *n, int i) BVHStackEntry::BVHStackEntry(const BVHNode *n, int i) : node(n), idx(i)
: node(n), idx(i)
{ {
} }
int BVHStackEntry::encodeIdx() const int BVHStackEntry::encodeIdx() const
{ {
return (node->is_leaf())? ~idx: idx; return (node->is_leaf()) ? ~idx : idx;
} }
/* BVH */ /* BVH */
BVH::BVH(const BVHParams& params_, const vector<Object*>& objects_) BVH::BVH(const BVHParams &params_, const vector<Object *> &objects_)
: params(params_), objects(objects_) : params(params_), objects(objects_)
{ {
} }
BVH *BVH::create(const BVHParams& params, const vector<Object*>& objects) BVH *BVH::create(const BVHParams &params, const vector<Object *> &objects)
{ {
switch(params.bvh_layout) { switch (params.bvh_layout) {
case BVH_LAYOUT_BVH2: case BVH_LAYOUT_BVH2:
return new BVH2(params, objects); return new BVH2(params, objects);
case BVH_LAYOUT_BVH4: case BVH_LAYOUT_BVH4:
return new BVH4(params, objects); return new BVH4(params, objects);
case BVH_LAYOUT_BVH8: case BVH_LAYOUT_BVH8:
return new BVH8(params, objects); return new BVH8(params, objects);
case BVH_LAYOUT_EMBREE: case BVH_LAYOUT_EMBREE:
#ifdef WITH_EMBREE #ifdef WITH_EMBREE
return new BVHEmbree(params, objects); return new BVHEmbree(params, objects);
#endif #endif
case BVH_LAYOUT_NONE: case BVH_LAYOUT_NONE:
case BVH_LAYOUT_ALL: case BVH_LAYOUT_ALL:
break; break;
} }
LOG(DFATAL) << "Requested unsupported BVH layout."; LOG(DFATAL) << "Requested unsupported BVH layout.";
return NULL; return NULL;
} }
/* Building */ /* Building */
void BVH::build(Progress& progress, Stats*) void BVH::build(Progress &progress, Stats *)
{ {
progress.set_substatus("Building BVH"); progress.set_substatus("Building BVH");
/* build nodes */ /* build nodes */
BVHBuild bvh_build(objects, BVHBuild bvh_build(objects,
pack.prim_type, pack.prim_type,
pack.prim_index, pack.prim_index,
pack.prim_object, pack.prim_object,
pack.prim_time, pack.prim_time,
params, params,
progress); progress);
BVHNode *bvh2_root = bvh_build.run(); BVHNode *bvh2_root = bvh_build.run();
if(progress.get_cancel()) { if (progress.get_cancel()) {
if(bvh2_root != NULL) { if (bvh2_root != NULL) {
bvh2_root->deleteSubtree(); bvh2_root->deleteSubtree();
} }
return; return;
} }
/* BVH builder returns tree in a binary mode (with two children per inner /* BVH builder returns tree in a binary mode (with two children per inner
* node. Need to adopt that for a wider BVH implementations. */ * node. Need to adopt that for a wider BVH implementations. */
BVHNode *root = widen_children_nodes(bvh2_root); BVHNode *root = widen_children_nodes(bvh2_root);
if(root != bvh2_root) { if (root != bvh2_root) {
bvh2_root->deleteSubtree(); bvh2_root->deleteSubtree();
} }
if(progress.get_cancel()) { if (progress.get_cancel()) {
if(root != NULL) { if (root != NULL) {
root->deleteSubtree(); root->deleteSubtree();
} }
return; return;
} }
/* pack triangles */ /* pack triangles */
progress.set_substatus("Packing BVH triangles and strands"); progress.set_substatus("Packing BVH triangles and strands");
pack_primitives(); pack_primitives();
if(progress.get_cancel()) { if (progress.get_cancel()) {
root->deleteSubtree(); root->deleteSubtree();
return; return;
} }
/* pack nodes */ /* pack nodes */
progress.set_substatus("Packing BVH nodes"); progress.set_substatus("Packing BVH nodes");
pack_nodes(root); pack_nodes(root);
/* free build nodes */ /* free build nodes */
root->deleteSubtree(); root->deleteSubtree();
} }
/* Refitting */ /* Refitting */
/* Refit the BVH: repack the primitives, then let the subclass refit its
 * nodes. Node refitting is skipped when `progress` reports a cancel after
 * the primitives were packed. */
void BVH::refit(Progress &progress)
{
  progress.set_substatus("Packing BVH primitives");
  pack_primitives();

  if (!progress.get_cancel()) {
    progress.set_substatus("Refitting BVH nodes");
    refit_nodes();
  }
}
void BVH::refit_primitives(int start, int end, BoundBox& bbox, uint& visibility) void BVH::refit_primitives(int start, int end, BoundBox &bbox, uint &visibility)
{ {
/* Refit range of primitives. */ /* Refit range of primitives. */
for(int prim = start; prim < end; prim++) { for (int prim = start; prim < end; prim++) {
int pidx = pack.prim_index[prim]; int pidx = pack.prim_index[prim];
int tob = pack.prim_object[prim]; int tob = pack.prim_object[prim];
Object *ob = objects[tob]; Object *ob = objects[tob];
if(pidx == -1) { if (pidx == -1) {
/* Object instance. */ /* Object instance. */
bbox.grow(ob->bounds); bbox.grow(ob->bounds);
} }
else { else {
/* Primitives. */ /* Primitives. */
const Mesh *mesh = ob->mesh; const Mesh *mesh = ob->mesh;
if(pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) { if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
/* Curves. */ /* Curves. */
int str_offset = (params.top_level)? mesh->curve_offset: 0; int str_offset = (params.top_level) ? mesh->curve_offset : 0;
Mesh::Curve curve = mesh->get_curve(pidx - str_offset); Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]); int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox); curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);
visibility |= PATH_RAY_CURVE; visibility |= PATH_RAY_CURVE;
/* Motion curves. */ /* Motion curves. */
if(mesh->use_motion_blur) { if (mesh->use_motion_blur) {
Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if(attr) { if (attr) {
size_t mesh_size = mesh->curve_keys.size(); size_t mesh_size = mesh->curve_keys.size();
size_t steps = mesh->motion_steps - 1; size_t steps = mesh->motion_steps - 1;
float3 *key_steps = attr->data_float3(); float3 *key_steps = attr->data_float3();
for(size_t i = 0; i < steps; i++) for (size_t i = 0; i < steps; i++)
curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bbox); curve.bounds_grow(k, key_steps + i * mesh_size, &mesh->curve_radius[0], bbox);
} }
} }
} }
else { else {
/* Triangles. */ /* Triangles. */
int tri_offset = (params.top_level)? mesh->tri_offset: 0; int tri_offset = (params.top_level) ? mesh->tri_offset : 0;
Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset); Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
const float3 *vpos = &mesh->verts[0]; const float3 *vpos = &mesh->verts[0];
triangle.bounds_grow(vpos, bbox); triangle.bounds_grow(vpos, bbox);
/* Motion triangles. */ /* Motion triangles. */
if(mesh->use_motion_blur) { if (mesh->use_motion_blur) {
Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if(attr) { if (attr) {
size_t mesh_size = mesh->verts.size(); size_t mesh_size = mesh->verts.size();
size_t steps = mesh->motion_steps - 1; size_t steps = mesh->motion_steps - 1;
float3 *vert_steps = attr->data_float3(); float3 *vert_steps = attr->data_float3();
for(size_t i = 0; i < steps; i++) for (size_t i = 0; i < steps; i++)
triangle.bounds_grow(vert_steps + i*mesh_size, bbox); triangle.bounds_grow(vert_steps + i * mesh_size, bbox);
} }
} }
} }
} }
visibility |= ob->visibility_for_tracing(); visibility |= ob->visibility_for_tracing();
}
}
} }
/* Triangles */ /* Triangles */
void BVH::pack_triangle(int idx, float4 tri_verts[3]) void BVH::pack_triangle(int idx, float4 tri_verts[3])
{ {
int tob = pack.prim_object[idx]; int tob = pack.prim_object[idx];
assert(tob >= 0 && tob < objects.size()); assert(tob >= 0 && tob < objects.size());
const Mesh *mesh = objects[tob]->mesh; const Mesh *mesh = objects[tob]->mesh;
int tidx = pack.prim_index[idx]; int tidx = pack.prim_index[idx];
Mesh::Triangle t = mesh->get_triangle(tidx); Mesh::Triangle t = mesh->get_triangle(tidx);
const float3 *vpos = &mesh->verts[0]; const float3 *vpos = &mesh->verts[0];
float3 v0 = vpos[t.v[0]]; float3 v0 = vpos[t.v[0]];
float3 v1 = vpos[t.v[1]]; float3 v1 = vpos[t.v[1]];
float3 v2 = vpos[t.v[2]]; float3 v2 = vpos[t.v[2]];
tri_verts[0] = float3_to_float4(v0); tri_verts[0] = float3_to_float4(v0);
tri_verts[1] = float3_to_float4(v1); tri_verts[1] = float3_to_float4(v1);
tri_verts[2] = float3_to_float4(v2); tri_verts[2] = float3_to_float4(v2);
} }
void BVH::pack_primitives() void BVH::pack_primitives()
{ {
const size_t tidx_size = pack.prim_index.size(); const size_t tidx_size = pack.prim_index.size();
size_t num_prim_triangles = 0; size_t num_prim_triangles = 0;
/* Count number of triangles primitives in BVH. */ /* Count number of triangles primitives in BVH. */
for(unsigned int i = 0; i < tidx_size; i++) { for (unsigned int i = 0; i < tidx_size; i++) {
if((pack.prim_index[i] != -1)) { if ((pack.prim_index[i] != -1)) {
if((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) { if ((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
++num_prim_triangles; ++num_prim_triangles;
} }
} }
} }
/* Reserve size for arrays. */ /* Reserve size for arrays. */
pack.prim_tri_index.clear(); pack.prim_tri_index.clear();
pack.prim_tri_index.resize(tidx_size); pack.prim_tri_index.resize(tidx_size);
pack.prim_tri_verts.clear(); pack.prim_tri_verts.clear();
pack.prim_tri_verts.resize(num_prim_triangles * 3); pack.prim_tri_verts.resize(num_prim_triangles * 3);
pack.prim_visibility.clear(); pack.prim_visibility.clear();
pack.prim_visibility.resize(tidx_size); pack.prim_visibility.resize(tidx_size);
/* Fill in all the arrays. */ /* Fill in all the arrays. */
size_t prim_triangle_index = 0; size_t prim_triangle_index = 0;
for(unsigned int i = 0; i < tidx_size; i++) { for (unsigned int i = 0; i < tidx_size; i++) {
if(pack.prim_index[i] != -1) { if (pack.prim_index[i] != -1) {
int tob = pack.prim_object[i]; int tob = pack.prim_object[i];
Object *ob = objects[tob]; Object *ob = objects[tob];
if((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) { if ((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
pack_triangle(i, (float4*)&pack.prim_tri_verts[3 * prim_triangle_index]); pack_triangle(i, (float4 *)&pack.prim_tri_verts[3 * prim_triangle_index]);
pack.prim_tri_index[i] = 3 * prim_triangle_index; pack.prim_tri_index[i] = 3 * prim_triangle_index;
++prim_triangle_index; ++prim_triangle_index;
} }
else { else {
pack.prim_tri_index[i] = -1; pack.prim_tri_index[i] = -1;
} }
pack.prim_visibility[i] = ob->visibility_for_tracing(); pack.prim_visibility[i] = ob->visibility_for_tracing();
if(pack.prim_type[i] & PRIMITIVE_ALL_CURVE) { if (pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
pack.prim_visibility[i] |= PATH_RAY_CURVE; pack.prim_visibility[i] |= PATH_RAY_CURVE;
} }
} }
else { else {
pack.prim_tri_index[i] = -1; pack.prim_tri_index[i] = -1;
pack.prim_visibility[i] = 0; pack.prim_visibility[i] = 0;
} }
} }
} }
/* Pack Instances */ /* Pack Instances */
void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size) void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
{ {
/* The BVH's for instances are built separately, but for traversal all /* The BVH's for instances are built separately, but for traversal all
* BVH's are stored in global arrays. This function merges them into the * BVH's are stored in global arrays. This function merges them into the
* top level BVH, adjusting indexes and offsets where appropriate. * top level BVH, adjusting indexes and offsets where appropriate.
*/ */
const bool use_qbvh = (params.bvh_layout == BVH_LAYOUT_BVH4); const bool use_qbvh = (params.bvh_layout == BVH_LAYOUT_BVH4);
const bool use_obvh = (params.bvh_layout == BVH_LAYOUT_BVH8); const bool use_obvh = (params.bvh_layout == BVH_LAYOUT_BVH8);
/* Adjust primitive index to point to the triangle in the global array, for /* Adjust primitive index to point to the triangle in the global array, for
* meshes with transform applied and already in the top level BVH. * meshes with transform applied and already in the top level BVH.
*/ */
for(size_t i = 0; i < pack.prim_index.size(); i++) for (size_t i = 0; i < pack.prim_index.size(); i++)
if(pack.prim_index[i] != -1) { if (pack.prim_index[i] != -1) {
if(pack.prim_type[i] & PRIMITIVE_ALL_CURVE) if (pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->curve_offset; pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->curve_offset;
else else
pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->tri_offset; pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->tri_offset;
} }
/* track offsets of instanced BVH data in global array */ /* track offsets of instanced BVH data in global array */
size_t prim_offset = pack.prim_index.size(); size_t prim_offset = pack.prim_index.size();
size_t nodes_offset = nodes_size; size_t nodes_offset = nodes_size;
size_t nodes_leaf_offset = leaf_nodes_size; size_t nodes_leaf_offset = leaf_nodes_size;
/* clear array that gives the node indexes for instanced objects */ /* clear array that gives the node indexes for instanced objects */
pack.object_node.clear(); pack.object_node.clear();
/* reserve */ /* reserve */
size_t prim_index_size = pack.prim_index.size(); size_t prim_index_size = pack.prim_index.size();
size_t prim_tri_verts_size = pack.prim_tri_verts.size(); size_t prim_tri_verts_size = pack.prim_tri_verts.size();
size_t pack_prim_index_offset = prim_index_size; size_t pack_prim_index_offset = prim_index_size;
size_t pack_prim_tri_verts_offset = prim_tri_verts_size; size_t pack_prim_tri_verts_offset = prim_tri_verts_size;
size_t pack_nodes_offset = nodes_size; size_t pack_nodes_offset = nodes_size;
size_t pack_leaf_nodes_offset = leaf_nodes_size; size_t pack_leaf_nodes_offset = leaf_nodes_size;
size_t object_offset = 0; size_t object_offset = 0;
map<Mesh*, int> mesh_map; map<Mesh *, int> mesh_map;
foreach(Object *ob, objects) { foreach (Object *ob, objects) {
Mesh *mesh = ob->mesh; Mesh *mesh = ob->mesh;
BVH *bvh = mesh->bvh; BVH *bvh = mesh->bvh;
if(mesh->need_build_bvh()) { if (mesh->need_build_bvh()) {
if(mesh_map.find(mesh) == mesh_map.end()) { if (mesh_map.find(mesh) == mesh_map.end()) {
prim_index_size += bvh->pack.prim_index.size(); prim_index_size += bvh->pack.prim_index.size();
prim_tri_verts_size += bvh->pack.prim_tri_verts.size(); prim_tri_verts_size += bvh->pack.prim_tri_verts.size();
nodes_size += bvh->pack.nodes.size(); nodes_size += bvh->pack.nodes.size();
leaf_nodes_size += bvh->pack.leaf_nodes.size(); leaf_nodes_size += bvh->pack.leaf_nodes.size();
mesh_map[mesh] = 1; mesh_map[mesh] = 1;
} }
} }
} }
mesh_map.clear(); mesh_map.clear();
pack.prim_index.resize(prim_index_size); pack.prim_index.resize(prim_index_size);
pack.prim_type.resize(prim_index_size); pack.prim_type.resize(prim_index_size);
pack.prim_object.resize(prim_index_size); pack.prim_object.resize(prim_index_size);
pack.prim_visibility.resize(prim_index_size); pack.prim_visibility.resize(prim_index_size);
pack.prim_tri_verts.resize(prim_tri_verts_size); pack.prim_tri_verts.resize(prim_tri_verts_size);
pack.prim_tri_index.resize(prim_index_size); pack.prim_tri_index.resize(prim_index_size);
pack.nodes.resize(nodes_size); pack.nodes.resize(nodes_size);
pack.leaf_nodes.resize(leaf_nodes_size); pack.leaf_nodes.resize(leaf_nodes_size);
pack.object_node.resize(objects.size()); pack.object_node.resize(objects.size());
if(params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0) { if (params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0) {
pack.prim_time.resize(prim_index_size); pack.prim_time.resize(prim_index_size);
} }
int *pack_prim_index = (pack.prim_index.size())? &pack.prim_index[0]: NULL; int *pack_prim_index = (pack.prim_index.size()) ? &pack.prim_index[0] : NULL;
int *pack_prim_type = (pack.prim_type.size())? &pack.prim_type[0]: NULL; int *pack_prim_type = (pack.prim_type.size()) ? &pack.prim_type[0] : NULL;
int *pack_prim_object = (pack.prim_object.size())? &pack.prim_object[0]: NULL; int *pack_prim_object = (pack.prim_object.size()) ? &pack.prim_object[0] : NULL;
uint *pack_prim_visibility = (pack.prim_visibility.size())? &pack.prim_visibility[0]: NULL; uint *pack_prim_visibility = (pack.prim_visibility.size()) ? &pack.prim_visibility[0] : NULL;
float4 *pack_prim_tri_verts = (pack.prim_tri_verts.size())? &pack.prim_tri_verts[0]: NULL; float4 *pack_prim_tri_verts = (pack.prim_tri_verts.size()) ? &pack.prim_tri_verts[0] : NULL;
uint *pack_prim_tri_index = (pack.prim_tri_index.size())? &pack.prim_tri_index[0]: NULL; uint *pack_prim_tri_index = (pack.prim_tri_index.size()) ? &pack.prim_tri_index[0] : NULL;
int4 *pack_nodes = (pack.nodes.size())? &pack.nodes[0]: NULL; int4 *pack_nodes = (pack.nodes.size()) ? &pack.nodes[0] : NULL;
int4 *pack_leaf_nodes = (pack.leaf_nodes.size())? &pack.leaf_nodes[0]: NULL; int4 *pack_leaf_nodes = (pack.leaf_nodes.size()) ? &pack.leaf_nodes[0] : NULL;
float2 *pack_prim_time = (pack.prim_time.size())? &pack.prim_time[0]: NULL; float2 *pack_prim_time = (pack.prim_time.size()) ? &pack.prim_time[0] : NULL;
/* merge */ /* merge */
foreach(Object *ob, objects) { foreach (Object *ob, objects) {
Mesh *mesh = ob->mesh; Mesh *mesh = ob->mesh;
/* We assume that if mesh doesn't need own BVH it was already included /* We assume that if mesh doesn't need own BVH it was already included
* into a top-level BVH and no packing here is needed. * into a top-level BVH and no packing here is needed.
*/ */
if(!mesh->need_build_bvh()) { if (!mesh->need_build_bvh()) {
pack.object_node[object_offset++] = 0; pack.object_node[object_offset++] = 0;
continue; continue;
} }
/* if mesh already added once, don't add it again, but used set /* if mesh already added once, don't add it again, but used set
* node offset for this object */ * node offset for this object */
map<Mesh*, int>::iterator it = mesh_map.find(mesh); map<Mesh *, int>::iterator it = mesh_map.find(mesh);
if(mesh_map.find(mesh) != mesh_map.end()) { if (mesh_map.find(mesh) != mesh_map.end()) {
int noffset = it->second; int noffset = it->second;
pack.object_node[object_offset++] = noffset; pack.object_node[object_offset++] = noffset;
continue; continue;
} }
BVH *bvh = mesh->bvh; BVH *bvh = mesh->bvh;
int noffset = nodes_offset; int noffset = nodes_offset;
int noffset_leaf = nodes_leaf_offset; int noffset_leaf = nodes_leaf_offset;
int mesh_tri_offset = mesh->tri_offset; int mesh_tri_offset = mesh->tri_offset;
int mesh_curve_offset = mesh->curve_offset; int mesh_curve_offset = mesh->curve_offset;
/* fill in node indexes for instances */ /* fill in node indexes for instances */
if(bvh->pack.root_index == -1) if (bvh->pack.root_index == -1)
pack.object_node[object_offset++] = -noffset_leaf-1; pack.object_node[object_offset++] = -noffset_leaf - 1;
else else
pack.object_node[object_offset++] = noffset; pack.object_node[object_offset++] = noffset;
mesh_map[mesh] = pack.object_node[object_offset-1]; mesh_map[mesh] = pack.object_node[object_offset - 1];
/* merge primitive, object and triangle indexes */ /* merge primitive, object and triangle indexes */
if(bvh->pack.prim_index.size()) { if (bvh->pack.prim_index.size()) {
size_t bvh_prim_index_size = bvh->pack.prim_index.size(); size_t bvh_prim_index_size = bvh->pack.prim_index.size();
int *bvh_prim_index = &bvh->pack.prim_index[0]; int *bvh_prim_index = &bvh->pack.prim_index[0];
int *bvh_prim_type = &bvh->pack.prim_type[0]; int *bvh_prim_type = &bvh->pack.prim_type[0];
uint *bvh_prim_visibility = &bvh->pack.prim_visibility[0]; uint *bvh_prim_visibility = &bvh->pack.prim_visibility[0];
uint *bvh_prim_tri_index = &bvh->pack.prim_tri_index[0]; uint *bvh_prim_tri_index = &bvh->pack.prim_tri_index[0];
float2 *bvh_prim_time = bvh->pack.prim_time.size()? &bvh->pack.prim_time[0]: NULL; float2 *bvh_prim_time = bvh->pack.prim_time.size() ? &bvh->pack.prim_time[0] : NULL;
for(size_t i = 0; i < bvh_prim_index_size; i++) { for (size_t i = 0; i < bvh_prim_index_size; i++) {
if(bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) { if (bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_curve_offset; pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_curve_offset;
pack_prim_tri_index[pack_prim_index_offset] = -1; pack_prim_tri_index[pack_prim_index_offset] = -1;
} }
else { else {
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset; pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset;
pack_prim_tri_index[pack_prim_index_offset] = pack_prim_tri_index[pack_prim_index_offset] = bvh_prim_tri_index[i] +
bvh_prim_tri_index[i] + pack_prim_tri_verts_offset; pack_prim_tri_verts_offset;
} }
pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i]; pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i]; pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i];
pack_prim_object[pack_prim_index_offset] = 0; // unused for instances pack_prim_object[pack_prim_index_offset] = 0; // unused for instances
if(bvh_prim_time != NULL) { if (bvh_prim_time != NULL) {
pack_prim_time[pack_prim_index_offset] = bvh_prim_time[i]; pack_prim_time[pack_prim_index_offset] = bvh_prim_time[i];
} }
pack_prim_index_offset++; pack_prim_index_offset++;
} }
} }
/* Merge triangle vertices data. */ /* Merge triangle vertices data. */
if(bvh->pack.prim_tri_verts.size()) { if (bvh->pack.prim_tri_verts.size()) {
const size_t prim_tri_size = bvh->pack.prim_tri_verts.size(); const size_t prim_tri_size = bvh->pack.prim_tri_verts.size();
memcpy(pack_prim_tri_verts + pack_prim_tri_verts_offset, memcpy(pack_prim_tri_verts + pack_prim_tri_verts_offset,
&bvh->pack.prim_tri_verts[0], &bvh->pack.prim_tri_verts[0],
prim_tri_size*sizeof(float4)); prim_tri_size * sizeof(float4));
pack_prim_tri_verts_offset += prim_tri_size; pack_prim_tri_verts_offset += prim_tri_size;
} }
/* merge nodes */ /* merge nodes */
if(bvh->pack.leaf_nodes.size()) { if (bvh->pack.leaf_nodes.size()) {
int4 *leaf_nodes_offset = &bvh->pack.leaf_nodes[0]; int4 *leaf_nodes_offset = &bvh->pack.leaf_nodes[0];
size_t leaf_nodes_offset_size = bvh->pack.leaf_nodes.size(); size_t leaf_nodes_offset_size = bvh->pack.leaf_nodes.size();
for(size_t i = 0, j = 0; for (size_t i = 0, j = 0; i < leaf_nodes_offset_size; i += BVH_NODE_LEAF_SIZE, j++) {
i < leaf_nodes_offset_size; int4 data = leaf_nodes_offset[i];
i += BVH_NODE_LEAF_SIZE, j++) data.x += prim_offset;
{ data.y += prim_offset;
int4 data = leaf_nodes_offset[i]; pack_leaf_nodes[pack_leaf_nodes_offset] = data;
data.x += prim_offset; for (int j = 1; j < BVH_NODE_LEAF_SIZE; ++j) {
data.y += prim_offset; pack_leaf_nodes[pack_leaf_nodes_offset + j] = leaf_nodes_offset[i + j];
pack_leaf_nodes[pack_leaf_nodes_offset] = data; }
for(int j = 1; j < BVH_NODE_LEAF_SIZE; ++j) { pack_leaf_nodes_offset += BVH_NODE_LEAF_SIZE;
pack_leaf_nodes[pack_leaf_nodes_offset + j] = leaf_nodes_offset[i + j]; }
} }
pack_leaf_nodes_offset += BVH_NODE_LEAF_SIZE;
}
}
if(bvh->pack.nodes.size()) { if (bvh->pack.nodes.size()) {
int4 *bvh_nodes = &bvh->pack.nodes[0]; int4 *bvh_nodes = &bvh->pack.nodes[0];
size_t bvh_nodes_size = bvh->pack.nodes.size(); size_t bvh_nodes_size = bvh->pack.nodes.size();
for(size_t i = 0, j = 0; i < bvh_nodes_size; j++) { for (size_t i = 0, j = 0; i < bvh_nodes_size; j++) {
size_t nsize, nsize_bbox; size_t nsize, nsize_bbox;
if(bvh_nodes[i].x & PATH_RAY_NODE_UNALIGNED) { if (bvh_nodes[i].x & PATH_RAY_NODE_UNALIGNED) {
if(use_obvh) { if (use_obvh) {
nsize = BVH_UNALIGNED_ONODE_SIZE; nsize = BVH_UNALIGNED_ONODE_SIZE;
nsize_bbox = BVH_UNALIGNED_ONODE_SIZE-1; nsize_bbox = BVH_UNALIGNED_ONODE_SIZE - 1;
} }
else { else {
nsize = use_qbvh nsize = use_qbvh ? BVH_UNALIGNED_QNODE_SIZE : BVH_UNALIGNED_NODE_SIZE;
? BVH_UNALIGNED_QNODE_SIZE nsize_bbox = (use_qbvh) ? BVH_UNALIGNED_QNODE_SIZE - 1 : 0;
: BVH_UNALIGNED_NODE_SIZE; }
nsize_bbox = (use_qbvh) ? BVH_UNALIGNED_QNODE_SIZE-1 : 0; }
} else {
} if (use_obvh) {
else { nsize = BVH_ONODE_SIZE;
if(use_obvh) { nsize_bbox = BVH_ONODE_SIZE - 1;
nsize = BVH_ONODE_SIZE; }
nsize_bbox = BVH_ONODE_SIZE-1; else {
} nsize = (use_qbvh) ? BVH_QNODE_SIZE : BVH_NODE_SIZE;
else { nsize_bbox = (use_qbvh) ? BVH_QNODE_SIZE - 1 : 0;
nsize = (use_qbvh)? BVH_QNODE_SIZE: BVH_NODE_SIZE; }
nsize_bbox = (use_qbvh)? BVH_QNODE_SIZE-1 : 0; }
}
}
memcpy(pack_nodes + pack_nodes_offset, memcpy(pack_nodes + pack_nodes_offset, bvh_nodes + i, nsize_bbox * sizeof(int4));
bvh_nodes + i,
nsize_bbox*sizeof(int4));
/* Modify offsets into arrays */ /* Modify offsets into arrays */
int4 data = bvh_nodes[i + nsize_bbox]; int4 data = bvh_nodes[i + nsize_bbox];
int4 data1 = bvh_nodes[i + nsize_bbox-1]; int4 data1 = bvh_nodes[i + nsize_bbox - 1];
if(use_obvh) { if (use_obvh) {
data.z += (data.z < 0) ? -noffset_leaf : noffset; data.z += (data.z < 0) ? -noffset_leaf : noffset;
data.w += (data.w < 0) ? -noffset_leaf : noffset; data.w += (data.w < 0) ? -noffset_leaf : noffset;
data.x += (data.x < 0) ? -noffset_leaf : noffset; data.x += (data.x < 0) ? -noffset_leaf : noffset;
data.y += (data.y < 0) ? -noffset_leaf : noffset; data.y += (data.y < 0) ? -noffset_leaf : noffset;
data1.z += (data1.z < 0) ? -noffset_leaf : noffset; data1.z += (data1.z < 0) ? -noffset_leaf : noffset;
data1.w += (data1.w < 0) ? -noffset_leaf : noffset; data1.w += (data1.w < 0) ? -noffset_leaf : noffset;
data1.x += (data1.x < 0) ? -noffset_leaf : noffset; data1.x += (data1.x < 0) ? -noffset_leaf : noffset;
data1.y += (data1.y < 0) ? -noffset_leaf : noffset; data1.y += (data1.y < 0) ? -noffset_leaf : noffset;
} }
else { else {
data.z += (data.z < 0) ? -noffset_leaf : noffset; data.z += (data.z < 0) ? -noffset_leaf : noffset;
data.w += (data.w < 0) ? -noffset_leaf : noffset; data.w += (data.w < 0) ? -noffset_leaf : noffset;
if(use_qbvh) { if (use_qbvh) {
data.x += (data.x < 0)? -noffset_leaf: noffset; data.x += (data.x < 0) ? -noffset_leaf : noffset;
data.y += (data.y < 0)? -noffset_leaf: noffset; data.y += (data.y < 0) ? -noffset_leaf : noffset;
} }
} }
pack_nodes[pack_nodes_offset + nsize_bbox] = data; pack_nodes[pack_nodes_offset + nsize_bbox] = data;
if(use_obvh) { if (use_obvh) {
pack_nodes[pack_nodes_offset + nsize_bbox - 1] = data1; pack_nodes[pack_nodes_offset + nsize_bbox - 1] = data1;
} }
/* Usually this copies nothing, but we better /* Usually this copies nothing, but we better
* be prepared for possible node size extension. * be prepared for possible node size extension.
*/ */
memcpy(&pack_nodes[pack_nodes_offset + nsize_bbox+1], memcpy(&pack_nodes[pack_nodes_offset + nsize_bbox + 1],
&bvh_nodes[i + nsize_bbox+1], &bvh_nodes[i + nsize_bbox + 1],
sizeof(int4) * (nsize - (nsize_bbox+1))); sizeof(int4) * (nsize - (nsize_bbox + 1)));
pack_nodes_offset += nsize; pack_nodes_offset += nsize;
i += nsize; i += nsize;
} }
} }
nodes_offset += bvh->pack.nodes.size(); nodes_offset += bvh->pack.nodes.size();
nodes_leaf_offset += bvh->pack.leaf_nodes.size(); nodes_leaf_offset += bvh->pack.leaf_nodes.size();
prim_offset += bvh->pack.prim_index.size(); prim_offset += bvh->pack.prim_index.size();
} }
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -34,96 +34,92 @@ class LeafNode;
class Object; class Object;
class Progress; class Progress;
#define BVH_ALIGN 4096 #define BVH_ALIGN 4096
#define TRI_NODE_SIZE 3 #define TRI_NODE_SIZE 3
/* Packed BVH /* Packed BVH
* *
* BVH stored as it will be used for traversal on the rendering device. */ * BVH stored as it will be used for traversal on the rendering device. */
struct PackedBVH { struct PackedBVH {
/* BVH nodes storage, one node is 4x int4, and contains two bounding boxes, /* BVH nodes storage, one node is 4x int4, and contains two bounding boxes,
* and child, triangle or object indexes depending on the node type */ * and child, triangle or object indexes depending on the node type */
array<int4> nodes; array<int4> nodes;
/* BVH leaf nodes storage. */ /* BVH leaf nodes storage. */
array<int4> leaf_nodes; array<int4> leaf_nodes;
/* object index to BVH node index mapping for instances */ /* object index to BVH node index mapping for instances */
array<int> object_node; array<int> object_node;
/* Mapping from primitive index to index in triangle array. */ /* Mapping from primitive index to index in triangle array. */
array<uint> prim_tri_index; array<uint> prim_tri_index;
/* Continuous storage of triangle vertices. */ /* Continuous storage of triangle vertices. */
array<float4> prim_tri_verts; array<float4> prim_tri_verts;
/* primitive type - triangle or strand */ /* primitive type - triangle or strand */
array<int> prim_type; array<int> prim_type;
/* visibility visibilitys for primitives */ /* visibility visibilitys for primitives */
array<uint> prim_visibility; array<uint> prim_visibility;
/* mapping from BVH primitive index to true primitive index, as primitives /* mapping from BVH primitive index to true primitive index, as primitives
* may be duplicated due to spatial splits. -1 for instances. */ * may be duplicated due to spatial splits. -1 for instances. */
array<int> prim_index; array<int> prim_index;
/* mapping from BVH primitive index, to the object id of that primitive. */ /* mapping from BVH primitive index, to the object id of that primitive. */
array<int> prim_object; array<int> prim_object;
/* Time range of BVH primitive. */ /* Time range of BVH primitive. */
array<float2> prim_time; array<float2> prim_time;
/* index of the root node. */ /* index of the root node. */
int root_index; int root_index;
PackedBVH() PackedBVH()
{ {
root_index = 0; root_index = 0;
} }
}; };
/* Identifiers for the BVH variants; numbered consecutively from 0.
 * NOTE(review): presumably the number is the node width - confirm. */
enum BVH_TYPE {
  bvh2 = 0,
  bvh4 = 1,
  bvh8 = 2,
};
/* BVH */ /* BVH */
class BVH class BVH {
{ public:
public: PackedBVH pack;
PackedBVH pack; BVHParams params;
BVHParams params; vector<Object *> objects;
vector<Object*> objects;
static BVH *create(const BVHParams& params, const vector<Object*>& objects); static BVH *create(const BVHParams &params, const vector<Object *> &objects);
virtual ~BVH() {} virtual ~BVH()
{
}
virtual void build(Progress& progress, Stats *stats=NULL); virtual void build(Progress &progress, Stats *stats = NULL);
void refit(Progress& progress); void refit(Progress &progress);
protected: protected:
BVH(const BVHParams& params, const vector<Object*>& objects); BVH(const BVHParams &params, const vector<Object *> &objects);
/* Refit range of primitives. */ /* Refit range of primitives. */
void refit_primitives(int start, int end, BoundBox& bbox, uint& visibility); void refit_primitives(int start, int end, BoundBox &bbox, uint &visibility);
/* triangles and strands */ /* triangles and strands */
void pack_primitives(); void pack_primitives();
void pack_triangle(int idx, float4 storage[3]); void pack_triangle(int idx, float4 storage[3]);
/* merge instance BVH's */ /* merge instance BVH's */
void pack_instances(size_t nodes_size, size_t leaf_nodes_size); void pack_instances(size_t nodes_size, size_t leaf_nodes_size);
/* for subclasses to implement */ /* for subclasses to implement */
virtual void pack_nodes(const BVHNode *root) = 0; virtual void pack_nodes(const BVHNode *root) = 0;
virtual void refit_nodes() = 0; virtual void refit_nodes() = 0;
virtual BVHNode *widen_children_nodes(const BVHNode *root) = 0; virtual BVHNode *widen_children_nodes(const BVHNode *root) = 0;
}; };
/* Pack Utility */ /* Pack Utility */
struct BVHStackEntry struct BVHStackEntry {
{ const BVHNode *node;
const BVHNode *node; int idx;
int idx;
BVHStackEntry(const BVHNode *n = 0, int i = 0); BVHStackEntry(const BVHNode *n = 0, int i = 0);
int encodeIdx() const; int encodeIdx() const;
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __BVH_H__ */ #endif /* __BVH_H__ */

View File

@@ -25,276 +25,268 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
/* Forwards the parameters and objects to the BVH base class; no further
 * setup is done here. */
BVH2::BVH2(const BVHParams &params_, const vector<Object *> &objects_)
    : BVH(params_, objects_)
{
}
/* No widening is done for this layout: the tree passed in is returned as is,
 * with constness cast away, so the result is the same pointer as `root`. */
BVHNode *BVH2::widen_children_nodes(const BVHNode *root)
{
  BVHNode *same_tree = const_cast<BVHNode *>(root);
  return same_tree;
}
void BVH2::pack_leaf(const BVHStackEntry& e, void BVH2::pack_leaf(const BVHStackEntry &e, const LeafNode *leaf)
const LeafNode *leaf)
{ {
assert(e.idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size()); assert(e.idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size());
float4 data[BVH_NODE_LEAF_SIZE]; float4 data[BVH_NODE_LEAF_SIZE];
memset(data, 0, sizeof(data)); memset(data, 0, sizeof(data));
if(leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) { if (leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
/* object */ /* object */
data[0].x = __int_as_float(~(leaf->lo)); data[0].x = __int_as_float(~(leaf->lo));
data[0].y = __int_as_float(0); data[0].y = __int_as_float(0);
} }
else { else {
/* triangle */ /* triangle */
data[0].x = __int_as_float(leaf->lo); data[0].x = __int_as_float(leaf->lo);
data[0].y = __int_as_float(leaf->hi); data[0].y = __int_as_float(leaf->hi);
} }
data[0].z = __uint_as_float(leaf->visibility); data[0].z = __uint_as_float(leaf->visibility);
if(leaf->num_triangles() != 0) { if (leaf->num_triangles() != 0) {
data[0].w = __uint_as_float(pack.prim_type[leaf->lo]); data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
} }
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_NODE_LEAF_SIZE); memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4) * BVH_NODE_LEAF_SIZE);
} }
void BVH2::pack_inner(const BVHStackEntry& e, void BVH2::pack_inner(const BVHStackEntry &e, const BVHStackEntry &e0, const BVHStackEntry &e1)
const BVHStackEntry& e0,
const BVHStackEntry& e1)
{ {
if(e0.node->is_unaligned || e1.node->is_unaligned) { if (e0.node->is_unaligned || e1.node->is_unaligned) {
pack_unaligned_inner(e, e0, e1); pack_unaligned_inner(e, e0, e1);
} else { }
pack_aligned_inner(e, e0, e1); else {
} pack_aligned_inner(e, e0, e1);
}
} }
void BVH2::pack_aligned_inner(const BVHStackEntry& e, void BVH2::pack_aligned_inner(const BVHStackEntry &e,
const BVHStackEntry& e0, const BVHStackEntry &e0,
const BVHStackEntry& e1) const BVHStackEntry &e1)
{ {
pack_aligned_node(e.idx, pack_aligned_node(e.idx,
e0.node->bounds, e1.node->bounds, e0.node->bounds,
e0.encodeIdx(), e1.encodeIdx(), e1.node->bounds,
e0.node->visibility, e1.node->visibility); e0.encodeIdx(),
e1.encodeIdx(),
e0.node->visibility,
e1.node->visibility);
} }
void BVH2::pack_aligned_node(int idx, void BVH2::pack_aligned_node(int idx,
const BoundBox& b0, const BoundBox &b0,
const BoundBox& b1, const BoundBox &b1,
int c0, int c1, int c0,
uint visibility0, uint visibility1) int c1,
uint visibility0,
uint visibility1)
{ {
assert(idx + BVH_NODE_SIZE <= pack.nodes.size()); assert(idx + BVH_NODE_SIZE <= pack.nodes.size());
assert(c0 < 0 || c0 < pack.nodes.size()); assert(c0 < 0 || c0 < pack.nodes.size());
assert(c1 < 0 || c1 < pack.nodes.size()); assert(c1 < 0 || c1 < pack.nodes.size());
int4 data[BVH_NODE_SIZE] = { int4 data[BVH_NODE_SIZE] = {
make_int4(visibility0 & ~PATH_RAY_NODE_UNALIGNED, make_int4(
visibility1 & ~PATH_RAY_NODE_UNALIGNED, visibility0 & ~PATH_RAY_NODE_UNALIGNED, visibility1 & ~PATH_RAY_NODE_UNALIGNED, c0, c1),
c0, c1), make_int4(__float_as_int(b0.min.x),
make_int4(__float_as_int(b0.min.x), __float_as_int(b1.min.x),
__float_as_int(b1.min.x), __float_as_int(b0.max.x),
__float_as_int(b0.max.x), __float_as_int(b1.max.x)),
__float_as_int(b1.max.x)), make_int4(__float_as_int(b0.min.y),
make_int4(__float_as_int(b0.min.y), __float_as_int(b1.min.y),
__float_as_int(b1.min.y), __float_as_int(b0.max.y),
__float_as_int(b0.max.y), __float_as_int(b1.max.y)),
__float_as_int(b1.max.y)), make_int4(__float_as_int(b0.min.z),
make_int4(__float_as_int(b0.min.z), __float_as_int(b1.min.z),
__float_as_int(b1.min.z), __float_as_int(b0.max.z),
__float_as_int(b0.max.z), __float_as_int(b1.max.z)),
__float_as_int(b1.max.z)), };
};
memcpy(&pack.nodes[idx], data, sizeof(int4)*BVH_NODE_SIZE); memcpy(&pack.nodes[idx], data, sizeof(int4) * BVH_NODE_SIZE);
} }
void BVH2::pack_unaligned_inner(const BVHStackEntry& e, void BVH2::pack_unaligned_inner(const BVHStackEntry &e,
const BVHStackEntry& e0, const BVHStackEntry &e0,
const BVHStackEntry& e1) const BVHStackEntry &e1)
{ {
pack_unaligned_node(e.idx, pack_unaligned_node(e.idx,
e0.node->get_aligned_space(), e0.node->get_aligned_space(),
e1.node->get_aligned_space(), e1.node->get_aligned_space(),
e0.node->bounds, e0.node->bounds,
e1.node->bounds, e1.node->bounds,
e0.encodeIdx(), e1.encodeIdx(), e0.encodeIdx(),
e0.node->visibility, e1.node->visibility); e1.encodeIdx(),
e0.node->visibility,
e1.node->visibility);
} }
void BVH2::pack_unaligned_node(int idx, void BVH2::pack_unaligned_node(int idx,
const Transform& aligned_space0, const Transform &aligned_space0,
const Transform& aligned_space1, const Transform &aligned_space1,
const BoundBox& bounds0, const BoundBox &bounds0,
const BoundBox& bounds1, const BoundBox &bounds1,
int c0, int c1, int c0,
uint visibility0, uint visibility1) int c1,
uint visibility0,
uint visibility1)
{ {
assert(idx + BVH_UNALIGNED_NODE_SIZE <= pack.nodes.size()); assert(idx + BVH_UNALIGNED_NODE_SIZE <= pack.nodes.size());
assert(c0 < 0 || c0 < pack.nodes.size()); assert(c0 < 0 || c0 < pack.nodes.size());
assert(c1 < 0 || c1 < pack.nodes.size()); assert(c1 < 0 || c1 < pack.nodes.size());
float4 data[BVH_UNALIGNED_NODE_SIZE]; float4 data[BVH_UNALIGNED_NODE_SIZE];
Transform space0 = BVHUnaligned::compute_node_transform(bounds0, Transform space0 = BVHUnaligned::compute_node_transform(bounds0, aligned_space0);
aligned_space0); Transform space1 = BVHUnaligned::compute_node_transform(bounds1, aligned_space1);
Transform space1 = BVHUnaligned::compute_node_transform(bounds1, data[0] = make_float4(__int_as_float(visibility0 | PATH_RAY_NODE_UNALIGNED),
aligned_space1); __int_as_float(visibility1 | PATH_RAY_NODE_UNALIGNED),
data[0] = make_float4(__int_as_float(visibility0 | PATH_RAY_NODE_UNALIGNED), __int_as_float(c0),
__int_as_float(visibility1 | PATH_RAY_NODE_UNALIGNED), __int_as_float(c1));
__int_as_float(c0),
__int_as_float(c1));
data[1] = space0.x; data[1] = space0.x;
data[2] = space0.y; data[2] = space0.y;
data[3] = space0.z; data[3] = space0.z;
data[4] = space1.x; data[4] = space1.x;
data[5] = space1.y; data[5] = space1.y;
data[6] = space1.z; data[6] = space1.z;
memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_NODE_SIZE); memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_UNALIGNED_NODE_SIZE);
} }
void BVH2::pack_nodes(const BVHNode *root) void BVH2::pack_nodes(const BVHNode *root)
{ {
const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT); const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT); const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
assert(num_leaf_nodes <= num_nodes); assert(num_leaf_nodes <= num_nodes);
const size_t num_inner_nodes = num_nodes - num_leaf_nodes; const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
size_t node_size; size_t node_size;
if(params.use_unaligned_nodes) { if (params.use_unaligned_nodes) {
const size_t num_unaligned_nodes = const size_t num_unaligned_nodes = root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT); node_size = (num_unaligned_nodes * BVH_UNALIGNED_NODE_SIZE) +
node_size = (num_unaligned_nodes * BVH_UNALIGNED_NODE_SIZE) + (num_inner_nodes - num_unaligned_nodes) * BVH_NODE_SIZE;
(num_inner_nodes - num_unaligned_nodes) * BVH_NODE_SIZE; }
} else {
else { node_size = num_inner_nodes * BVH_NODE_SIZE;
node_size = num_inner_nodes * BVH_NODE_SIZE; }
} /* Resize arrays */
/* Resize arrays */ pack.nodes.clear();
pack.nodes.clear(); pack.leaf_nodes.clear();
pack.leaf_nodes.clear(); /* For top level BVH, first merge existing BVH's so we know the offsets. */
/* For top level BVH, first merge existing BVH's so we know the offsets. */ if (params.top_level) {
if(params.top_level) { pack_instances(node_size, num_leaf_nodes * BVH_NODE_LEAF_SIZE);
pack_instances(node_size, num_leaf_nodes*BVH_NODE_LEAF_SIZE); }
} else {
else { pack.nodes.resize(node_size);
pack.nodes.resize(node_size); pack.leaf_nodes.resize(num_leaf_nodes * BVH_NODE_LEAF_SIZE);
pack.leaf_nodes.resize(num_leaf_nodes*BVH_NODE_LEAF_SIZE); }
}
int nextNodeIdx = 0, nextLeafNodeIdx = 0; int nextNodeIdx = 0, nextLeafNodeIdx = 0;
vector<BVHStackEntry> stack; vector<BVHStackEntry> stack;
stack.reserve(BVHParams::MAX_DEPTH*2); stack.reserve(BVHParams::MAX_DEPTH * 2);
if(root->is_leaf()) { if (root->is_leaf()) {
stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++)); stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
} }
else { else {
stack.push_back(BVHStackEntry(root, nextNodeIdx)); stack.push_back(BVHStackEntry(root, nextNodeIdx));
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_NODE_SIZE nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_NODE_SIZE : BVH_NODE_SIZE;
: BVH_NODE_SIZE; }
}
while(stack.size()) { while (stack.size()) {
BVHStackEntry e = stack.back(); BVHStackEntry e = stack.back();
stack.pop_back(); stack.pop_back();
if(e.node->is_leaf()) { if (e.node->is_leaf()) {
/* leaf node */ /* leaf node */
const LeafNode *leaf = reinterpret_cast<const LeafNode*>(e.node); const LeafNode *leaf = reinterpret_cast<const LeafNode *>(e.node);
pack_leaf(e, leaf); pack_leaf(e, leaf);
} }
else { else {
/* inner node */ /* inner node */
int idx[2]; int idx[2];
for(int i = 0; i < 2; ++i) { for (int i = 0; i < 2; ++i) {
if(e.node->get_child(i)->is_leaf()) { if (e.node->get_child(i)->is_leaf()) {
idx[i] = nextLeafNodeIdx++; idx[i] = nextLeafNodeIdx++;
} }
else { else {
idx[i] = nextNodeIdx; idx[i] = nextNodeIdx;
nextNodeIdx += e.node->get_child(i)->has_unaligned() nextNodeIdx += e.node->get_child(i)->has_unaligned() ? BVH_UNALIGNED_NODE_SIZE :
? BVH_UNALIGNED_NODE_SIZE BVH_NODE_SIZE;
: BVH_NODE_SIZE; }
} }
}
stack.push_back(BVHStackEntry(e.node->get_child(0), idx[0])); stack.push_back(BVHStackEntry(e.node->get_child(0), idx[0]));
stack.push_back(BVHStackEntry(e.node->get_child(1), idx[1])); stack.push_back(BVHStackEntry(e.node->get_child(1), idx[1]));
pack_inner(e, stack[stack.size()-2], stack[stack.size()-1]); pack_inner(e, stack[stack.size() - 2], stack[stack.size() - 1]);
} }
} }
assert(node_size == nextNodeIdx); assert(node_size == nextNodeIdx);
/* root index to start traversal at, to handle case of single leaf node */ /* root index to start traversal at, to handle case of single leaf node */
pack.root_index = (root->is_leaf())? -1: 0; pack.root_index = (root->is_leaf()) ? -1 : 0;
} }
void BVH2::refit_nodes() void BVH2::refit_nodes()
{ {
assert(!params.top_level); assert(!params.top_level);
BoundBox bbox = BoundBox::empty; BoundBox bbox = BoundBox::empty;
uint visibility = 0; uint visibility = 0;
refit_node(0, (pack.root_index == -1)? true: false, bbox, visibility); refit_node(0, (pack.root_index == -1) ? true : false, bbox, visibility);
} }
void BVH2::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility) void BVH2::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility)
{ {
if(leaf) { if (leaf) {
/* refit leaf node */ /* refit leaf node */
assert(idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size()); assert(idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size());
const int4 *data = &pack.leaf_nodes[idx]; const int4 *data = &pack.leaf_nodes[idx];
const int c0 = data[0].x; const int c0 = data[0].x;
const int c1 = data[0].y; const int c1 = data[0].y;
BVH::refit_primitives(c0, c1, bbox, visibility); BVH::refit_primitives(c0, c1, bbox, visibility);
/* TODO(sergey): De-duplicate with pack_leaf(). */ /* TODO(sergey): De-duplicate with pack_leaf(). */
float4 leaf_data[BVH_NODE_LEAF_SIZE]; float4 leaf_data[BVH_NODE_LEAF_SIZE];
leaf_data[0].x = __int_as_float(c0); leaf_data[0].x = __int_as_float(c0);
leaf_data[0].y = __int_as_float(c1); leaf_data[0].y = __int_as_float(c1);
leaf_data[0].z = __uint_as_float(visibility); leaf_data[0].z = __uint_as_float(visibility);
leaf_data[0].w = __uint_as_float(data[0].w); leaf_data[0].w = __uint_as_float(data[0].w);
memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4)*BVH_NODE_LEAF_SIZE); memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4) * BVH_NODE_LEAF_SIZE);
} }
else { else {
assert(idx + BVH_NODE_SIZE <= pack.nodes.size()); assert(idx + BVH_NODE_SIZE <= pack.nodes.size());
const int4 *data = &pack.nodes[idx]; const int4 *data = &pack.nodes[idx];
const bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0; const bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
const int c0 = data[0].z; const int c0 = data[0].z;
const int c1 = data[0].w; const int c1 = data[0].w;
/* refit inner node, set bbox from children */ /* refit inner node, set bbox from children */
BoundBox bbox0 = BoundBox::empty, bbox1 = BoundBox::empty; BoundBox bbox0 = BoundBox::empty, bbox1 = BoundBox::empty;
uint visibility0 = 0, visibility1 = 0; uint visibility0 = 0, visibility1 = 0;
refit_node((c0 < 0)? -c0-1: c0, (c0 < 0), bbox0, visibility0); refit_node((c0 < 0) ? -c0 - 1 : c0, (c0 < 0), bbox0, visibility0);
refit_node((c1 < 0)? -c1-1: c1, (c1 < 0), bbox1, visibility1); refit_node((c1 < 0) ? -c1 - 1 : c1, (c1 < 0), bbox1, visibility1);
if(is_unaligned) { if (is_unaligned) {
Transform aligned_space = transform_identity(); Transform aligned_space = transform_identity();
pack_unaligned_node(idx, pack_unaligned_node(
aligned_space, aligned_space, idx, aligned_space, aligned_space, bbox0, bbox1, c0, c1, visibility0, visibility1);
bbox0, bbox1, }
c0, c1, else {
visibility0, pack_aligned_node(idx, bbox0, bbox1, c0, c1, visibility0, visibility1);
visibility1); }
}
else {
pack_aligned_node(idx,
bbox0, bbox1,
c0, c1,
visibility0,
visibility1);
}
bbox.grow(bbox0); bbox.grow(bbox0);
bbox.grow(bbox1); bbox.grow(bbox1);
visibility = visibility0|visibility1; visibility = visibility0 | visibility1;
} }
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -34,8 +34,8 @@ class LeafNode;
class Object; class Object;
class Progress; class Progress;
#define BVH_NODE_SIZE 4 #define BVH_NODE_SIZE 4
#define BVH_NODE_LEAF_SIZE 1 #define BVH_NODE_LEAF_SIZE 1
#define BVH_UNALIGNED_NODE_SIZE 7 #define BVH_UNALIGNED_NODE_SIZE 7
/* BVH2 /* BVH2
@@ -43,48 +43,49 @@ class Progress;
* Typical BVH with each node having two children. * Typical BVH with each node having two children.
*/ */
class BVH2 : public BVH { class BVH2 : public BVH {
protected: protected:
/* constructor */ /* constructor */
friend class BVH; friend class BVH;
BVH2(const BVHParams& params, const vector<Object*>& objects); BVH2(const BVHParams &params, const vector<Object *> &objects);
/* Building process. */ /* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override; virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
/* pack */ /* pack */
void pack_nodes(const BVHNode *root) override; void pack_nodes(const BVHNode *root) override;
void pack_leaf(const BVHStackEntry& e, void pack_leaf(const BVHStackEntry &e, const LeafNode *leaf);
const LeafNode *leaf); void pack_inner(const BVHStackEntry &e, const BVHStackEntry &e0, const BVHStackEntry &e1);
void pack_inner(const BVHStackEntry& e,
const BVHStackEntry& e0,
const BVHStackEntry& e1);
void pack_aligned_inner(const BVHStackEntry& e, void pack_aligned_inner(const BVHStackEntry &e,
const BVHStackEntry& e0, const BVHStackEntry &e0,
const BVHStackEntry& e1); const BVHStackEntry &e1);
void pack_aligned_node(int idx, void pack_aligned_node(int idx,
const BoundBox& b0, const BoundBox &b0,
const BoundBox& b1, const BoundBox &b1,
int c0, int c1, int c0,
uint visibility0, uint visibility1); int c1,
uint visibility0,
uint visibility1);
void pack_unaligned_inner(const BVHStackEntry& e, void pack_unaligned_inner(const BVHStackEntry &e,
const BVHStackEntry& e0, const BVHStackEntry &e0,
const BVHStackEntry& e1); const BVHStackEntry &e1);
void pack_unaligned_node(int idx, void pack_unaligned_node(int idx,
const Transform& aligned_space0, const Transform &aligned_space0,
const Transform& aligned_space1, const Transform &aligned_space1,
const BoundBox& b0, const BoundBox &b0,
const BoundBox& b1, const BoundBox &b1,
int c0, int c1, int c0,
uint visibility0, uint visibility1); int c1,
uint visibility0,
uint visibility1);
/* refit */ /* refit */
void refit_nodes() override; void refit_nodes() override;
void refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility); void refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility);
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __BVH2_H__ */ #endif /* __BVH2_H__ */

View File

@@ -31,141 +31,131 @@ CCL_NAMESPACE_BEGIN
* life easier all over the place. * life easier all over the place.
*/ */
BVH4::BVH4(const BVHParams& params_, const vector<Object*>& objects_) BVH4::BVH4(const BVHParams &params_, const vector<Object *> &objects_) : BVH(params_, objects_)
: BVH(params_, objects_)
{ {
params.bvh_layout = BVH_LAYOUT_BVH4; params.bvh_layout = BVH_LAYOUT_BVH4;
} }
namespace { namespace {
BVHNode *bvh_node_merge_children_recursively(const BVHNode *node) BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
{ {
if(node->is_leaf()) { if (node->is_leaf()) {
return new LeafNode(*reinterpret_cast<const LeafNode *>(node)); return new LeafNode(*reinterpret_cast<const LeafNode *>(node));
} }
/* Collect nodes of one layer deeper, allowing us to have more children in /* Collect nodes of one layer deeper, allowing us to have more children in
* an inner layer. */ * an inner layer. */
assert(node->num_children() <= 2); assert(node->num_children() <= 2);
const BVHNode *children[4]; const BVHNode *children[4];
const BVHNode *child0 = node->get_child(0); const BVHNode *child0 = node->get_child(0);
const BVHNode *child1 = node->get_child(1); const BVHNode *child1 = node->get_child(1);
int num_children = 0; int num_children = 0;
if(child0->is_leaf()) { if (child0->is_leaf()) {
children[num_children++] = child0; children[num_children++] = child0;
} }
else { else {
children[num_children++] = child0->get_child(0); children[num_children++] = child0->get_child(0);
children[num_children++] = child0->get_child(1); children[num_children++] = child0->get_child(1);
} }
if(child1->is_leaf()) { if (child1->is_leaf()) {
children[num_children++] = child1; children[num_children++] = child1;
} }
else { else {
children[num_children++] = child1->get_child(0); children[num_children++] = child1->get_child(0);
children[num_children++] = child1->get_child(1); children[num_children++] = child1->get_child(1);
} }
/* Merge children in subtrees. */ /* Merge children in subtrees. */
BVHNode *children4[4]; BVHNode *children4[4];
for(int i = 0; i < num_children; ++i) { for (int i = 0; i < num_children; ++i) {
children4[i] = bvh_node_merge_children_recursively(children[i]); children4[i] = bvh_node_merge_children_recursively(children[i]);
} }
/* Allocate new node. */ /* Allocate new node. */
BVHNode *node4 = new InnerNode(node->bounds, children4, num_children); BVHNode *node4 = new InnerNode(node->bounds, children4, num_children);
/* TODO(sergey): Consider doing this from the InnerNode() constructor. /* TODO(sergey): Consider doing this from the InnerNode() constructor.
* But in order to do this nicely need to think of how to pass all the * But in order to do this nicely need to think of how to pass all the
* parameters there. */ * parameters there. */
if(node->is_unaligned) { if (node->is_unaligned) {
node4->is_unaligned = true; node4->is_unaligned = true;
node4->aligned_space = new Transform(); node4->aligned_space = new Transform();
*node4->aligned_space = *node->aligned_space; *node4->aligned_space = *node->aligned_space;
} }
return node4; return node4;
} }
} // namespace } // namespace
BVHNode *BVH4::widen_children_nodes(const BVHNode *root) BVHNode *BVH4::widen_children_nodes(const BVHNode *root)
{ {
if(root == NULL) { if (root == NULL) {
return NULL; return NULL;
} }
if(root->is_leaf()) { if (root->is_leaf()) {
return const_cast<BVHNode *>(root); return const_cast<BVHNode *>(root);
} }
BVHNode *root4 = bvh_node_merge_children_recursively(root); BVHNode *root4 = bvh_node_merge_children_recursively(root);
/* TODO(sergey): Pack children nodes to parents which have fewer than 4 /* TODO(sergey): Pack children nodes to parents which have fewer than 4
* children. */ * children. */
return root4; return root4;
} }
void BVH4::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf) void BVH4::pack_leaf(const BVHStackEntry &e, const LeafNode *leaf)
{ {
float4 data[BVH_QNODE_LEAF_SIZE]; float4 data[BVH_QNODE_LEAF_SIZE];
memset(data, 0, sizeof(data)); memset(data, 0, sizeof(data));
if(leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) { if (leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
/* object */ /* object */
data[0].x = __int_as_float(~(leaf->lo)); data[0].x = __int_as_float(~(leaf->lo));
data[0].y = __int_as_float(0); data[0].y = __int_as_float(0);
} }
else { else {
/* triangle */ /* triangle */
data[0].x = __int_as_float(leaf->lo); data[0].x = __int_as_float(leaf->lo);
data[0].y = __int_as_float(leaf->hi); data[0].y = __int_as_float(leaf->hi);
} }
data[0].z = __uint_as_float(leaf->visibility); data[0].z = __uint_as_float(leaf->visibility);
if(leaf->num_triangles() != 0) { if (leaf->num_triangles() != 0) {
data[0].w = __uint_as_float(pack.prim_type[leaf->lo]); data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
} }
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_QNODE_LEAF_SIZE); memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4) * BVH_QNODE_LEAF_SIZE);
} }
void BVH4::pack_inner(const BVHStackEntry& e, void BVH4::pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
const BVHStackEntry *en,
int num)
{ {
bool has_unaligned = false; bool has_unaligned = false;
/* Check whether we have to create unaligned node or all nodes are aligned /* Check whether we have to create unaligned node or all nodes are aligned
* and we can cut some corners here. * and we can cut some corners here.
*/ */
if(params.use_unaligned_nodes) { if (params.use_unaligned_nodes) {
for(int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
if(en[i].node->is_unaligned) { if (en[i].node->is_unaligned) {
has_unaligned = true; has_unaligned = true;
break; break;
} }
} }
} }
if(has_unaligned) { if (has_unaligned) {
/* At least one child is unaligned, pack into an unaligned node. */ /* At least one child is unaligned, pack into an unaligned node. */
pack_unaligned_inner(e, en, num); pack_unaligned_inner(e, en, num);
} }
else { else {
/* No unaligned children (or unaligned nodes are disabled), so pack /* No unaligned children (or unaligned nodes are disabled), so pack
* into an aligned AABB node. * into an aligned AABB node.
*/ */
pack_aligned_inner(e, en, num); pack_aligned_inner(e, en, num);
} }
} }
void BVH4::pack_aligned_inner(const BVHStackEntry& e, void BVH4::pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
const BVHStackEntry *en,
int num)
{ {
BoundBox bounds[4]; BoundBox bounds[4];
int child[4]; int child[4];
for(int i = 0; i < num; ++i) { for (int i = 0; i < num; ++i) {
bounds[i] = en[i].node->bounds; bounds[i] = en[i].node->bounds;
child[i] = en[i].encodeIdx(); child[i] = en[i].encodeIdx();
} }
pack_aligned_node(e.idx, pack_aligned_node(
bounds, e.idx, bounds, child, e.node->visibility, e.node->time_from, e.node->time_to, num);
child,
e.node->visibility,
e.node->time_from,
e.node->time_to,
num);
} }
void BVH4::pack_aligned_node(int idx, void BVH4::pack_aligned_node(int idx,
@@ -176,66 +166,64 @@ void BVH4::pack_aligned_node(int idx,
const float time_to, const float time_to,
const int num) const int num)
{ {
float4 data[BVH_QNODE_SIZE]; float4 data[BVH_QNODE_SIZE];
memset(data, 0, sizeof(data)); memset(data, 0, sizeof(data));
data[0].x = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED); data[0].x = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
data[0].y = time_from; data[0].y = time_from;
data[0].z = time_to; data[0].z = time_to;
for(int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
float3 bb_min = bounds[i].min; float3 bb_min = bounds[i].min;
float3 bb_max = bounds[i].max; float3 bb_max = bounds[i].max;
data[1][i] = bb_min.x; data[1][i] = bb_min.x;
data[2][i] = bb_max.x; data[2][i] = bb_max.x;
data[3][i] = bb_min.y; data[3][i] = bb_min.y;
data[4][i] = bb_max.y; data[4][i] = bb_max.y;
data[5][i] = bb_min.z; data[5][i] = bb_min.z;
data[6][i] = bb_max.z; data[6][i] = bb_max.z;
data[7][i] = __int_as_float(child[i]); data[7][i] = __int_as_float(child[i]);
} }
for(int i = num; i < 4; i++) { for (int i = num; i < 4; i++) {
/* We store BB which would never be recorded as intersection /* We store BB which would never be recorded as intersection
* so kernel might safely assume there are always 4 child nodes. * so kernel might safely assume there are always 4 child nodes.
*/ */
data[1][i] = FLT_MAX; data[1][i] = FLT_MAX;
data[2][i] = -FLT_MAX; data[2][i] = -FLT_MAX;
data[3][i] = FLT_MAX; data[3][i] = FLT_MAX;
data[4][i] = -FLT_MAX; data[4][i] = -FLT_MAX;
data[5][i] = FLT_MAX; data[5][i] = FLT_MAX;
data[6][i] = -FLT_MAX; data[6][i] = -FLT_MAX;
data[7][i] = __int_as_float(0); data[7][i] = __int_as_float(0);
} }
memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_QNODE_SIZE); memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_QNODE_SIZE);
} }
void BVH4::pack_unaligned_inner(const BVHStackEntry& e, void BVH4::pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
const BVHStackEntry *en,
int num)
{ {
Transform aligned_space[4]; Transform aligned_space[4];
BoundBox bounds[4]; BoundBox bounds[4];
int child[4]; int child[4];
for(int i = 0; i < num; ++i) { for (int i = 0; i < num; ++i) {
aligned_space[i] = en[i].node->get_aligned_space(); aligned_space[i] = en[i].node->get_aligned_space();
bounds[i] = en[i].node->bounds; bounds[i] = en[i].node->bounds;
child[i] = en[i].encodeIdx(); child[i] = en[i].encodeIdx();
} }
pack_unaligned_node(e.idx, pack_unaligned_node(e.idx,
aligned_space, aligned_space,
bounds, bounds,
child, child,
e.node->visibility, e.node->visibility,
e.node->time_from, e.node->time_from,
e.node->time_to, e.node->time_to,
num); num);
} }
void BVH4::pack_unaligned_node(int idx, void BVH4::pack_unaligned_node(int idx,
@@ -247,235 +235,211 @@ void BVH4::pack_unaligned_node(int idx,
const float time_to, const float time_to,
const int num) const int num)
{ {
float4 data[BVH_UNALIGNED_QNODE_SIZE]; float4 data[BVH_UNALIGNED_QNODE_SIZE];
memset(data, 0, sizeof(data)); memset(data, 0, sizeof(data));
data[0].x = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED); data[0].x = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
data[0].y = time_from; data[0].y = time_from;
data[0].z = time_to; data[0].z = time_to;
for(int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
Transform space = BVHUnaligned::compute_node_transform( Transform space = BVHUnaligned::compute_node_transform(bounds[i], aligned_space[i]);
bounds[i],
aligned_space[i]);
data[1][i] = space.x.x; data[1][i] = space.x.x;
data[2][i] = space.x.y; data[2][i] = space.x.y;
data[3][i] = space.x.z; data[3][i] = space.x.z;
data[4][i] = space.y.x; data[4][i] = space.y.x;
data[5][i] = space.y.y; data[5][i] = space.y.y;
data[6][i] = space.y.z; data[6][i] = space.y.z;
data[7][i] = space.z.x; data[7][i] = space.z.x;
data[8][i] = space.z.y; data[8][i] = space.z.y;
data[9][i] = space.z.z; data[9][i] = space.z.z;
data[10][i] = space.x.w; data[10][i] = space.x.w;
data[11][i] = space.y.w; data[11][i] = space.y.w;
data[12][i] = space.z.w; data[12][i] = space.z.w;
data[13][i] = __int_as_float(child[i]); data[13][i] = __int_as_float(child[i]);
} }
for(int i = num; i < 4; i++) { for (int i = num; i < 4; i++) {
/* We store BB which would never be recorded as intersection /* We store BB which would never be recorded as intersection
* so kernel might safely assume there are always 4 child nodes. * so kernel might safely assume there are always 4 child nodes.
*/ */
data[1][i] = NAN; data[1][i] = NAN;
data[2][i] = NAN; data[2][i] = NAN;
data[3][i] = NAN; data[3][i] = NAN;
data[4][i] = NAN; data[4][i] = NAN;
data[5][i] = NAN; data[5][i] = NAN;
data[6][i] = NAN; data[6][i] = NAN;
data[7][i] = NAN; data[7][i] = NAN;
data[8][i] = NAN; data[8][i] = NAN;
data[9][i] = NAN; data[9][i] = NAN;
data[10][i] = NAN; data[10][i] = NAN;
data[11][i] = NAN; data[11][i] = NAN;
data[12][i] = NAN; data[12][i] = NAN;
data[13][i] = __int_as_float(0); data[13][i] = __int_as_float(0);
} }
memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_QNODE_SIZE); memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_UNALIGNED_QNODE_SIZE);
} }
/* Quad SIMD Nodes */ /* Quad SIMD Nodes */
void BVH4::pack_nodes(const BVHNode *root) void BVH4::pack_nodes(const BVHNode *root)
{ {
/* Calculate size of the arrays required. */ /* Calculate size of the arrays required. */
const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT); const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT); const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
assert(num_leaf_nodes <= num_nodes); assert(num_leaf_nodes <= num_nodes);
const size_t num_inner_nodes = num_nodes - num_leaf_nodes; const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
size_t node_size; size_t node_size;
if(params.use_unaligned_nodes) { if (params.use_unaligned_nodes) {
const size_t num_unaligned_nodes = const size_t num_unaligned_nodes = root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT); node_size = (num_unaligned_nodes * BVH_UNALIGNED_QNODE_SIZE) +
node_size = (num_unaligned_nodes * BVH_UNALIGNED_QNODE_SIZE) + (num_inner_nodes - num_unaligned_nodes) * BVH_QNODE_SIZE;
(num_inner_nodes - num_unaligned_nodes) * BVH_QNODE_SIZE; }
} else {
else { node_size = num_inner_nodes * BVH_QNODE_SIZE;
node_size = num_inner_nodes * BVH_QNODE_SIZE; }
} /* Resize arrays. */
/* Resize arrays. */ pack.nodes.clear();
pack.nodes.clear(); pack.leaf_nodes.clear();
pack.leaf_nodes.clear(); /* For top level BVH, first merge existing BVH's so we know the offsets. */
/* For top level BVH, first merge existing BVH's so we know the offsets. */ if (params.top_level) {
if(params.top_level) { pack_instances(node_size, num_leaf_nodes * BVH_QNODE_LEAF_SIZE);
pack_instances(node_size, num_leaf_nodes*BVH_QNODE_LEAF_SIZE); }
} else {
else { pack.nodes.resize(node_size);
pack.nodes.resize(node_size); pack.leaf_nodes.resize(num_leaf_nodes * BVH_QNODE_LEAF_SIZE);
pack.leaf_nodes.resize(num_leaf_nodes*BVH_QNODE_LEAF_SIZE); }
}
int nextNodeIdx = 0, nextLeafNodeIdx = 0; int nextNodeIdx = 0, nextLeafNodeIdx = 0;
vector<BVHStackEntry> stack; vector<BVHStackEntry> stack;
stack.reserve(BVHParams::MAX_DEPTH*2); stack.reserve(BVHParams::MAX_DEPTH * 2);
if(root->is_leaf()) { if (root->is_leaf()) {
stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++)); stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
} }
else { else {
stack.push_back(BVHStackEntry(root, nextNodeIdx)); stack.push_back(BVHStackEntry(root, nextNodeIdx));
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE : BVH_QNODE_SIZE;
: BVH_QNODE_SIZE; }
}
while(stack.size()) { while (stack.size()) {
BVHStackEntry e = stack.back(); BVHStackEntry e = stack.back();
stack.pop_back(); stack.pop_back();
if(e.node->is_leaf()) { if (e.node->is_leaf()) {
/* leaf node */ /* leaf node */
const LeafNode *leaf = reinterpret_cast<const LeafNode*>(e.node); const LeafNode *leaf = reinterpret_cast<const LeafNode *>(e.node);
pack_leaf(e, leaf); pack_leaf(e, leaf);
} }
else { else {
/* Inner node. */ /* Inner node. */
/* Collect nodes. */ /* Collect nodes. */
const BVHNode *children[4]; const BVHNode *children[4];
const int num_children = e.node->num_children(); const int num_children = e.node->num_children();
/* Push entries on the stack. */ /* Push entries on the stack. */
for(int i = 0; i < num_children; ++i) { for (int i = 0; i < num_children; ++i) {
int idx; int idx;
children[i] = e.node->get_child(i); children[i] = e.node->get_child(i);
assert(children[i] != NULL); assert(children[i] != NULL);
if(children[i]->is_leaf()) { if (children[i]->is_leaf()) {
idx = nextLeafNodeIdx++; idx = nextLeafNodeIdx++;
} }
else { else {
idx = nextNodeIdx; idx = nextNodeIdx;
nextNodeIdx += children[i]->has_unaligned() nextNodeIdx += children[i]->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE : BVH_QNODE_SIZE;
? BVH_UNALIGNED_QNODE_SIZE }
: BVH_QNODE_SIZE; stack.push_back(BVHStackEntry(children[i], idx));
} }
stack.push_back(BVHStackEntry(children[i], idx)); /* Set node. */
} pack_inner(e, &stack[stack.size() - num_children], num_children);
/* Set node. */ }
pack_inner(e, &stack[stack.size() - num_children], num_children); }
}
}
assert(node_size == nextNodeIdx); assert(node_size == nextNodeIdx);
/* Root index to start traversal at, to handle case of single leaf node. */ /* Root index to start traversal at, to handle case of single leaf node. */
pack.root_index = (root->is_leaf())? -1: 0; pack.root_index = (root->is_leaf()) ? -1 : 0;
} }
void BVH4::refit_nodes() void BVH4::refit_nodes()
{ {
assert(!params.top_level); assert(!params.top_level);
BoundBox bbox = BoundBox::empty; BoundBox bbox = BoundBox::empty;
uint visibility = 0; uint visibility = 0;
refit_node(0, (pack.root_index == -1)? true: false, bbox, visibility); refit_node(0, (pack.root_index == -1) ? true : false, bbox, visibility);
} }
void BVH4::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility) void BVH4::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility)
{ {
if(leaf) { if (leaf) {
/* Refit leaf node. */ /* Refit leaf node. */
int4 *data = &pack.leaf_nodes[idx]; int4 *data = &pack.leaf_nodes[idx];
int4 c = data[0]; int4 c = data[0];
BVH::refit_primitives(c.x, c.y, bbox, visibility); BVH::refit_primitives(c.x, c.y, bbox, visibility);
/* TODO(sergey): This is actually a copy of pack_leaf(), /* TODO(sergey): This is actually a copy of pack_leaf(),
* but this chunk of code only knows actual data and has * but this chunk of code only knows actual data and has
* no idea about BVHNode. * no idea about BVHNode.
* *
* Would be nice to de-duplicate code, but trying to make * Would be nice to de-duplicate code, but trying to make
* the code more general ends up in much nastier code * the code more general ends up in much nastier code
* in my opinion so far. * in my opinion so far.
* *
* Same applies to the inner nodes case below. * Same applies to the inner nodes case below.
*/ */
float4 leaf_data[BVH_QNODE_LEAF_SIZE]; float4 leaf_data[BVH_QNODE_LEAF_SIZE];
leaf_data[0].x = __int_as_float(c.x); leaf_data[0].x = __int_as_float(c.x);
leaf_data[0].y = __int_as_float(c.y); leaf_data[0].y = __int_as_float(c.y);
leaf_data[0].z = __uint_as_float(visibility); leaf_data[0].z = __uint_as_float(visibility);
leaf_data[0].w = __uint_as_float(c.w); leaf_data[0].w = __uint_as_float(c.w);
memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4)*BVH_QNODE_LEAF_SIZE); memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4) * BVH_QNODE_LEAF_SIZE);
} }
else { else {
int4 *data = &pack.nodes[idx]; int4 *data = &pack.nodes[idx];
bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0; bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
int4 c; int4 c;
if(is_unaligned) { if (is_unaligned) {
c = data[13]; c = data[13];
} }
else { else {
c = data[7]; c = data[7];
} }
/* Refit inner node, set bbox from children. */ /* Refit inner node, set bbox from children. */
BoundBox child_bbox[4] = {BoundBox::empty, BoundBox child_bbox[4] = {BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty};
BoundBox::empty, uint child_visibility[4] = {0};
BoundBox::empty, int num_nodes = 0;
BoundBox::empty};
uint child_visibility[4] = {0};
int num_nodes = 0;
for(int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
if(c[i] != 0) { if (c[i] != 0) {
refit_node((c[i] < 0)? -c[i]-1: c[i], (c[i] < 0), refit_node((c[i] < 0) ? -c[i] - 1 : c[i], (c[i] < 0), child_bbox[i], child_visibility[i]);
child_bbox[i], child_visibility[i]); ++num_nodes;
++num_nodes; bbox.grow(child_bbox[i]);
bbox.grow(child_bbox[i]); visibility |= child_visibility[i];
visibility |= child_visibility[i]; }
} }
}
if(is_unaligned) { if (is_unaligned) {
Transform aligned_space[4] = {transform_identity(), Transform aligned_space[4] = {
transform_identity(), transform_identity(), transform_identity(), transform_identity(), transform_identity()};
transform_identity(), pack_unaligned_node(
transform_identity()}; idx, aligned_space, child_bbox, &c[0], visibility, 0.0f, 1.0f, num_nodes);
pack_unaligned_node(idx, }
aligned_space, else {
child_bbox, pack_aligned_node(idx, child_bbox, &c[0], visibility, 0.0f, 1.0f, num_nodes);
&c[0], }
visibility, }
0.0f,
1.0f,
num_nodes);
}
else {
pack_aligned_node(idx,
child_bbox,
&c[0],
visibility,
0.0f,
1.0f,
num_nodes);
}
}
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -34,8 +34,8 @@ class LeafNode;
class Object; class Object;
class Progress; class Progress;
#define BVH_QNODE_SIZE 8 #define BVH_QNODE_SIZE 8
#define BVH_QNODE_LEAF_SIZE 1 #define BVH_QNODE_LEAF_SIZE 1
#define BVH_UNALIGNED_QNODE_SIZE 14 #define BVH_UNALIGNED_QNODE_SIZE 14
/* BVH4 /* BVH4
@@ -43,48 +43,44 @@ class Progress;
* Quad BVH, with each node having four children, to use with SIMD instructions. * Quad BVH, with each node having four children, to use with SIMD instructions.
*/ */
class BVH4 : public BVH { class BVH4 : public BVH {
protected: protected:
/* constructor */ /* constructor */
friend class BVH; friend class BVH;
BVH4(const BVHParams& params, const vector<Object*>& objects); BVH4(const BVHParams &params, const vector<Object *> &objects);
/* Building process. */ /* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override; virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
/* pack */ /* pack */
void pack_nodes(const BVHNode *root) override; void pack_nodes(const BVHNode *root) override;
void pack_leaf(const BVHStackEntry& e, const LeafNode *leaf); void pack_leaf(const BVHStackEntry &e, const LeafNode *leaf);
void pack_inner(const BVHStackEntry& e, const BVHStackEntry *en, int num); void pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_aligned_inner(const BVHStackEntry& e, void pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
const BVHStackEntry *en, void pack_aligned_node(int idx,
int num); const BoundBox *bounds,
void pack_aligned_node(int idx, const int *child,
const BoundBox *bounds, const uint visibility,
const int *child, const float time_from,
const uint visibility, const float time_to,
const float time_from, const int num);
const float time_to,
const int num);
void pack_unaligned_inner(const BVHStackEntry& e, void pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
const BVHStackEntry *en, void pack_unaligned_node(int idx,
int num); const Transform *aligned_space,
void pack_unaligned_node(int idx, const BoundBox *bounds,
const Transform *aligned_space, const int *child,
const BoundBox *bounds, const uint visibility,
const int *child, const float time_from,
const uint visibility, const float time_to,
const float time_from, const int num);
const float time_to,
const int num);
/* refit */ /* refit */
void refit_nodes() override; void refit_nodes() override;
void refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility); void refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility);
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __BVH4_H__ */ #endif /* __BVH4_H__ */

View File

@@ -36,8 +36,7 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
BVH8::BVH8(const BVHParams& params_, const vector<Object*>& objects_) BVH8::BVH8(const BVHParams &params_, const vector<Object *> &objects_) : BVH(params_, objects_)
: BVH(params_, objects_)
{ {
} }
@@ -45,159 +44,148 @@ namespace {
BVHNode *bvh_node_merge_children_recursively(const BVHNode *node) BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
{ {
if(node->is_leaf()) { if (node->is_leaf()) {
return new LeafNode(*reinterpret_cast<const LeafNode *>(node)); return new LeafNode(*reinterpret_cast<const LeafNode *>(node));
} }
/* Collect nodes of two layers deeper, allowing us to have more children in /* Collect nodes of two layers deeper, allowing us to have more children in
* an inner layer. */ * an inner layer. */
assert(node->num_children() <= 2); assert(node->num_children() <= 2);
const BVHNode *children[8]; const BVHNode *children[8];
const BVHNode *child0 = node->get_child(0); const BVHNode *child0 = node->get_child(0);
const BVHNode *child1 = node->get_child(1); const BVHNode *child1 = node->get_child(1);
int num_children = 0; int num_children = 0;
if(child0->is_leaf()) { if (child0->is_leaf()) {
children[num_children++] = child0; children[num_children++] = child0;
} }
else { else {
const BVHNode *child00 = child0->get_child(0), const BVHNode *child00 = child0->get_child(0), *child01 = child0->get_child(1);
*child01 = child0->get_child(1); if (child00->is_leaf()) {
if(child00->is_leaf()) { children[num_children++] = child00;
children[num_children++] = child00; }
} else {
else { children[num_children++] = child00->get_child(0);
children[num_children++] = child00->get_child(0); children[num_children++] = child00->get_child(1);
children[num_children++] = child00->get_child(1); }
} if (child01->is_leaf()) {
if(child01->is_leaf()) { children[num_children++] = child01;
children[num_children++] = child01; }
} else {
else { children[num_children++] = child01->get_child(0);
children[num_children++] = child01->get_child(0); children[num_children++] = child01->get_child(1);
children[num_children++] = child01->get_child(1); }
} }
} if (child1->is_leaf()) {
if(child1->is_leaf()) { children[num_children++] = child1;
children[num_children++] = child1; }
} else {
else { const BVHNode *child10 = child1->get_child(0), *child11 = child1->get_child(1);
const BVHNode *child10 = child1->get_child(0), if (child10->is_leaf()) {
*child11 = child1->get_child(1); children[num_children++] = child10;
if(child10->is_leaf()) { }
children[num_children++] = child10; else {
} children[num_children++] = child10->get_child(0);
else { children[num_children++] = child10->get_child(1);
children[num_children++] = child10->get_child(0); }
children[num_children++] = child10->get_child(1); if (child11->is_leaf()) {
} children[num_children++] = child11;
if(child11->is_leaf()) { }
children[num_children++] = child11; else {
} children[num_children++] = child11->get_child(0);
else { children[num_children++] = child11->get_child(1);
children[num_children++] = child11->get_child(0); }
children[num_children++] = child11->get_child(1); }
} /* Merge children in subtrees. */
} BVHNode *children4[8];
/* Merge children in subtrees. */ for (int i = 0; i < num_children; ++i) {
BVHNode *children4[8]; children4[i] = bvh_node_merge_children_recursively(children[i]);
for(int i = 0; i < num_children; ++i) { }
children4[i] = bvh_node_merge_children_recursively(children[i]); /* Allocate new node. */
} BVHNode *node8 = new InnerNode(node->bounds, children4, num_children);
/* Allocate new node. */ /* TODO(sergey): Consider doing this from the InnerNode() constructor.
BVHNode *node8 = new InnerNode(node->bounds, children4, num_children); * But in order to do this nicely need to think of how to pass all the
/* TODO(sergey): Consider doing this from the InnerNode() constructor. * parameters there. */
* But in order to do this nicely need to think of how to pass all the if (node->is_unaligned) {
* parameters there. */ node8->is_unaligned = true;
if(node->is_unaligned) { node8->aligned_space = new Transform();
node8->is_unaligned = true; *node8->aligned_space = *node->aligned_space;
node8->aligned_space = new Transform(); }
*node8->aligned_space = *node->aligned_space; return node8;
}
return node8;
} }
} // namespace } // namespace
BVHNode *BVH8::widen_children_nodes(const BVHNode *root) BVHNode *BVH8::widen_children_nodes(const BVHNode *root)
{ {
if(root == NULL) { if (root == NULL) {
return NULL; return NULL;
} }
if(root->is_leaf()) { if (root->is_leaf()) {
return const_cast<BVHNode *>(root); return const_cast<BVHNode *>(root);
} }
BVHNode *root8 = bvh_node_merge_children_recursively(root); BVHNode *root8 = bvh_node_merge_children_recursively(root);
/* TODO(sergey): Pack children nodes to parents which have fewer than 4 /* TODO(sergey): Pack children nodes to parents which have fewer than 4
* children. */ * children. */
return root8; return root8;
} }
void BVH8::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf) void BVH8::pack_leaf(const BVHStackEntry &e, const LeafNode *leaf)
{ {
float4 data[BVH_ONODE_LEAF_SIZE]; float4 data[BVH_ONODE_LEAF_SIZE];
memset(data, 0, sizeof(data)); memset(data, 0, sizeof(data));
if(leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) { if (leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
/* object */ /* object */
data[0].x = __int_as_float(~(leaf->lo)); data[0].x = __int_as_float(~(leaf->lo));
data[0].y = __int_as_float(0); data[0].y = __int_as_float(0);
} }
else { else {
/* triangle */ /* triangle */
data[0].x = __int_as_float(leaf->lo); data[0].x = __int_as_float(leaf->lo);
data[0].y = __int_as_float(leaf->hi); data[0].y = __int_as_float(leaf->hi);
} }
data[0].z = __uint_as_float(leaf->visibility); data[0].z = __uint_as_float(leaf->visibility);
if(leaf->num_triangles() != 0) { if (leaf->num_triangles() != 0) {
data[0].w = __uint_as_float(pack.prim_type[leaf->lo]); data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
} }
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_ONODE_LEAF_SIZE); memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4) * BVH_ONODE_LEAF_SIZE);
} }
void BVH8::pack_inner(const BVHStackEntry& e, void BVH8::pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
const BVHStackEntry *en,
int num)
{ {
bool has_unaligned = false; bool has_unaligned = false;
/* Check whether we have to create unaligned node or all nodes are aligned /* Check whether we have to create unaligned node or all nodes are aligned
* and we can cut some corners here. * and we can cut some corners here.
*/ */
if(params.use_unaligned_nodes) { if (params.use_unaligned_nodes) {
for(int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
if(en[i].node->is_unaligned) { if (en[i].node->is_unaligned) {
has_unaligned = true; has_unaligned = true;
break; break;
} }
} }
} }
if(has_unaligned) { if (has_unaligned) {
/* Create unaligned node with orientation transform for each of the children. */ /* Create unaligned node with orientation transform for each of the children. */
pack_unaligned_inner(e, en, num); pack_unaligned_inner(e, en, num);
} }
else { else {
/* There are no unaligned children, so pack into a plain /* There are no unaligned children, so pack into a plain
* AABB node. * AABB node.
*/ */
pack_aligned_inner(e, en, num); pack_aligned_inner(e, en, num);
} }
} }
void BVH8::pack_aligned_inner(const BVHStackEntry& e, void BVH8::pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
const BVHStackEntry *en,
int num)
{ {
BoundBox bounds[8]; BoundBox bounds[8];
int child[8]; int child[8];
for(int i = 0; i < num; ++i) { for (int i = 0; i < num; ++i) {
bounds[i] = en[i].node->bounds; bounds[i] = en[i].node->bounds;
child[i] = en[i].encodeIdx(); child[i] = en[i].encodeIdx();
} }
pack_aligned_node(e.idx, pack_aligned_node(
bounds, e.idx, bounds, child, e.node->visibility, e.node->time_from, e.node->time_to, num);
child,
e.node->visibility,
e.node->time_from,
e.node->time_to,
num);
} }
void BVH8::pack_aligned_node(int idx, void BVH8::pack_aligned_node(int idx,
@@ -208,66 +196,64 @@ void BVH8::pack_aligned_node(int idx,
const float time_to, const float time_to,
const int num) const int num)
{ {
float8 data[8]; float8 data[8];
memset(data, 0, sizeof(data)); memset(data, 0, sizeof(data));
data[0].a = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED); data[0].a = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
data[0].b = time_from; data[0].b = time_from;
data[0].c = time_to; data[0].c = time_to;
for(int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
float3 bb_min = bounds[i].min; float3 bb_min = bounds[i].min;
float3 bb_max = bounds[i].max; float3 bb_max = bounds[i].max;
data[1][i] = bb_min.x; data[1][i] = bb_min.x;
data[2][i] = bb_max.x; data[2][i] = bb_max.x;
data[3][i] = bb_min.y; data[3][i] = bb_min.y;
data[4][i] = bb_max.y; data[4][i] = bb_max.y;
data[5][i] = bb_min.z; data[5][i] = bb_min.z;
data[6][i] = bb_max.z; data[6][i] = bb_max.z;
data[7][i] = __int_as_float(child[i]); data[7][i] = __int_as_float(child[i]);
} }
for(int i = num; i < 8; i++) { for (int i = num; i < 8; i++) {
/* We store BB which would never be recorded as intersection /* We store BB which would never be recorded as intersection
* so kernel might safely assume there are always 8 child nodes. * so kernel might safely assume there are always 8 child nodes.
*/ */
data[1][i] = FLT_MAX; data[1][i] = FLT_MAX;
data[2][i] = -FLT_MAX; data[2][i] = -FLT_MAX;
data[3][i] = FLT_MAX; data[3][i] = FLT_MAX;
data[4][i] = -FLT_MAX; data[4][i] = -FLT_MAX;
data[5][i] = FLT_MAX; data[5][i] = FLT_MAX;
data[6][i] = -FLT_MAX; data[6][i] = -FLT_MAX;
data[7][i] = __int_as_float(0); data[7][i] = __int_as_float(0);
} }
memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_ONODE_SIZE); memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_ONODE_SIZE);
} }
void BVH8::pack_unaligned_inner(const BVHStackEntry& e, void BVH8::pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
const BVHStackEntry *en,
int num)
{ {
Transform aligned_space[8]; Transform aligned_space[8];
BoundBox bounds[8]; BoundBox bounds[8];
int child[8]; int child[8];
for(int i = 0; i < num; ++i) { for (int i = 0; i < num; ++i) {
aligned_space[i] = en[i].node->get_aligned_space(); aligned_space[i] = en[i].node->get_aligned_space();
bounds[i] = en[i].node->bounds; bounds[i] = en[i].node->bounds;
child[i] = en[i].encodeIdx(); child[i] = en[i].encodeIdx();
} }
pack_unaligned_node(e.idx, pack_unaligned_node(e.idx,
aligned_space, aligned_space,
bounds, bounds,
child, child,
e.node->visibility, e.node->visibility,
e.node->time_from, e.node->time_from,
e.node->time_to, e.node->time_to,
num); num);
} }
void BVH8::pack_unaligned_node(int idx, void BVH8::pack_unaligned_node(int idx,
@@ -279,283 +265,275 @@ void BVH8::pack_unaligned_node(int idx,
const float time_to, const float time_to,
const int num) const int num)
{ {
float8 data[BVH_UNALIGNED_ONODE_SIZE]; float8 data[BVH_UNALIGNED_ONODE_SIZE];
memset(data, 0, sizeof(data)); memset(data, 0, sizeof(data));
data[0].a = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED); data[0].a = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
data[0].b = time_from; data[0].b = time_from;
data[0].c = time_to; data[0].c = time_to;
for(int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
Transform space = BVHUnaligned::compute_node_transform( Transform space = BVHUnaligned::compute_node_transform(bounds[i], aligned_space[i]);
bounds[i],
aligned_space[i]);
data[1][i] = space.x.x; data[1][i] = space.x.x;
data[2][i] = space.x.y; data[2][i] = space.x.y;
data[3][i] = space.x.z; data[3][i] = space.x.z;
data[4][i] = space.y.x; data[4][i] = space.y.x;
data[5][i] = space.y.y; data[5][i] = space.y.y;
data[6][i] = space.y.z; data[6][i] = space.y.z;
data[7][i] = space.z.x; data[7][i] = space.z.x;
data[8][i] = space.z.y; data[8][i] = space.z.y;
data[9][i] = space.z.z; data[9][i] = space.z.z;
data[10][i] = space.x.w; data[10][i] = space.x.w;
data[11][i] = space.y.w; data[11][i] = space.y.w;
data[12][i] = space.z.w; data[12][i] = space.z.w;
data[13][i] = __int_as_float(child[i]); data[13][i] = __int_as_float(child[i]);
} }
for(int i = num; i < 8; i++) { for (int i = num; i < 8; i++) {
/* We store BB which would never be recorded as intersection /* We store BB which would never be recorded as intersection
* so kernel might safely assume there are always 8 child nodes. * so kernel might safely assume there are always 8 child nodes.
*/ */
data[1][i] = NAN; data[1][i] = NAN;
data[2][i] = NAN; data[2][i] = NAN;
data[3][i] = NAN; data[3][i] = NAN;
data[4][i] = NAN; data[4][i] = NAN;
data[5][i] = NAN; data[5][i] = NAN;
data[6][i] = NAN; data[6][i] = NAN;
data[7][i] = NAN; data[7][i] = NAN;
data[8][i] = NAN; data[8][i] = NAN;
data[9][i] = NAN; data[9][i] = NAN;
data[10][i] = NAN; data[10][i] = NAN;
data[11][i] = NAN; data[11][i] = NAN;
data[12][i] = NAN; data[12][i] = NAN;
data[13][i] = __int_as_float(0); data[13][i] = __int_as_float(0);
} }
memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_ONODE_SIZE); memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_UNALIGNED_ONODE_SIZE);
} }
/* Octo SIMD Nodes */ /* Octo SIMD Nodes */
void BVH8::pack_nodes(const BVHNode *root) void BVH8::pack_nodes(const BVHNode *root)
{ {
/* Calculate size of the arrays required. */ /* Calculate size of the arrays required. */
const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT); const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT); const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
assert(num_leaf_nodes <= num_nodes); assert(num_leaf_nodes <= num_nodes);
const size_t num_inner_nodes = num_nodes - num_leaf_nodes; const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
size_t node_size; size_t node_size;
if(params.use_unaligned_nodes) { if (params.use_unaligned_nodes) {
const size_t num_unaligned_nodes = const size_t num_unaligned_nodes = root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT); node_size = (num_unaligned_nodes * BVH_UNALIGNED_ONODE_SIZE) +
node_size = (num_unaligned_nodes * BVH_UNALIGNED_ONODE_SIZE) + (num_inner_nodes - num_unaligned_nodes) * BVH_ONODE_SIZE;
(num_inner_nodes - num_unaligned_nodes) * BVH_ONODE_SIZE; }
} else {
else { node_size = num_inner_nodes * BVH_ONODE_SIZE;
node_size = num_inner_nodes * BVH_ONODE_SIZE; }
} /* Resize arrays. */
/* Resize arrays. */ pack.nodes.clear();
pack.nodes.clear(); pack.leaf_nodes.clear();
pack.leaf_nodes.clear(); /* For top level BVH, first merge existing BVH's so we know the offsets. */
/* For top level BVH, first merge existing BVH's so we know the offsets. */ if (params.top_level) {
if(params.top_level) { pack_instances(node_size, num_leaf_nodes * BVH_ONODE_LEAF_SIZE);
pack_instances(node_size, num_leaf_nodes*BVH_ONODE_LEAF_SIZE); }
} else {
else { pack.nodes.resize(node_size);
pack.nodes.resize(node_size); pack.leaf_nodes.resize(num_leaf_nodes * BVH_ONODE_LEAF_SIZE);
pack.leaf_nodes.resize(num_leaf_nodes*BVH_ONODE_LEAF_SIZE); }
}
int nextNodeIdx = 0, nextLeafNodeIdx = 0; int nextNodeIdx = 0, nextLeafNodeIdx = 0;
vector<BVHStackEntry> stack; vector<BVHStackEntry> stack;
stack.reserve(BVHParams::MAX_DEPTH*2); stack.reserve(BVHParams::MAX_DEPTH * 2);
if(root->is_leaf()) { if (root->is_leaf()) {
stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++)); stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
} }
else { else {
stack.push_back(BVHStackEntry(root, nextNodeIdx)); stack.push_back(BVHStackEntry(root, nextNodeIdx));
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_ONODE_SIZE nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_ONODE_SIZE : BVH_ONODE_SIZE;
: BVH_ONODE_SIZE; }
}
while(stack.size()) { while (stack.size()) {
BVHStackEntry e = stack.back(); BVHStackEntry e = stack.back();
stack.pop_back(); stack.pop_back();
if(e.node->is_leaf()) { if (e.node->is_leaf()) {
/* leaf node */ /* leaf node */
const LeafNode *leaf = reinterpret_cast<const LeafNode*>(e.node); const LeafNode *leaf = reinterpret_cast<const LeafNode *>(e.node);
pack_leaf(e, leaf); pack_leaf(e, leaf);
} }
else { else {
/* Inner node. */ /* Inner node. */
/* Collect nodes. */ /* Collect nodes. */
const BVHNode *children[8]; const BVHNode *children[8];
int num_children = e.node->num_children(); int num_children = e.node->num_children();
/* Push entries on the stack. */ /* Push entries on the stack. */
for(int i = 0; i < num_children; ++i) { for (int i = 0; i < num_children; ++i) {
int idx; int idx;
children[i] = e.node->get_child(i); children[i] = e.node->get_child(i);
if(children[i]->is_leaf()) { if (children[i]->is_leaf()) {
idx = nextLeafNodeIdx++; idx = nextLeafNodeIdx++;
} }
else { else {
idx = nextNodeIdx; idx = nextNodeIdx;
nextNodeIdx += children[i]->has_unaligned() nextNodeIdx += children[i]->has_unaligned() ? BVH_UNALIGNED_ONODE_SIZE : BVH_ONODE_SIZE;
? BVH_UNALIGNED_ONODE_SIZE }
: BVH_ONODE_SIZE; stack.push_back(BVHStackEntry(children[i], idx));
} }
stack.push_back(BVHStackEntry(children[i], idx)); /* Set node. */
} pack_inner(e, &stack[stack.size() - num_children], num_children);
/* Set node. */ }
pack_inner(e, &stack[stack.size() - num_children], num_children); }
}
}
assert(node_size == nextNodeIdx); assert(node_size == nextNodeIdx);
/* Root index to start traversal at, to handle case of single leaf node. */ /* Root index to start traversal at, to handle case of single leaf node. */
pack.root_index = (root->is_leaf()) ? -1 : 0; pack.root_index = (root->is_leaf()) ? -1 : 0;
} }
void BVH8::refit_nodes() void BVH8::refit_nodes()
{ {
assert(!params.top_level); assert(!params.top_level);
BoundBox bbox = BoundBox::empty; BoundBox bbox = BoundBox::empty;
uint visibility = 0; uint visibility = 0;
refit_node(0, (pack.root_index == -1)? true: false, bbox, visibility); refit_node(0, (pack.root_index == -1) ? true : false, bbox, visibility);
} }
void BVH8::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility) void BVH8::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility)
{ {
if(leaf) { if (leaf) {
int4 *data = &pack.leaf_nodes[idx]; int4 *data = &pack.leaf_nodes[idx];
int4 c = data[0]; int4 c = data[0];
/* Refit leaf node. */ /* Refit leaf node. */
for(int prim = c.x; prim < c.y; prim++) { for (int prim = c.x; prim < c.y; prim++) {
int pidx = pack.prim_index[prim]; int pidx = pack.prim_index[prim];
int tob = pack.prim_object[prim]; int tob = pack.prim_object[prim];
Object *ob = objects[tob]; Object *ob = objects[tob];
if(pidx == -1) { if (pidx == -1) {
/* Object instance. */ /* Object instance. */
bbox.grow(ob->bounds); bbox.grow(ob->bounds);
} }
else { else {
/* Primitives. */ /* Primitives. */
const Mesh *mesh = ob->mesh; const Mesh *mesh = ob->mesh;
if(pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) { if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
/* Curves. */ /* Curves. */
int str_offset = (params.top_level) ? mesh->curve_offset : 0; int str_offset = (params.top_level) ? mesh->curve_offset : 0;
Mesh::Curve curve = mesh->get_curve(pidx - str_offset); Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]); int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox); curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);
visibility |= PATH_RAY_CURVE; visibility |= PATH_RAY_CURVE;
/* Motion curves. */ /* Motion curves. */
if(mesh->use_motion_blur) { if (mesh->use_motion_blur) {
Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if(attr) { if (attr) {
size_t mesh_size = mesh->curve_keys.size(); size_t mesh_size = mesh->curve_keys.size();
size_t steps = mesh->motion_steps - 1; size_t steps = mesh->motion_steps - 1;
float3 *key_steps = attr->data_float3(); float3 *key_steps = attr->data_float3();
for(size_t i = 0; i < steps; i++) { for (size_t i = 0; i < steps; i++) {
curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bbox); curve.bounds_grow(k, key_steps + i * mesh_size, &mesh->curve_radius[0], bbox);
} }
} }
} }
} }
else { else {
/* Triangles. */ /* Triangles. */
int tri_offset = (params.top_level) ? mesh->tri_offset : 0; int tri_offset = (params.top_level) ? mesh->tri_offset : 0;
Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset); Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
const float3 *vpos = &mesh->verts[0]; const float3 *vpos = &mesh->verts[0];
triangle.bounds_grow(vpos, bbox); triangle.bounds_grow(vpos, bbox);
/* Motion triangles. */ /* Motion triangles. */
if(mesh->use_motion_blur) { if (mesh->use_motion_blur) {
Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if(attr) { if (attr) {
size_t mesh_size = mesh->verts.size(); size_t mesh_size = mesh->verts.size();
size_t steps = mesh->motion_steps - 1; size_t steps = mesh->motion_steps - 1;
float3 *vert_steps = attr->data_float3(); float3 *vert_steps = attr->data_float3();
for(size_t i = 0; i < steps; i++) { for (size_t i = 0; i < steps; i++) {
triangle.bounds_grow(vert_steps + i*mesh_size, bbox); triangle.bounds_grow(vert_steps + i * mesh_size, bbox);
} }
} }
} }
} }
} }
visibility |= ob->visibility; visibility |= ob->visibility;
} }
float4 leaf_data[BVH_ONODE_LEAF_SIZE]; float4 leaf_data[BVH_ONODE_LEAF_SIZE];
leaf_data[0].x = __int_as_float(c.x); leaf_data[0].x = __int_as_float(c.x);
leaf_data[0].y = __int_as_float(c.y); leaf_data[0].y = __int_as_float(c.y);
leaf_data[0].z = __uint_as_float(visibility); leaf_data[0].z = __uint_as_float(visibility);
leaf_data[0].w = __uint_as_float(c.w); leaf_data[0].w = __uint_as_float(c.w);
memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4)*BVH_ONODE_LEAF_SIZE); memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4) * BVH_ONODE_LEAF_SIZE);
} }
else { else {
float8 *data = (float8*)&pack.nodes[idx]; float8 *data = (float8 *)&pack.nodes[idx];
bool is_unaligned = (__float_as_uint(data[0].a) & PATH_RAY_NODE_UNALIGNED) != 0; bool is_unaligned = (__float_as_uint(data[0].a) & PATH_RAY_NODE_UNALIGNED) != 0;
/* Refit inner node, set bbox from children. */ /* Refit inner node, set bbox from children. */
BoundBox child_bbox[8] = { BoundBox::empty, BoundBox::empty, BoundBox child_bbox[8] = {BoundBox::empty,
BoundBox::empty, BoundBox::empty, BoundBox::empty,
BoundBox::empty, BoundBox::empty, BoundBox::empty,
BoundBox::empty, BoundBox::empty }; BoundBox::empty,
int child[8]; BoundBox::empty,
uint child_visibility[8] = { 0 }; BoundBox::empty,
int num_nodes = 0; BoundBox::empty,
BoundBox::empty};
int child[8];
uint child_visibility[8] = {0};
int num_nodes = 0;
for(int i = 0; i < 8; ++i) { for (int i = 0; i < 8; ++i) {
child[i] = __float_as_int(data[(is_unaligned) ? 13: 7][i]); child[i] = __float_as_int(data[(is_unaligned) ? 13 : 7][i]);
if(child[i] != 0) { if (child[i] != 0) {
refit_node((child[i] < 0)? -child[i]-1: child[i], (child[i] < 0), refit_node((child[i] < 0) ? -child[i] - 1 : child[i],
child_bbox[i], child_visibility[i]); (child[i] < 0),
++num_nodes; child_bbox[i],
bbox.grow(child_bbox[i]); child_visibility[i]);
visibility |= child_visibility[i]; ++num_nodes;
} bbox.grow(child_bbox[i]);
} visibility |= child_visibility[i];
}
}
if(is_unaligned) { if (is_unaligned) {
Transform aligned_space[8] = { transform_identity(), transform_identity(), Transform aligned_space[8] = {transform_identity(),
transform_identity(), transform_identity(), transform_identity(),
transform_identity(), transform_identity(), transform_identity(),
transform_identity(), transform_identity()}; transform_identity(),
pack_unaligned_node(idx, transform_identity(),
aligned_space, transform_identity(),
child_bbox, transform_identity(),
child, transform_identity()};
visibility, pack_unaligned_node(
0.0f, idx, aligned_space, child_bbox, child, visibility, 0.0f, 1.0f, num_nodes);
1.0f, }
num_nodes); else {
} pack_aligned_node(idx, child_bbox, child, visibility, 0.0f, 1.0f, num_nodes);
else { }
pack_aligned_node(idx, }
child_bbox,
child,
visibility,
0.0f,
1.0f,
num_nodes);
}
}
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -45,8 +45,8 @@ class LeafNode;
class Object; class Object;
class Progress; class Progress;
#define BVH_ONODE_SIZE 16 #define BVH_ONODE_SIZE 16
#define BVH_ONODE_LEAF_SIZE 1 #define BVH_ONODE_LEAF_SIZE 1
#define BVH_UNALIGNED_ONODE_SIZE 28 #define BVH_UNALIGNED_ONODE_SIZE 28
/* BVH8 /* BVH8
@@ -54,48 +54,44 @@ class Progress;
* Octo BVH, with each node having eight children, to use with SIMD instructions. * Octo BVH, with each node having eight children, to use with SIMD instructions.
*/ */
class BVH8 : public BVH { class BVH8 : public BVH {
protected: protected:
/* constructor */ /* constructor */
friend class BVH; friend class BVH;
BVH8(const BVHParams& params, const vector<Object*>& objects); BVH8(const BVHParams &params, const vector<Object *> &objects);
/* Building process. */ /* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override; virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
/* pack */ /* pack */
void pack_nodes(const BVHNode *root) override; void pack_nodes(const BVHNode *root) override;
void pack_leaf(const BVHStackEntry& e, const LeafNode *leaf); void pack_leaf(const BVHStackEntry &e, const LeafNode *leaf);
void pack_inner(const BVHStackEntry& e, const BVHStackEntry *en, int num); void pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_aligned_inner(const BVHStackEntry& e, void pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
const BVHStackEntry *en, void pack_aligned_node(int idx,
int num); const BoundBox *bounds,
void pack_aligned_node(int idx, const int *child,
const BoundBox *bounds, const uint visibility,
const int *child, const float time_from,
const uint visibility, const float time_to,
const float time_from, const int num);
const float time_to,
const int num);
void pack_unaligned_inner(const BVHStackEntry& e, void pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
const BVHStackEntry *en, void pack_unaligned_node(int idx,
int num); const Transform *aligned_space,
void pack_unaligned_node(int idx, const BoundBox *bounds,
const Transform *aligned_space, const int *child,
const BoundBox *bounds, const uint visibility,
const int *child, const float time_from,
const uint visibility, const float time_to,
const float time_from, const int num);
const float time_to,
const int num);
/* refit */ /* refit */
void refit_nodes() override; void refit_nodes() override;
void refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility); void refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility);
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __BVH8_H__ */ #endif /* __BVH8_H__ */

View File

@@ -29,225 +29,265 @@ CCL_NAMESPACE_BEGIN
/* SSE replacements */ /* SSE replacements */
/* No-op stand-ins for the prefetch hints: each accepts an address and
 * deliberately does nothing with it. */
__forceinline void prefetch_L1(const void * /*ptr*/)
{
}

__forceinline void prefetch_L2(const void * /*ptr*/)
{
}

__forceinline void prefetch_L3(const void * /*ptr*/)
{
}

__forceinline void prefetch_NTA(const void * /*ptr*/)
{
}

/* Read lane `src` of an int4; the value is returned as a float. */
template<size_t src> __forceinline float extract(const int4 &b)
{
  return b[src];
}

/* Return a copy of `a` in which lane `dst` has been overwritten with `b`. */
template<size_t dst> __forceinline const float4 insert(const float4 &a, const float b)
{
  float4 result = a;
  result[dst] = b;
  return result;
}

/* Pick the axis (0 = x, 1 = y, 2 = z) holding the smallest of the first three
 * lanes of bestSAH. x is tested before y, so ties go to the lower index; when
 * neither x nor y compares equal to the minimum the answer is 2. */
__forceinline int get_best_dimension(const float4 &bestSAH)
{
  // return (int)__bsf(movemask(reduce_min(bestSAH) == bestSAH));

  const float lowest = min(bestSAH.x, min(bestSAH.y, bestSAH.z));

  if (bestSAH.x == lowest) {
    return 0;
  }
  return (bestSAH.y == lowest) ? 1 : 2;
}
/* BVH Object Binning */ /* BVH Object Binning */
/* Bin the primitives of `job` along each of the three axes, evaluate the SAH
 * at every bin boundary and remember the cheapest split (dim, pos, splitSAH)
 * next to the cost of keeping the whole range as one leaf (leafSAH).
 *
 * When aligned_space is NULL the bounds stored in the range itself are used;
 * otherwise bounds and centroid bounds are recomputed through
 * unaligned_heuristic. */
BVHObjectBinning::BVHObjectBinning(const BVHRange &job,
                                   BVHReference *prims,
                                   const BVHUnaligned *unaligned_heuristic,
                                   const Transform *aligned_space)
    : BVHRange(job),
      splitSAH(FLT_MAX),
      dim(0),
      pos(0),
      unaligned_heuristic_(unaligned_heuristic),
      aligned_space_(aligned_space)
{
  if (aligned_space_ != NULL) {
    /* TODO(sergey): With some additional storage we can avoid
     * need in re-calculating this.
     */
    bounds_ = unaligned_heuristic->compute_aligned_boundbox(
        *this, prims, *aligned_space, &cent_bounds_);
  }
  else {
    bounds_ = bounds();
    cent_bounds_ = cent_bounds();
  }

  /* compute number of bins to use and precompute scaling factor for binning */
  num_bins = min(size_t(MAX_BINS), size_t(4.0f + 0.05f * size()));
  scale = rcp(cent_bounds_.size()) * make_float3((float)num_bins);

  /* initialize binning counter and bounds */
  BoundBox bin_bounds[MAX_BINS][4]; /* bounds for every bin in every dimension */
  int4 bin_count[MAX_BINS];         /* number of primitives mapped to bin */

  for (size_t bin = 0; bin < num_bins; bin++) {
    bin_count[bin] = make_int4(0);
    bin_bounds[bin][0] = bin_bounds[bin][1] = bin_bounds[bin][2] = BoundBox::empty;
  }

  /* Register one primitive: on every axis, bump the counter of the bin it
   * falls into and grow that bin's bounds by the primitive's bounds. */
  auto add_to_bins = [&bin_count, &bin_bounds](const int4 &bin, const BoundBox &prim_bounds) {
    const int ix = (int)extract<0>(bin);
    bin_count[ix][0]++;
    bin_bounds[ix][0].grow(prim_bounds);

    const int iy = (int)extract<1>(bin);
    bin_count[iy][1]++;
    bin_bounds[iy][1].grow(prim_bounds);

    const int iz = (int)extract<2>(bin);
    bin_count[iz][2]++;
    bin_bounds[iz][2].grow(prim_bounds);
  };

  /* map geometry to bins, unrolled once */
  {
    ssize_t i;
    for (i = 0; i < ssize_t(size()) - 1; i += 2) {
      prefetch_L2(&prims[start() + i + 8]);

      /* map even and odd primitive to bin */
      const BVHReference &even_prim = prims[start() + i + 0];
      const BVHReference &odd_prim = prims[start() + i + 1];

      BoundBox even_bounds = get_prim_bounds(even_prim);
      BoundBox odd_bounds = get_prim_bounds(odd_prim);

      int4 even_bin = get_bin(even_bounds);
      int4 odd_bin = get_bin(odd_bounds);

      /* increase bounds for bins for even primitive */
      add_to_bins(even_bin, even_bounds);

      /* increase bounds of bins for odd primitive */
      add_to_bins(odd_bin, odd_bounds);
    }

    /* for uneven number of primitives */
    if (i < ssize_t(size())) {
      /* map primitive to bin */
      const BVHReference &last_prim = prims[start() + i];
      BoundBox last_bounds = get_prim_bounds(last_prim);
      int4 last_bin = get_bin(last_bounds);

      /* increase bounds of bins */
      add_to_bins(last_bin, last_bounds);
    }
  }

  /* sweep from right to left and compute parallel prefix of merged bounds */
  float4 r_area[MAX_BINS];  /* area of bounds of primitives on the right */
  float4 r_count[MAX_BINS]; /* number of primitives on the right */
  int4 count = make_int4(0);

  BoundBox bx = BoundBox::empty;
  BoundBox by = BoundBox::empty;
  BoundBox bz = BoundBox::empty;

  /* Entry 0 of r_area / r_count is never written; the sweep below only reads
   * indices 1 .. num_bins - 1. */
  for (size_t i = num_bins - 1; i > 0; i--) {
    count = count + bin_count[i];
    r_count[i] = blocks(count);

    bx = merge(bx, bin_bounds[i][0]);
    by = merge(by, bin_bounds[i][1]);
    bz = merge(bz, bin_bounds[i][2]);

    r_area[i][0] = bx.half_area();
    r_area[i][1] = by.half_area();
    r_area[i][2] = bz.half_area();
    r_area[i][3] = r_area[i][2];
  }

  /* sweep from left to right and compute SAH */
  int4 ii = make_int4(1);
  float4 bestSAH = make_float4(FLT_MAX);
  int4 bestSplit = make_int4(-1);

  count = make_int4(0);

  bx = BoundBox::empty;
  by = BoundBox::empty;
  bz = BoundBox::empty;

  for (size_t i = 1; i < num_bins; i++, ii += make_int4(1)) {
    const size_t prev = i - 1;

    count = count + bin_count[prev];

    bx = merge(bx, bin_bounds[prev][0]);
    by = merge(by, bin_bounds[prev][1]);
    bz = merge(bz, bin_bounds[prev][2]);

    float Ax = bx.half_area();
    float Ay = by.half_area();
    float Az = bz.half_area();

    float4 lCount = blocks(count);
    float4 lArea = make_float4(Ax, Ay, Az, Az);
    float4 sah = lArea * lCount + r_area[i] * r_count[i];

    bestSplit = select(sah < bestSAH, ii, bestSplit);
    bestSAH = min(sah, bestSAH);
  }

  /* Axes on which the centroid bounds have no extent, and the unused fourth
   * lane, are forced to FLT_MAX so they cannot be selected as best. */
  int4 mask = float3_to_float4(cent_bounds_.size()) <= make_float4(0.0f);
  bestSAH = insert<3>(select(mask, make_float4(FLT_MAX), bestSAH), FLT_MAX);

  /* find best dimension */
  dim = get_best_dimension(bestSAH);
  splitSAH = bestSAH[dim];
  pos = bestSplit[dim];
  leafSAH = bounds_.half_area() * blocks(size());
}
void BVHObjectBinning::split(BVHReference* prims, void BVHObjectBinning::split(BVHReference *prims,
BVHObjectBinning& left_o, BVHObjectBinning &left_o,
BVHObjectBinning& right_o) const BVHObjectBinning &right_o) const
{ {
size_t N = size(); size_t N = size();
BoundBox lgeom_bounds = BoundBox::empty; BoundBox lgeom_bounds = BoundBox::empty;
BoundBox rgeom_bounds = BoundBox::empty; BoundBox rgeom_bounds = BoundBox::empty;
BoundBox lcent_bounds = BoundBox::empty; BoundBox lcent_bounds = BoundBox::empty;
BoundBox rcent_bounds = BoundBox::empty; BoundBox rcent_bounds = BoundBox::empty;
ssize_t l = 0, r = N-1; ssize_t l = 0, r = N - 1;
while(l <= r) { while (l <= r) {
prefetch_L2(&prims[start() + l + 8]); prefetch_L2(&prims[start() + l + 8]);
prefetch_L2(&prims[start() + r - 8]); prefetch_L2(&prims[start() + r - 8]);
BVHReference prim = prims[start() + l]; BVHReference prim = prims[start() + l];
BoundBox unaligned_bounds = get_prim_bounds(prim); BoundBox unaligned_bounds = get_prim_bounds(prim);
float3 unaligned_center = unaligned_bounds.center2(); float3 unaligned_center = unaligned_bounds.center2();
float3 center = prim.bounds().center2(); float3 center = prim.bounds().center2();
if(get_bin(unaligned_center)[dim] < pos) { if (get_bin(unaligned_center)[dim] < pos) {
lgeom_bounds.grow(prim.bounds()); lgeom_bounds.grow(prim.bounds());
lcent_bounds.grow(center); lcent_bounds.grow(center);
l++; l++;
} }
else { else {
rgeom_bounds.grow(prim.bounds()); rgeom_bounds.grow(prim.bounds());
rcent_bounds.grow(center); rcent_bounds.grow(center);
swap(prims[start()+l],prims[start()+r]); swap(prims[start() + l], prims[start() + r]);
r--; r--;
} }
} }
/* finish */ /* finish */
if(l != 0 && N-1-r != 0) { if (l != 0 && N - 1 - r != 0) {
right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + l, N-1-r), prims); right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + l, N - 1 - r),
left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), l), prims); prims);
return; left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), l), prims);
} return;
}
/* object medium split if we did not make progress, can happen when all /* object medium split if we did not make progress, can happen when all
* primitives have same centroid */ * primitives have same centroid */
lgeom_bounds = BoundBox::empty; lgeom_bounds = BoundBox::empty;
rgeom_bounds = BoundBox::empty; rgeom_bounds = BoundBox::empty;
lcent_bounds = BoundBox::empty; lcent_bounds = BoundBox::empty;
rcent_bounds = BoundBox::empty; rcent_bounds = BoundBox::empty;
for(size_t i = 0; i < N/2; i++) { for (size_t i = 0; i < N / 2; i++) {
lgeom_bounds.grow(prims[start()+i].bounds()); lgeom_bounds.grow(prims[start() + i].bounds());
lcent_bounds.grow(prims[start()+i].bounds().center2()); lcent_bounds.grow(prims[start() + i].bounds().center2());
} }
for(size_t i = N/2; i < N; i++) { for (size_t i = N / 2; i < N; i++) {
rgeom_bounds.grow(prims[start()+i].bounds()); rgeom_bounds.grow(prims[start() + i].bounds());
rcent_bounds.grow(prims[start()+i].bounds().center2()); rcent_bounds.grow(prims[start() + i].bounds().center2());
} }
right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + N/2, N/2 + N%2), prims); right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + N / 2, N / 2 + N % 2),
left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), N/2), prims); prims);
left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), N / 2), prims);
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -34,81 +34,82 @@ class BVHBuild;
* location to different sets. The SAH is evaluated by computing the number of * location to different sets. The SAH is evaluated by computing the number of
* blocks occupied by the primitives in the partitions. */ * blocks occupied by the primitives in the partitions. */
class BVHObjectBinning : public BVHRange class BVHObjectBinning : public BVHRange {
{ public:
public: __forceinline BVHObjectBinning() : leafSAH(FLT_MAX)
__forceinline BVHObjectBinning() : leafSAH(FLT_MAX) {} {
}
BVHObjectBinning(const BVHRange& job, BVHObjectBinning(const BVHRange &job,
BVHReference *prims, BVHReference *prims,
const BVHUnaligned *unaligned_heuristic = NULL, const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL); const Transform *aligned_space = NULL);
void split(BVHReference *prims, void split(BVHReference *prims, BVHObjectBinning &left_o, BVHObjectBinning &right_o) const;
BVHObjectBinning& left_o,
BVHObjectBinning& right_o) const;
__forceinline const BoundBox& unaligned_bounds() { return bounds_; } __forceinline const BoundBox &unaligned_bounds()
{
return bounds_;
}
float splitSAH; /* SAH cost of the best split */ float splitSAH; /* SAH cost of the best split */
float leafSAH; /* SAH cost of creating a leaf */ float leafSAH; /* SAH cost of creating a leaf */
protected: protected:
int dim; /* best split dimension */ int dim; /* best split dimension */
int pos; /* best split position */ int pos; /* best split position */
size_t num_bins; /* actual number of bins to use */ size_t num_bins; /* actual number of bins to use */
float3 scale; /* scaling factor to compute bin */ float3 scale; /* scaling factor to compute bin */
/* Effective bounds and centroid bounds. */ /* Effective bounds and centroid bounds. */
BoundBox bounds_; BoundBox bounds_;
BoundBox cent_bounds_; BoundBox cent_bounds_;
const BVHUnaligned *unaligned_heuristic_; const BVHUnaligned *unaligned_heuristic_;
const Transform *aligned_space_; const Transform *aligned_space_;
enum { MAX_BINS = 32 }; enum { MAX_BINS = 32 };
enum { LOG_BLOCK_SIZE = 2 }; enum { LOG_BLOCK_SIZE = 2 };
/* computes the bin numbers for each dimension for a box. */ /* computes the bin numbers for each dimension for a box. */
__forceinline int4 get_bin(const BoundBox& box) const __forceinline int4 get_bin(const BoundBox &box) const
{ {
int4 a = make_int4((box.center2() - cent_bounds_.min)*scale - make_float3(0.5f)); int4 a = make_int4((box.center2() - cent_bounds_.min) * scale - make_float3(0.5f));
int4 mn = make_int4(0); int4 mn = make_int4(0);
int4 mx = make_int4((int)num_bins-1); int4 mx = make_int4((int)num_bins - 1);
return clamp(a, mn, mx); return clamp(a, mn, mx);
} }
/* computes the bin numbers for each dimension for a point. */ /* computes the bin numbers for each dimension for a point. */
__forceinline int4 get_bin(const float3& c) const __forceinline int4 get_bin(const float3 &c) const
{ {
return make_int4((c - cent_bounds_.min)*scale - make_float3(0.5f)); return make_int4((c - cent_bounds_.min) * scale - make_float3(0.5f));
} }
/* compute the number of blocks occupied for each dimension. */ /* compute the number of blocks occupied for each dimension. */
__forceinline float4 blocks(const int4& a) const __forceinline float4 blocks(const int4 &a) const
{ {
return make_float4((a + make_int4((1 << LOG_BLOCK_SIZE)-1)) >> LOG_BLOCK_SIZE); return make_float4((a + make_int4((1 << LOG_BLOCK_SIZE) - 1)) >> LOG_BLOCK_SIZE);
} }
/* compute the number of blocks occupied in one dimension. */ /* compute the number of blocks occupied in one dimension. */
__forceinline int blocks(size_t a) const __forceinline int blocks(size_t a) const
{ {
return (int)((a+((1LL << LOG_BLOCK_SIZE)-1)) >> LOG_BLOCK_SIZE); return (int)((a + ((1LL << LOG_BLOCK_SIZE) - 1)) >> LOG_BLOCK_SIZE);
} }
__forceinline BoundBox get_prim_bounds(const BVHReference& prim) const __forceinline BoundBox get_prim_bounds(const BVHReference &prim) const
{ {
if(aligned_space_ == NULL) { if (aligned_space_ == NULL) {
return prim.bounds(); return prim.bounds();
} }
else { else {
return unaligned_heuristic_->compute_aligned_prim_boundbox( return unaligned_heuristic_->compute_aligned_prim_boundbox(prim, *aligned_space_);
prim, *aligned_space_); }
} }
}
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __BVH_BINNING_H__ */ #endif /* __BVH_BINNING_H__ */

File diff suppressed because it is too large Load Diff

View File

@@ -41,106 +41,101 @@ class Progress;
/* BVH Builder */ /* BVH Builder */
class BVHBuild class BVHBuild {
{ public:
public: /* Constructor/Destructor */
/* Constructor/Destructor */ BVHBuild(const vector<Object *> &objects,
BVHBuild(const vector<Object*>& objects, array<int> &prim_type,
array<int>& prim_type, array<int> &prim_index,
array<int>& prim_index, array<int> &prim_object,
array<int>& prim_object, array<float2> &prim_time,
array<float2>& prim_time, const BVHParams &params,
const BVHParams& params, Progress &progress);
Progress& progress); ~BVHBuild();
~BVHBuild();
BVHNode *run(); BVHNode *run();
protected: protected:
friend class BVHMixedSplit; friend class BVHMixedSplit;
friend class BVHObjectSplit; friend class BVHObjectSplit;
friend class BVHSpatialSplit; friend class BVHSpatialSplit;
friend class BVHBuildTask; friend class BVHBuildTask;
friend class BVHSpatialSplitBuildTask; friend class BVHSpatialSplitBuildTask;
friend class BVHObjectBinning; friend class BVHObjectBinning;
/* Adding references. */ /* Adding references. */
void add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *mesh, int i); void add_reference_triangles(BoundBox &root, BoundBox &center, Mesh *mesh, int i);
void add_reference_curves(BoundBox& root, BoundBox& center, Mesh *mesh, int i); void add_reference_curves(BoundBox &root, BoundBox &center, Mesh *mesh, int i);
void add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i); void add_reference_mesh(BoundBox &root, BoundBox &center, Mesh *mesh, int i);
void add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i); void add_reference_object(BoundBox &root, BoundBox &center, Object *ob, int i);
void add_references(BVHRange& root); void add_references(BVHRange &root);
/* Building. */ /* Building. */
BVHNode *build_node(const BVHRange& range, BVHNode *build_node(const BVHRange &range,
vector<BVHReference> *references, vector<BVHReference> *references,
int level, int level,
int thread_id); int thread_id);
BVHNode *build_node(const BVHObjectBinning& range, int level); BVHNode *build_node(const BVHObjectBinning &range, int level);
BVHNode *create_leaf_node(const BVHRange& range, BVHNode *create_leaf_node(const BVHRange &range, const vector<BVHReference> &references);
const vector<BVHReference>& references); BVHNode *create_object_leaf_nodes(const BVHReference *ref, int start, int num);
BVHNode *create_object_leaf_nodes(const BVHReference *ref, int start, int num);
bool range_within_max_leaf_size(const BVHRange& range, bool range_within_max_leaf_size(const BVHRange &range,
const vector<BVHReference>& references) const; const vector<BVHReference> &references) const;
/* Threads. */ /* Threads. */
enum { THREAD_TASK_SIZE = 4096 }; enum { THREAD_TASK_SIZE = 4096 };
void thread_build_node(InnerNode *node, void thread_build_node(InnerNode *node, int child, BVHObjectBinning *range, int level);
int child, void thread_build_spatial_split_node(InnerNode *node,
BVHObjectBinning *range, int child,
int level); BVHRange *range,
void thread_build_spatial_split_node(InnerNode *node, vector<BVHReference> *references,
int child, int level,
BVHRange *range, int thread_id);
vector<BVHReference> *references, thread_mutex build_mutex;
int level,
int thread_id);
thread_mutex build_mutex;
/* Progress. */ /* Progress. */
void progress_update(); void progress_update();
/* Tree rotations. */ /* Tree rotations. */
void rotate(BVHNode *node, int max_depth); void rotate(BVHNode *node, int max_depth);
void rotate(BVHNode *node, int max_depth, int iterations); void rotate(BVHNode *node, int max_depth, int iterations);
/* Objects and primitive references. */ /* Objects and primitive references. */
vector<Object*> objects; vector<Object *> objects;
vector<BVHReference> references; vector<BVHReference> references;
int num_original_references; int num_original_references;
/* Output primitive indexes and objects. */ /* Output primitive indexes and objects. */
array<int>& prim_type; array<int> &prim_type;
array<int>& prim_index; array<int> &prim_index;
array<int>& prim_object; array<int> &prim_object;
array<float2>& prim_time; array<float2> &prim_time;
bool need_prim_time; bool need_prim_time;
/* Build parameters. */ /* Build parameters. */
BVHParams params; BVHParams params;
/* Progress reporting. */ /* Progress reporting. */
Progress& progress; Progress &progress;
double progress_start_time; double progress_start_time;
size_t progress_count; size_t progress_count;
size_t progress_total; size_t progress_total;
size_t progress_original_total; size_t progress_original_total;
/* Spatial splitting. */ /* Spatial splitting. */
float spatial_min_overlap; float spatial_min_overlap;
vector<BVHSpatialStorage> spatial_storage; vector<BVHSpatialStorage> spatial_storage;
size_t spatial_free_index; size_t spatial_free_index;
thread_spin_lock spatial_spin_lock; thread_spin_lock spatial_spin_lock;
/* Threads. */ /* Threads. */
TaskPool task_pool; TaskPool task_pool;
/* Unaligned building. */ /* Unaligned building. */
BVHUnaligned unaligned_heuristic; BVHUnaligned unaligned_heuristic;
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __BVH_BUILD_H__ */ #endif /* __BVH_BUILD_H__ */

File diff suppressed because it is too large Load Diff

View File

@@ -19,65 +19,68 @@
#ifdef WITH_EMBREE #ifdef WITH_EMBREE
#include <embree3/rtcore.h> # include <embree3/rtcore.h>
#include <embree3/rtcore_scene.h> # include <embree3/rtcore_scene.h>
#include "bvh/bvh.h" # include "bvh/bvh.h"
#include "bvh/bvh_params.h" # include "bvh/bvh_params.h"
#include "util/util_thread.h" # include "util/util_thread.h"
#include "util/util_types.h" # include "util/util_types.h"
#include "util/util_vector.h" # include "util/util_vector.h"
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
class Mesh; class Mesh;
class BVHEmbree : public BVH class BVHEmbree : public BVH {
{ public:
public: virtual void build(Progress &progress, Stats *stats) override;
virtual void build(Progress& progress, Stats *stats) override; virtual ~BVHEmbree();
virtual ~BVHEmbree(); RTCScene scene;
RTCScene scene; static void destroy(RTCScene);
static void destroy(RTCScene);
/* Building process. */ /* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override; virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
protected: protected:
friend class BVH; friend class BVH;
BVHEmbree(const BVHParams& params, const vector<Object*>& objects); BVHEmbree(const BVHParams &params, const vector<Object *> &objects);
virtual void pack_nodes(const BVHNode*) override; virtual void pack_nodes(const BVHNode *) override;
virtual void refit_nodes() override; virtual void refit_nodes() override;
void add_object(Object *ob, int i); void add_object(Object *ob, int i);
void add_instance(Object *ob, int i); void add_instance(Object *ob, int i);
void add_curves(Object *ob, int i); void add_curves(Object *ob, int i);
void add_triangles(Object *ob, int i); void add_triangles(Object *ob, int i);
ssize_t mem_used; ssize_t mem_used;
void add_delayed_delete_scene(RTCScene scene) { delayed_delete_scenes.push_back(scene); } void add_delayed_delete_scene(RTCScene scene)
BVHEmbree *top_level; {
private: delayed_delete_scenes.push_back(scene);
void delete_rtcScene(); }
void update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh); BVHEmbree *top_level;
void update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh);
static RTCDevice rtc_shared_device; private:
static int rtc_shared_users; void delete_rtcScene();
static thread_mutex rtc_shared_mutex; void update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh);
void update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh);
Stats *stats; static RTCDevice rtc_shared_device;
vector<RTCScene> delayed_delete_scenes; static int rtc_shared_users;
int curve_subdivisions; static thread_mutex rtc_shared_mutex;
enum RTCBuildQuality build_quality;
bool use_curves, use_ribbons, dynamic_scene; Stats *stats;
vector<RTCScene> delayed_delete_scenes;
int curve_subdivisions;
enum RTCBuildQuality build_quality;
bool use_curves, use_ribbons, dynamic_scene;
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* WITH_EMBREE */ #endif /* WITH_EMBREE */
#endif /* __BVH_EMBREE_H__ */ #endif /* __BVH_EMBREE_H__ */

View File

@@ -28,199 +28,197 @@ CCL_NAMESPACE_BEGIN
/* Evaluate statistic `stat` over the subtree rooted at this node.
 *
 * Every statistic except BVH_STAT_DEPTH is a sum: this node's own contribution
 * is determined in the switch and the children's results are added afterwards.
 * BVH_STAT_DEPTH is a maximum over the children plus one and returns straight
 * from the switch. */
int BVHNode::getSubtreeSize(BVH_STAT stat) const
{
  int cnt = 0;

  switch (stat) {
    case BVH_STAT_NODE_COUNT:
      cnt = 1;
      break;
    case BVH_STAT_LEAF_COUNT:
      cnt = is_leaf() ? 1 : 0;
      break;
    case BVH_STAT_INNER_COUNT:
      cnt = is_leaf() ? 0 : 1;
      break;
    case BVH_STAT_TRIANGLE_COUNT:
      cnt = is_leaf() ? reinterpret_cast<const LeafNode *>(this)->num_triangles() : 0;
      break;
    case BVH_STAT_CHILDNODE_COUNT:
      cnt = num_children();
      break;
    case BVH_STAT_ALIGNED_COUNT:
      cnt = is_unaligned ? 0 : 1;
      break;
    case BVH_STAT_UNALIGNED_COUNT:
      cnt = is_unaligned ? 1 : 0;
      break;
    case BVH_STAT_ALIGNED_INNER_COUNT:
      /* An inner node counts as aligned only if none of its children is
       * unaligned. */
      if (!is_leaf()) {
        bool any_child_unaligned = false;
        for (int j = 0; j < num_children(); j++) {
          any_child_unaligned |= get_child(j)->is_unaligned;
        }
        if (!any_child_unaligned) {
          cnt += 1;
        }
      }
      break;
    case BVH_STAT_UNALIGNED_INNER_COUNT:
      /* An inner node counts as unaligned as soon as one child is. */
      if (!is_leaf()) {
        bool any_child_unaligned = false;
        for (int j = 0; j < num_children(); j++) {
          any_child_unaligned |= get_child(j)->is_unaligned;
        }
        if (any_child_unaligned) {
          cnt += 1;
        }
      }
      break;
    case BVH_STAT_ALIGNED_LEAF_COUNT:
      cnt = (is_leaf() && !is_unaligned) ? 1 : 0;
      break;
    case BVH_STAT_UNALIGNED_LEAF_COUNT:
      cnt = (is_leaf() && is_unaligned) ? 1 : 0;
      break;
    case BVH_STAT_DEPTH: {
      /* A leaf has depth 1; an inner node is one deeper than its deepest
       * child. */
      int deepest_child = 0;
      if (!is_leaf()) {
        for (int i = 0; i < num_children(); i++) {
          deepest_child = max(deepest_child, get_child(i)->getSubtreeSize(stat));
        }
      }
      return deepest_child + 1;
    }
    default:
      assert(0); /* unknown mode */
  }

  /* Leaves have nothing more to add. */
  if (is_leaf()) {
    return cnt;
  }

  for (int i = 0; i < num_children(); i++) {
    cnt += get_child(i)->getSubtreeSize(stat);
  }

  return cnt;
}
void BVHNode::deleteSubtree() void BVHNode::deleteSubtree()
{ {
for(int i = 0; i < num_children(); i++) for (int i = 0; i < num_children(); i++)
if(get_child(i)) if (get_child(i))
get_child(i)->deleteSubtree(); get_child(i)->deleteSubtree();
delete this; delete this;
} }
float BVHNode::computeSubtreeSAHCost(const BVHParams& p, float probability) const float BVHNode::computeSubtreeSAHCost(const BVHParams &p, float probability) const
{ {
float SAH = probability * p.cost(num_children(), num_triangles()); float SAH = probability * p.cost(num_children(), num_triangles());
for(int i = 0; i < num_children(); i++) { for (int i = 0; i < num_children(); i++) {
BVHNode *child = get_child(i); BVHNode *child = get_child(i);
SAH += child->computeSubtreeSAHCost(p, probability * child->bounds.safe_area()/bounds.safe_area()); SAH += child->computeSubtreeSAHCost(
} p, probability * child->bounds.safe_area() / bounds.safe_area());
}
return SAH; return SAH;
} }
/* Return this node's visibility mask. For an inner node whose mask is still 0
 * it is first computed as the bitwise OR of the masks of children[0] and
 * children[1] (recursively) and stored. */
uint BVHNode::update_visibility()
{
  /* Leaves, and inner nodes that already carry a mask, are returned as is. */
  if (is_leaf() || visibility != 0) {
    return visibility;
  }

  InnerNode *inner = (InnerNode *)this;
  const uint first_mask = inner->children[0]->update_visibility();
  const uint second_mask = inner->children[1]->update_visibility();

  visibility = first_mask | second_mask;
  return visibility;
}
void BVHNode::update_time() void BVHNode::update_time()
{ {
if(!is_leaf()) { if (!is_leaf()) {
InnerNode *inner = (InnerNode*)this; InnerNode *inner = (InnerNode *)this;
BVHNode *child0 = inner->children[0]; BVHNode *child0 = inner->children[0];
BVHNode *child1 = inner->children[1]; BVHNode *child1 = inner->children[1];
child0->update_time(); child0->update_time();
child1->update_time(); child1->update_time();
time_from = min(child0->time_from, child1->time_from); time_from = min(child0->time_from, child1->time_from);
time_to = max(child0->time_to, child1->time_to); time_to = max(child0->time_to, child1->time_to);
} }
} }
namespace { namespace {
struct DumpTraversalContext { struct DumpTraversalContext {
/* Descriptor of wile where writing is happening. */ /* Descriptor of wile where writing is happening. */
FILE *stream; FILE *stream;
/* Unique identifier of the node current. */ /* Unique identifier of the node current. */
int id; int id;
}; };
void dump_subtree(DumpTraversalContext *context, void dump_subtree(DumpTraversalContext *context, const BVHNode *node, const BVHNode *parent = NULL)
const BVHNode *node,
const BVHNode *parent = NULL)
{ {
if(node->is_leaf()) { if (node->is_leaf()) {
fprintf(context->stream, fprintf(context->stream,
" node_%p [label=\"%d\",fillcolor=\"#ccccee\",style=filled]\n", " node_%p [label=\"%d\",fillcolor=\"#ccccee\",style=filled]\n",
node, node,
context->id); context->id);
} }
else { else {
fprintf(context->stream, fprintf(context->stream,
" node_%p [label=\"%d\",fillcolor=\"#cceecc\",style=filled]\n", " node_%p [label=\"%d\",fillcolor=\"#cceecc\",style=filled]\n",
node, node,
context->id); context->id);
} }
if(parent != NULL) { if (parent != NULL) {
fprintf(context->stream, " node_%p -> node_%p;\n", parent, node); fprintf(context->stream, " node_%p -> node_%p;\n", parent, node);
} }
context->id += 1; context->id += 1;
for(int i = 0; i < node->num_children(); ++i) { for (int i = 0; i < node->num_children(); ++i) {
dump_subtree(context, node->get_child(i), node); dump_subtree(context, node->get_child(i), node);
} }
} }
} // namespace } // namespace
void BVHNode::dump_graph(const char *filename) void BVHNode::dump_graph(const char *filename)
{ {
DumpTraversalContext context; DumpTraversalContext context;
context.stream = fopen(filename, "w"); context.stream = fopen(filename, "w");
if(context.stream == NULL) { if (context.stream == NULL) {
return; return;
} }
context.id = 0; context.id = 0;
fprintf(context.stream, "digraph BVH {\n"); fprintf(context.stream, "digraph BVH {\n");
dump_subtree(&context, this); dump_subtree(&context, this);
fprintf(context.stream, "}\n"); fprintf(context.stream, "}\n");
fclose(context.stream); fclose(context.stream);
} }
/* Inner Node */ /* Inner Node */
void InnerNode::print(int depth) const void InnerNode::print(int depth) const
{ {
for(int i = 0; i < depth; i++) for (int i = 0; i < depth; i++)
printf(" "); printf(" ");
printf("inner node %p\n", (void*)this); printf("inner node %p\n", (void *)this);
if(children[0]) if (children[0])
children[0]->print(depth+1); children[0]->print(depth + 1);
if(children[1]) if (children[1])
children[1]->print(depth+1); children[1]->print(depth + 1);
} }
void LeafNode::print(int depth) const void LeafNode::print(int depth) const
{ {
for(int i = 0; i < depth; i++) for (int i = 0; i < depth; i++)
printf(" "); printf(" ");
printf("leaf node %d to %d\n", lo, hi); printf("leaf node %d to %d\n", lo, hi);
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -24,227 +24,232 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
/* Statistic selector accepted by BVHNode::getSubtreeSize().
 * The values are spelled out explicitly; they are the same consecutive
 * numbers the compiler would assign implicitly. */
enum BVH_STAT {
  BVH_STAT_NODE_COUNT = 0,
  BVH_STAT_INNER_COUNT = 1,
  BVH_STAT_LEAF_COUNT = 2,
  BVH_STAT_TRIANGLE_COUNT = 3,
  BVH_STAT_CHILDNODE_COUNT = 4,
  BVH_STAT_ALIGNED_COUNT = 5,
  BVH_STAT_UNALIGNED_COUNT = 6,
  BVH_STAT_ALIGNED_INNER_COUNT = 7,
  BVH_STAT_UNALIGNED_INNER_COUNT = 8,
  BVH_STAT_ALIGNED_LEAF_COUNT = 9,
  BVH_STAT_UNALIGNED_LEAF_COUNT = 10,
  BVH_STAT_DEPTH = 11,
};
class BVHParams; class BVHParams;
class BVHNode class BVHNode {
{ public:
public: virtual ~BVHNode()
virtual ~BVHNode() {
{ delete aligned_space;
delete aligned_space; }
}
virtual bool is_leaf() const = 0; virtual bool is_leaf() const = 0;
virtual int num_children() const = 0; virtual int num_children() const = 0;
virtual BVHNode *get_child(int i) const = 0; virtual BVHNode *get_child(int i) const = 0;
virtual int num_triangles() const { return 0; } virtual int num_triangles() const
virtual void print(int depth = 0) const = 0; {
return 0;
}
virtual void print(int depth = 0) const = 0;
inline void set_aligned_space(const Transform& aligned_space) inline void set_aligned_space(const Transform &aligned_space)
{ {
is_unaligned = true; is_unaligned = true;
if(this->aligned_space == NULL) { if (this->aligned_space == NULL) {
this->aligned_space = new Transform(aligned_space); this->aligned_space = new Transform(aligned_space);
} }
else { else {
*this->aligned_space = aligned_space; *this->aligned_space = aligned_space;
} }
} }
inline Transform get_aligned_space() const inline Transform get_aligned_space() const
{ {
if(aligned_space == NULL) { if (aligned_space == NULL) {
return transform_identity(); return transform_identity();
} }
return *aligned_space; return *aligned_space;
} }
inline bool has_unaligned() const inline bool has_unaligned() const
{ {
if(is_leaf()) { if (is_leaf()) {
return false; return false;
} }
for(int i = 0; i < num_children(); ++i) { for (int i = 0; i < num_children(); ++i) {
if(get_child(i)->is_unaligned) { if (get_child(i)->is_unaligned) {
return true; return true;
} }
} }
return false; return false;
} }
// Subtree functions // Subtree functions
int getSubtreeSize(BVH_STAT stat=BVH_STAT_NODE_COUNT) const; int getSubtreeSize(BVH_STAT stat = BVH_STAT_NODE_COUNT) const;
float computeSubtreeSAHCost(const BVHParams& p, float probability = 1.0f) const; float computeSubtreeSAHCost(const BVHParams &p, float probability = 1.0f) const;
void deleteSubtree(); void deleteSubtree();
uint update_visibility(); uint update_visibility();
void update_time(); void update_time();
/* Dump the content of the tree as a graphviz file. */ /* Dump the content of the tree as a graphviz file. */
void dump_graph(const char *filename); void dump_graph(const char *filename);
// Properties. // Properties.
BoundBox bounds; BoundBox bounds;
uint visibility; uint visibility;
bool is_unaligned; bool is_unaligned;
/* TODO(sergey): Can be stored as 3x3 matrix, but better to have some /* TODO(sergey): Can be stored as 3x3 matrix, but better to have some
* utilities and type defines in util_transform first. * utilities and type defines in util_transform first.
*/ */
Transform *aligned_space; Transform *aligned_space;
float time_from, time_to; float time_from, time_to;
protected: protected:
explicit BVHNode(const BoundBox& bounds) explicit BVHNode(const BoundBox &bounds)
: bounds(bounds), : bounds(bounds),
visibility(0), visibility(0),
is_unaligned(false), is_unaligned(false),
aligned_space(NULL), aligned_space(NULL),
time_from(0.0f), time_from(0.0f),
time_to(1.0f) time_to(1.0f)
{ {
} }
explicit BVHNode(const BVHNode& other) explicit BVHNode(const BVHNode &other)
: bounds(other.bounds), : bounds(other.bounds),
visibility(other.visibility), visibility(other.visibility),
is_unaligned(other.is_unaligned), is_unaligned(other.is_unaligned),
aligned_space(NULL), aligned_space(NULL),
time_from(other.time_from), time_from(other.time_from),
time_to(other.time_to) time_to(other.time_to)
{ {
if(other.aligned_space != NULL) { if (other.aligned_space != NULL) {
assert(other.is_unaligned); assert(other.is_unaligned);
aligned_space = new Transform(); aligned_space = new Transform();
*aligned_space = *other.aligned_space; *aligned_space = *other.aligned_space;
} }
else { else {
assert(!other.is_unaligned); assert(!other.is_unaligned);
} }
} }
}; };
class InnerNode : public BVHNode class InnerNode : public BVHNode {
{ public:
public: static constexpr int kNumMaxChildren = 8;
static constexpr int kNumMaxChildren = 8;
InnerNode(const BoundBox& bounds, InnerNode(const BoundBox &bounds, BVHNode *child0, BVHNode *child1)
BVHNode* child0, : BVHNode(bounds), num_children_(2)
BVHNode* child1) {
: BVHNode(bounds), children[0] = child0;
num_children_(2) children[1] = child1;
{ reset_unused_children();
children[0] = child0;
children[1] = child1;
reset_unused_children();
if(child0 && child1) { if (child0 && child1) {
visibility = child0->visibility | child1->visibility; visibility = child0->visibility | child1->visibility;
} }
else { else {
/* Happens on build cancel. */ /* Happens on build cancel. */
visibility = 0; visibility = 0;
} }
} }
InnerNode(const BoundBox& bounds, InnerNode(const BoundBox &bounds, BVHNode **children, const int num_children)
BVHNode** children, : BVHNode(bounds), num_children_(num_children)
const int num_children) {
: BVHNode(bounds), visibility = 0;
num_children_(num_children) time_from = FLT_MAX;
{ time_to = -FLT_MAX;
visibility = 0; for (int i = 0; i < num_children; ++i) {
time_from = FLT_MAX; assert(children[i] != NULL);
time_to = -FLT_MAX; visibility |= children[i]->visibility;
for(int i = 0; i < num_children; ++i) { this->children[i] = children[i];
assert(children[i] != NULL); time_from = min(time_from, children[i]->time_from);
visibility |= children[i]->visibility; time_to = max(time_to, children[i]->time_to);
this->children[i] = children[i]; }
time_from = min(time_from, children[i]->time_from); reset_unused_children();
time_to = max(time_to, children[i]->time_to); }
}
reset_unused_children();
}
/* NOTE: This function is only used during binary BVH builder, and it /* NOTE: This function is only used during binary BVH builder, and it
* supposed to be configured to have 2 children which will be filled in in a * supposed to be configured to have 2 children which will be filled in in a
* bit. But this is important to have children reset to NULL. */ * bit. But this is important to have children reset to NULL. */
explicit InnerNode(const BoundBox& bounds) explicit InnerNode(const BoundBox &bounds) : BVHNode(bounds), num_children_(0)
: BVHNode(bounds), {
num_children_(0) reset_unused_children();
{ visibility = 0;
reset_unused_children(); num_children_ = 2;
visibility = 0; }
num_children_ = 2;
}
bool is_leaf() const { return false; } bool is_leaf() const
int num_children() const { return num_children_; } {
BVHNode *get_child(int i) const return false;
{ }
assert(i >= 0 && i < num_children_); int num_children() const
return children[i]; {
} return num_children_;
void print(int depth) const; }
BVHNode *get_child(int i) const
{
assert(i >= 0 && i < num_children_);
return children[i];
}
void print(int depth) const;
int num_children_; int num_children_;
BVHNode *children[kNumMaxChildren]; BVHNode *children[kNumMaxChildren];
protected: protected:
void reset_unused_children() void reset_unused_children()
{ {
for(int i = num_children_; i < kNumMaxChildren; ++i) { for (int i = num_children_; i < kNumMaxChildren; ++i) {
children[i] = NULL; children[i] = NULL;
} }
} }
}; };
class LeafNode : public BVHNode class LeafNode : public BVHNode {
{ public:
public: LeafNode(const BoundBox &bounds, uint visibility, int lo, int hi)
LeafNode(const BoundBox& bounds, uint visibility, int lo, int hi) : BVHNode(bounds), lo(lo), hi(hi)
: BVHNode(bounds), {
lo(lo), this->bounds = bounds;
hi(hi) this->visibility = visibility;
{ }
this->bounds = bounds;
this->visibility = visibility;
}
LeafNode(const LeafNode& other) LeafNode(const LeafNode &other) : BVHNode(other), lo(other.lo), hi(other.hi)
: BVHNode(other), {
lo(other.lo), }
hi(other.hi)
{
}
bool is_leaf() const { return true; } bool is_leaf() const
int num_children() const { return 0; } {
BVHNode *get_child(int) const { return NULL; } return true;
int num_triangles() const { return hi - lo; } }
void print(int depth) const; int num_children() const
{
return 0;
}
BVHNode *get_child(int) const
{
return NULL;
}
int num_triangles() const
{
return hi - lo;
}
void print(int depth) const;
int lo; int lo;
int hi; int hi;
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __BVH_NODE_H__ */ #endif /* __BVH_NODE_H__ */

View File

@@ -43,120 +43,121 @@ const char *bvh_layout_name(BVHLayout layout);
/* BVH Parameters */ /* BVH Parameters */
class BVHParams class BVHParams {
{ public:
public: /* spatial split area threshold */
bool use_spatial_split;
float spatial_split_alpha;
/* spatial split area threshold */ /* Unaligned nodes creation threshold */
bool use_spatial_split; float unaligned_split_threshold;
float spatial_split_alpha;
/* Unaligned nodes creation threshold */ /* SAH costs */
float unaligned_split_threshold; float sah_node_cost;
float sah_primitive_cost;
/* SAH costs */ /* number of primitives in leaf */
float sah_node_cost; int min_leaf_size;
float sah_primitive_cost; int max_triangle_leaf_size;
int max_motion_triangle_leaf_size;
int max_curve_leaf_size;
int max_motion_curve_leaf_size;
/* number of primitives in leaf */ /* object or mesh level bvh */
int min_leaf_size; bool top_level;
int max_triangle_leaf_size;
int max_motion_triangle_leaf_size;
int max_curve_leaf_size;
int max_motion_curve_leaf_size;
/* object or mesh level bvh */ /* BVH layout to be built. */
bool top_level; BVHLayout bvh_layout;
/* BVH layout to be built. */ /* Mask of primitives to be included into the BVH. */
BVHLayout bvh_layout; int primitive_mask;
/* Mask of primitives to be included into the BVH. */ /* Use unaligned bounding boxes.
int primitive_mask; * Only used for curves BVH.
*/
bool use_unaligned_nodes;
/* Use unaligned bounding boxes. /* Split time range to this number of steps and create leaf node for each
* Only used for curves BVH. * of this time steps.
*/ *
bool use_unaligned_nodes; * Speeds up rendering of motion curve primitives in the cost of higher
* memory usage.
*/
int num_motion_curve_steps;
/* Split time range to this number of steps and create leaf node for each /* Same as above, but for triangle primitives. */
* of this time steps. int num_motion_triangle_steps;
*
* Speeds up rendering of motion curve primitives in the cost of higher
* memory usage.
*/
int num_motion_curve_steps;
/* Same as above, but for triangle primitives. */ /* Same as in SceneParams. */
int num_motion_triangle_steps; int bvh_type;
/* Same as in SceneParams. */ /* These are needed for Embree. */
int bvh_type; int curve_flags;
int curve_subdivisions;
/* These are needed for Embree. */ /* fixed parameters */
int curve_flags; enum { MAX_DEPTH = 64, MAX_SPATIAL_DEPTH = 48, NUM_SPATIAL_BINS = 32 };
int curve_subdivisions;
/* fixed parameters */ BVHParams()
enum { {
MAX_DEPTH = 64, use_spatial_split = true;
MAX_SPATIAL_DEPTH = 48, spatial_split_alpha = 1e-5f;
NUM_SPATIAL_BINS = 32
};
BVHParams() unaligned_split_threshold = 0.7f;
{
use_spatial_split = true;
spatial_split_alpha = 1e-5f;
unaligned_split_threshold = 0.7f; /* todo: see if splitting up primitive cost to be separate for triangles
* and curves can help. so far in tests it doesn't help, but why? */
sah_node_cost = 1.0f;
sah_primitive_cost = 1.0f;
/* todo: see if splitting up primitive cost to be separate for triangles min_leaf_size = 1;
* and curves can help. so far in tests it doesn't help, but why? */ max_triangle_leaf_size = 8;
sah_node_cost = 1.0f; max_motion_triangle_leaf_size = 8;
sah_primitive_cost = 1.0f; max_curve_leaf_size = 1;
max_motion_curve_leaf_size = 4;
min_leaf_size = 1; top_level = false;
max_triangle_leaf_size = 8; bvh_layout = BVH_LAYOUT_BVH2;
max_motion_triangle_leaf_size = 8; use_unaligned_nodes = false;
max_curve_leaf_size = 1;
max_motion_curve_leaf_size = 4;
top_level = false; primitive_mask = PRIMITIVE_ALL;
bvh_layout = BVH_LAYOUT_BVH2;
use_unaligned_nodes = false;
primitive_mask = PRIMITIVE_ALL; num_motion_curve_steps = 0;
num_motion_triangle_steps = 0;
num_motion_curve_steps = 0; bvh_type = 0;
num_motion_triangle_steps = 0;
bvh_type = 0; curve_flags = 0;
curve_subdivisions = 4;
}
curve_flags = 0; /* SAH costs */
curve_subdivisions = 4; __forceinline float cost(int num_nodes, int num_primitives) const
} {
return node_cost(num_nodes) + primitive_cost(num_primitives);
}
/* SAH costs */ __forceinline float primitive_cost(int n) const
__forceinline float cost(int num_nodes, int num_primitives) const {
{ return node_cost(num_nodes) + primitive_cost(num_primitives); } return n * sah_primitive_cost;
}
__forceinline float primitive_cost(int n) const __forceinline float node_cost(int n) const
{ return n*sah_primitive_cost; } {
return n * sah_node_cost;
}
__forceinline float node_cost(int n) const __forceinline bool small_enough_for_leaf(int size, int level)
{ return n*sah_node_cost; } {
return (size <= min_leaf_size || level >= MAX_DEPTH);
}
__forceinline bool small_enough_for_leaf(int size, int level) /* Gets best matching BVH.
{ return (size <= min_leaf_size || level >= MAX_DEPTH); } *
* If the requested layout is supported by the device, it will be used.
/* Gets best matching BVH. * Otherwise, widest supported layout below that will be used.
* */
* If the requested layout is supported by the device, it will be used. static BVHLayout best_bvh_layout(BVHLayout requested_layout, BVHLayoutMask supported_layouts);
* Otherwise, widest supported layout below that will be used.
*/
static BVHLayout best_bvh_layout(BVHLayout requested_layout,
BVHLayoutMask supported_layouts);
}; };
/* BVH Reference /* BVH Reference
@@ -164,49 +165,65 @@ public:
* Reference to a primitive. Primitive index and object are sneakily packed * Reference to a primitive. Primitive index and object are sneakily packed
* into BoundBox to reduce memory usage and align nicely */ * into BoundBox to reduce memory usage and align nicely */
class BVHReference class BVHReference {
{ public:
public: __forceinline BVHReference()
__forceinline BVHReference() {} {
}
__forceinline BVHReference(const BoundBox& bounds_, __forceinline BVHReference(const BoundBox &bounds_,
int prim_index_, int prim_index_,
int prim_object_, int prim_object_,
int prim_type, int prim_type,
float time_from = 0.0f, float time_from = 0.0f,
float time_to = 1.0f) float time_to = 1.0f)
: rbounds(bounds_), : rbounds(bounds_), time_from_(time_from), time_to_(time_to)
time_from_(time_from), {
time_to_(time_to) rbounds.min.w = __int_as_float(prim_index_);
{ rbounds.max.w = __int_as_float(prim_object_);
rbounds.min.w = __int_as_float(prim_index_); type = prim_type;
rbounds.max.w = __int_as_float(prim_object_); }
type = prim_type;
}
__forceinline const BoundBox& bounds() const { return rbounds; } __forceinline const BoundBox &bounds() const
__forceinline int prim_index() const { return __float_as_int(rbounds.min.w); } {
__forceinline int prim_object() const { return __float_as_int(rbounds.max.w); } return rbounds;
__forceinline int prim_type() const { return type; } }
__forceinline float time_from() const { return time_from_; } __forceinline int prim_index() const
__forceinline float time_to() const { return time_to_; } {
return __float_as_int(rbounds.min.w);
}
__forceinline int prim_object() const
{
return __float_as_int(rbounds.max.w);
}
__forceinline int prim_type() const
{
return type;
}
__forceinline float time_from() const
{
return time_from_;
}
__forceinline float time_to() const
{
return time_to_;
}
BVHReference &operator=(const BVHReference &arg)
{
if (&arg != this) {
/* TODO(sergey): Check if it is still faster to memcpy() with
* modern compilers.
*/
memcpy((void *)this, &arg, sizeof(BVHReference));
}
return *this;
}
BVHReference& operator=(const BVHReference &arg) { protected:
if(&arg != this) { BoundBox rbounds;
/* TODO(sergey): Check if it is still faster to memcpy() with uint type;
* modern compilers. float time_from_, time_to_;
*/
memcpy((void *)this, &arg, sizeof(BVHReference));
}
return *this;
}
protected:
BoundBox rbounds;
uint type;
float time_from_, time_to_;
}; };
/* BVH Range /* BVH Range
@@ -215,53 +232,68 @@ protected:
* the reference array of a subset of primitives Again uses trickery to pack * the reference array of a subset of primitives Again uses trickery to pack
* integers into BoundBox for alignment purposes. */ * integers into BoundBox for alignment purposes. */
class BVHRange class BVHRange {
{ public:
public: __forceinline BVHRange()
__forceinline BVHRange() {
{ rbounds.min.w = __int_as_float(0);
rbounds.min.w = __int_as_float(0); rbounds.max.w = __int_as_float(0);
rbounds.max.w = __int_as_float(0); }
}
__forceinline BVHRange(const BoundBox& bounds_, int start_, int size_) __forceinline BVHRange(const BoundBox &bounds_, int start_, int size_) : rbounds(bounds_)
: rbounds(bounds_) {
{ rbounds.min.w = __int_as_float(start_);
rbounds.min.w = __int_as_float(start_); rbounds.max.w = __int_as_float(size_);
rbounds.max.w = __int_as_float(size_); }
}
__forceinline BVHRange(const BoundBox& bounds_, const BoundBox& cbounds_, int start_, int size_) __forceinline BVHRange(const BoundBox &bounds_, const BoundBox &cbounds_, int start_, int size_)
: rbounds(bounds_), cbounds(cbounds_) : rbounds(bounds_), cbounds(cbounds_)
{ {
rbounds.min.w = __int_as_float(start_); rbounds.min.w = __int_as_float(start_);
rbounds.max.w = __int_as_float(size_); rbounds.max.w = __int_as_float(size_);
} }
__forceinline void set_start(int start_) { rbounds.min.w = __int_as_float(start_); } __forceinline void set_start(int start_)
{
rbounds.min.w = __int_as_float(start_);
}
__forceinline const BoundBox& bounds() const { return rbounds; } __forceinline const BoundBox &bounds() const
__forceinline const BoundBox& cent_bounds() const { return cbounds; } {
__forceinline int start() const { return __float_as_int(rbounds.min.w); } return rbounds;
__forceinline int size() const { return __float_as_int(rbounds.max.w); } }
__forceinline int end() const { return start() + size(); } __forceinline const BoundBox &cent_bounds() const
{
return cbounds;
}
__forceinline int start() const
{
return __float_as_int(rbounds.min.w);
}
__forceinline int size() const
{
return __float_as_int(rbounds.max.w);
}
__forceinline int end() const
{
return start() + size();
}
protected: protected:
BoundBox rbounds; BoundBox rbounds;
BoundBox cbounds; BoundBox cbounds;
}; };
/* BVH Spatial Bin */ /* BVH Spatial Bin */
struct BVHSpatialBin struct BVHSpatialBin {
{ BoundBox bounds;
BoundBox bounds; int enter;
int enter; int exit;
int exit;
__forceinline BVHSpatialBin() __forceinline BVHSpatialBin()
{ {
} }
}; };
/* BVH Spatial Storage /* BVH Spatial Storage
@@ -272,18 +304,18 @@ struct BVHSpatialBin
*/ */
struct BVHSpatialStorage { struct BVHSpatialStorage {
/* Accumulated bounds when sweeping from right to left. */ /* Accumulated bounds when sweeping from right to left. */
vector<BoundBox> right_bounds; vector<BoundBox> right_bounds;
/* Bins used for histogram when selecting best split plane. */ /* Bins used for histogram when selecting best split plane. */
BVHSpatialBin bins[3][BVHParams::NUM_SPATIAL_BINS]; BVHSpatialBin bins[3][BVHParams::NUM_SPATIAL_BINS];
/* Temporary storage for the new references. Used by spatial split to store /* Temporary storage for the new references. Used by spatial split to store
* new references in before they're getting inserted into actual array, * new references in before they're getting inserted into actual array,
*/ */
vector<BVHReference> new_references; vector<BVHReference> new_references;
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __BVH_PARAMS_H__ */ #endif /* __BVH_PARAMS_H__ */

View File

@@ -27,79 +27,77 @@ CCL_NAMESPACE_BEGIN
static const int BVH_SORT_THRESHOLD = 4096; static const int BVH_SORT_THRESHOLD = 4096;
struct BVHReferenceCompare { struct BVHReferenceCompare {
public: public:
int dim; int dim;
const BVHUnaligned *unaligned_heuristic; const BVHUnaligned *unaligned_heuristic;
const Transform *aligned_space; const Transform *aligned_space;
BVHReferenceCompare(int dim, BVHReferenceCompare(int dim,
const BVHUnaligned *unaligned_heuristic, const BVHUnaligned *unaligned_heuristic,
const Transform *aligned_space) const Transform *aligned_space)
: dim(dim), : dim(dim), unaligned_heuristic(unaligned_heuristic), aligned_space(aligned_space)
unaligned_heuristic(unaligned_heuristic), {
aligned_space(aligned_space) }
{
}
__forceinline BoundBox get_prim_bounds(const BVHReference& prim) const __forceinline BoundBox get_prim_bounds(const BVHReference &prim) const
{ {
return (aligned_space != NULL) return (aligned_space != NULL) ?
? unaligned_heuristic->compute_aligned_prim_boundbox( unaligned_heuristic->compute_aligned_prim_boundbox(prim, *aligned_space) :
prim, *aligned_space) prim.bounds();
: prim.bounds(); }
}
/* Compare two references. /* Compare two references.
* *
* Returns value is similar to return value of strcmp(). * Returns value is similar to return value of strcmp().
*/ */
__forceinline int compare(const BVHReference& ra, __forceinline int compare(const BVHReference &ra, const BVHReference &rb) const
const BVHReference& rb) const {
{ BoundBox ra_bounds = get_prim_bounds(ra), rb_bounds = get_prim_bounds(rb);
BoundBox ra_bounds = get_prim_bounds(ra), float ca = ra_bounds.min[dim] + ra_bounds.max[dim];
rb_bounds = get_prim_bounds(rb); float cb = rb_bounds.min[dim] + rb_bounds.max[dim];
float ca = ra_bounds.min[dim] + ra_bounds.max[dim];
float cb = rb_bounds.min[dim] + rb_bounds.max[dim];
if(ca < cb) return -1; if (ca < cb)
else if(ca > cb) return 1; return -1;
else if(ra.prim_object() < rb.prim_object()) return -1; else if (ca > cb)
else if(ra.prim_object() > rb.prim_object()) return 1; return 1;
else if(ra.prim_index() < rb.prim_index()) return -1; else if (ra.prim_object() < rb.prim_object())
else if(ra.prim_index() > rb.prim_index()) return 1; return -1;
else if(ra.prim_type() < rb.prim_type()) return -1; else if (ra.prim_object() > rb.prim_object())
else if(ra.prim_type() > rb.prim_type()) return 1; return 1;
else if (ra.prim_index() < rb.prim_index())
return -1;
else if (ra.prim_index() > rb.prim_index())
return 1;
else if (ra.prim_type() < rb.prim_type())
return -1;
else if (ra.prim_type() > rb.prim_type())
return 1;
return 0; return 0;
} }
bool operator()(const BVHReference& ra, const BVHReference& rb) bool operator()(const BVHReference &ra, const BVHReference &rb)
{ {
return (compare(ra, rb) < 0); return (compare(ra, rb) < 0);
} }
}; };
static void bvh_reference_sort_threaded(TaskPool *task_pool, static void bvh_reference_sort_threaded(TaskPool *task_pool,
BVHReference *data, BVHReference *data,
const int job_start, const int job_start,
const int job_end, const int job_end,
const BVHReferenceCompare& compare); const BVHReferenceCompare &compare);
class BVHSortTask : public Task { class BVHSortTask : public Task {
public: public:
BVHSortTask(TaskPool *task_pool, BVHSortTask(TaskPool *task_pool,
BVHReference *data, BVHReference *data,
const int job_start, const int job_start,
const int job_end, const int job_end,
const BVHReferenceCompare& compare) const BVHReferenceCompare &compare)
{ {
run = function_bind(bvh_reference_sort_threaded, run = function_bind(bvh_reference_sort_threaded, task_pool, data, job_start, job_end, compare);
task_pool, }
data,
job_start,
job_end,
compare);
}
}; };
/* Multi-threaded reference sort. */ /* Multi-threaded reference sort. */
@@ -107,74 +105,71 @@ static void bvh_reference_sort_threaded(TaskPool *task_pool,
BVHReference *data, BVHReference *data,
const int job_start, const int job_start,
const int job_end, const int job_end,
const BVHReferenceCompare& compare) const BVHReferenceCompare &compare)
{ {
int start = job_start, end = job_end; int start = job_start, end = job_end;
bool have_work = (start < end); bool have_work = (start < end);
while(have_work) { while (have_work) {
const int count = job_end - job_start; const int count = job_end - job_start;
if(count < BVH_SORT_THRESHOLD) { if (count < BVH_SORT_THRESHOLD) {
/* Number of reference low enough, faster to finish the job /* Number of reference low enough, faster to finish the job
* in one thread rather than to spawn more threads. * in one thread rather than to spawn more threads.
*/ */
sort(data+job_start, data+job_end+1, compare); sort(data + job_start, data + job_end + 1, compare);
break; break;
} }
/* Single QSort step. /* Single QSort step.
* Use median-of-three method for the pivot point. * Use median-of-three method for the pivot point.
*/ */
int left = start, right = end; int left = start, right = end;
int center = (left + right) >> 1; int center = (left + right) >> 1;
if(compare.compare(data[left], data[center]) > 0) { if (compare.compare(data[left], data[center]) > 0) {
swap(data[left], data[center]); swap(data[left], data[center]);
} }
if(compare.compare(data[left], data[right]) > 0) { if (compare.compare(data[left], data[right]) > 0) {
swap(data[left], data[right]); swap(data[left], data[right]);
} }
if(compare.compare(data[center], data[right]) > 0) { if (compare.compare(data[center], data[right]) > 0) {
swap(data[center], data[right]); swap(data[center], data[right]);
} }
swap(data[center], data[right - 1]); swap(data[center], data[right - 1]);
BVHReference median = data[right - 1]; BVHReference median = data[right - 1];
do { do {
while(compare.compare(data[left], median) < 0) { while (compare.compare(data[left], median) < 0) {
++left; ++left;
} }
while(compare.compare(data[right], median) > 0) { while (compare.compare(data[right], median) > 0) {
--right; --right;
} }
if(left <= right) { if (left <= right) {
swap(data[left], data[right]); swap(data[left], data[right]);
++left; ++left;
--right; --right;
} }
} while(left <= right); } while (left <= right);
/* We only create one new task here to reduce downside effects of /* We only create one new task here to reduce downside effects of
* latency in TaskScheduler. * latency in TaskScheduler.
* So generally current thread keeps working on the left part of the * So generally current thread keeps working on the left part of the
* array, and we create new task for the right side. * array, and we create new task for the right side.
* However, if there's nothing to be done in the left side of the array * However, if there's nothing to be done in the left side of the array
* we don't create any tasks and make it so current thread works on the * we don't create any tasks and make it so current thread works on the
* right side. * right side.
*/ */
have_work = false; have_work = false;
if(left < end) { if (left < end) {
if(start < right) { if (start < right) {
task_pool->push(new BVHSortTask(task_pool, task_pool->push(new BVHSortTask(task_pool, data, left, end, compare), true);
data, }
left, end, else {
compare), true); start = left;
} have_work = true;
else { }
start = left; }
have_work = true; if (start < right) {
} end = right;
} have_work = true;
if(start < right) { }
end = right; }
have_work = true;
}
}
} }
void bvh_reference_sort(int start, void bvh_reference_sort(int start,
@@ -184,20 +179,20 @@ void bvh_reference_sort(int start,
const BVHUnaligned *unaligned_heuristic, const BVHUnaligned *unaligned_heuristic,
const Transform *aligned_space) const Transform *aligned_space)
{ {
const int count = end - start; const int count = end - start;
BVHReferenceCompare compare(dim, unaligned_heuristic, aligned_space); BVHReferenceCompare compare(dim, unaligned_heuristic, aligned_space);
if(count < BVH_SORT_THRESHOLD) { if (count < BVH_SORT_THRESHOLD) {
/* It is important to not use any mutex if array is small enough, /* It is important to not use any mutex if array is small enough,
* otherwise we end up in situation when we're going to sleep far * otherwise we end up in situation when we're going to sleep far
* too often. * too often.
*/ */
sort(data+start, data+end, compare); sort(data + start, data + end, compare);
} }
else { else {
TaskPool task_pool; TaskPool task_pool;
bvh_reference_sort_threaded(&task_pool, data, start, end - 1, compare); bvh_reference_sort_threaded(&task_pool, data, start, end - 1, compare);
task_pool.wait_work(); task_pool.wait_work();
} }
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -35,4 +35,4 @@ void bvh_reference_sort(int start,
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __BVH_SORT_H__ */ #endif /* __BVH_SORT_H__ */

View File

@@ -31,322 +31,314 @@ CCL_NAMESPACE_BEGIN
/* Find the object split with the lowest SAH for the references in `range`.
 *
 * For each of the three dimensions the references are sorted, the bounds of
 * every suffix are accumulated into storage->right_bounds, and every split
 * position is then evaluated while the prefix bounds are grown. The best
 * candidate found is stored in sah, dim, num_left, left_bounds and
 * right_bounds. When the range holds fewer than two references no candidate
 * is evaluated and sah stays at FLT_MAX. */
BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
                               BVHSpatialStorage *storage,
                               const BVHRange &range,
                               vector<BVHReference> *references,
                               float nodeSAH,
                               const BVHUnaligned *unaligned_heuristic,
                               const Transform *aligned_space)
    : sah(FLT_MAX),
      dim(0),
      num_left(0),
      left_bounds(BoundBox::empty),
      right_bounds(BoundBox::empty),
      storage_(storage),
      references_(references),
      unaligned_heuristic_(unaligned_heuristic),
      aligned_space_(aligned_space)
{
  /* Sorting permutes the elements in place, so this pointer to the first
   * reference of the range stays usable across all three passes. */
  const BVHReference *range_refs = &references_->at(range.start());
  float best_sah = FLT_MAX;

  storage_->right_bounds.resize(range.size());

  for (int axis = 0; axis < 3; axis++) {
    /* Sort references. */
    bvh_reference_sort(range.start(),
                       range.end(),
                       &references_->at(0),
                       axis,
                       unaligned_heuristic_,
                       aligned_space_);

    /* Sweep right to left and determine bounds. */
    BoundBox swept_right = BoundBox::empty;
    for (int i = range.size() - 1; i > 0; i--) {
      BoundBox prim_bounds = get_prim_bounds(range_refs[i]);
      swept_right.grow(prim_bounds);
      storage_->right_bounds[i - 1] = swept_right;
    }

    /* Sweep left to right and select lowest SAH. */
    BoundBox swept_left = BoundBox::empty;
    for (int left_count = 1; left_count < range.size(); left_count++) {
      BoundBox prim_bounds = get_prim_bounds(range_refs[left_count - 1]);
      swept_left.grow(prim_bounds);
      swept_right = storage_->right_bounds[left_count - 1];

      const int right_count = range.size() - left_count;
      float candidate_sah = nodeSAH +
                            swept_left.safe_area() * builder->params.primitive_cost(left_count) +
                            swept_right.safe_area() * builder->params.primitive_cost(right_count);

      if (candidate_sah < best_sah) {
        best_sah = candidate_sah;

        this->sah = candidate_sah;
        this->dim = axis;
        this->num_left = left_count;
        this->left_bounds = swept_left;
        this->right_bounds = swept_right;
      }
    }
  }
}
void BVHObjectSplit::split(BVHRange& left, void BVHObjectSplit::split(BVHRange &left, BVHRange &right, const BVHRange &range)
BVHRange& right,
const BVHRange& range)
{ {
assert(references_->size() > 0); assert(references_->size() > 0);
/* sort references according to split */ /* sort references according to split */
bvh_reference_sort(range.start(), bvh_reference_sort(range.start(),
range.end(), range.end(),
&references_->at(0), &references_->at(0),
this->dim, this->dim,
unaligned_heuristic_, unaligned_heuristic_,
aligned_space_); aligned_space_);
BoundBox effective_left_bounds, effective_right_bounds; BoundBox effective_left_bounds, effective_right_bounds;
const int num_right = range.size() - this->num_left; const int num_right = range.size() - this->num_left;
if(aligned_space_ == NULL) { if (aligned_space_ == NULL) {
effective_left_bounds = left_bounds; effective_left_bounds = left_bounds;
effective_right_bounds = right_bounds; effective_right_bounds = right_bounds;
} }
else { else {
effective_left_bounds = BoundBox::empty; effective_left_bounds = BoundBox::empty;
effective_right_bounds = BoundBox::empty; effective_right_bounds = BoundBox::empty;
for(int i = 0; i < this->num_left; ++i) { for (int i = 0; i < this->num_left; ++i) {
BoundBox prim_boundbox = references_->at(range.start() + i).bounds(); BoundBox prim_boundbox = references_->at(range.start() + i).bounds();
effective_left_bounds.grow(prim_boundbox); effective_left_bounds.grow(prim_boundbox);
} }
for(int i = 0; i < num_right; ++i) { for (int i = 0; i < num_right; ++i) {
BoundBox prim_boundbox = references_->at(range.start() + this->num_left + i).bounds(); BoundBox prim_boundbox = references_->at(range.start() + this->num_left + i).bounds();
effective_right_bounds.grow(prim_boundbox); effective_right_bounds.grow(prim_boundbox);
} }
} }
/* split node ranges */ /* split node ranges */
left = BVHRange(effective_left_bounds, range.start(), this->num_left); left = BVHRange(effective_left_bounds, range.start(), this->num_left);
right = BVHRange(effective_right_bounds, left.end(), num_right); right = BVHRange(effective_right_bounds, left.end(), num_right);
} }
/* Spatial Split */ /* Spatial Split */
BVHSpatialSplit::BVHSpatialSplit(const BVHBuild& builder, BVHSpatialSplit::BVHSpatialSplit(const BVHBuild &builder,
BVHSpatialStorage *storage, BVHSpatialStorage *storage,
const BVHRange& range, const BVHRange &range,
vector<BVHReference> *references, vector<BVHReference> *references,
float nodeSAH, float nodeSAH,
const BVHUnaligned *unaligned_heuristic, const BVHUnaligned *unaligned_heuristic,
const Transform *aligned_space) const Transform *aligned_space)
: sah(FLT_MAX), : sah(FLT_MAX),
dim(0), dim(0),
pos(0.0f), pos(0.0f),
storage_(storage), storage_(storage),
references_(references), references_(references),
unaligned_heuristic_(unaligned_heuristic), unaligned_heuristic_(unaligned_heuristic),
aligned_space_(aligned_space) aligned_space_(aligned_space)
{ {
/* initialize bins. */ /* initialize bins. */
BoundBox range_bounds; BoundBox range_bounds;
if(aligned_space == NULL) { if (aligned_space == NULL) {
range_bounds = range.bounds(); range_bounds = range.bounds();
} }
else { else {
range_bounds = unaligned_heuristic->compute_aligned_boundbox( range_bounds = unaligned_heuristic->compute_aligned_boundbox(
range, range, &references->at(0), *aligned_space);
&references->at(0), }
*aligned_space);
}
float3 origin = range_bounds.min; float3 origin = range_bounds.min;
float3 binSize = (range_bounds.max - origin) * (1.0f / (float)BVHParams::NUM_SPATIAL_BINS); float3 binSize = (range_bounds.max - origin) * (1.0f / (float)BVHParams::NUM_SPATIAL_BINS);
float3 invBinSize = 1.0f / binSize; float3 invBinSize = 1.0f / binSize;
for(int dim = 0; dim < 3; dim++) { for (int dim = 0; dim < 3; dim++) {
for(int i = 0; i < BVHParams::NUM_SPATIAL_BINS; i++) { for (int i = 0; i < BVHParams::NUM_SPATIAL_BINS; i++) {
BVHSpatialBin& bin = storage_->bins[dim][i]; BVHSpatialBin &bin = storage_->bins[dim][i];
bin.bounds = BoundBox::empty; bin.bounds = BoundBox::empty;
bin.enter = 0; bin.enter = 0;
bin.exit = 0; bin.exit = 0;
} }
} }
/* chop references into bins. */ /* chop references into bins. */
for(unsigned int refIdx = range.start(); refIdx < range.end(); refIdx++) { for (unsigned int refIdx = range.start(); refIdx < range.end(); refIdx++) {
const BVHReference& ref = references_->at(refIdx); const BVHReference &ref = references_->at(refIdx);
BoundBox prim_bounds = get_prim_bounds(ref); BoundBox prim_bounds = get_prim_bounds(ref);
float3 firstBinf = (prim_bounds.min - origin) * invBinSize; float3 firstBinf = (prim_bounds.min - origin) * invBinSize;
float3 lastBinf = (prim_bounds.max - origin) * invBinSize; float3 lastBinf = (prim_bounds.max - origin) * invBinSize;
int3 firstBin = make_int3((int)firstBinf.x, (int)firstBinf.y, (int)firstBinf.z); int3 firstBin = make_int3((int)firstBinf.x, (int)firstBinf.y, (int)firstBinf.z);
int3 lastBin = make_int3((int)lastBinf.x, (int)lastBinf.y, (int)lastBinf.z); int3 lastBin = make_int3((int)lastBinf.x, (int)lastBinf.y, (int)lastBinf.z);
firstBin = clamp(firstBin, 0, BVHParams::NUM_SPATIAL_BINS - 1); firstBin = clamp(firstBin, 0, BVHParams::NUM_SPATIAL_BINS - 1);
lastBin = clamp(lastBin, firstBin, BVHParams::NUM_SPATIAL_BINS - 1); lastBin = clamp(lastBin, firstBin, BVHParams::NUM_SPATIAL_BINS - 1);
for(int dim = 0; dim < 3; dim++) { for (int dim = 0; dim < 3; dim++) {
BVHReference currRef(get_prim_bounds(ref), BVHReference currRef(
ref.prim_index(), get_prim_bounds(ref), ref.prim_index(), ref.prim_object(), ref.prim_type());
ref.prim_object(),
ref.prim_type());
for(int i = firstBin[dim]; i < lastBin[dim]; i++) { for (int i = firstBin[dim]; i < lastBin[dim]; i++) {
BVHReference leftRef, rightRef; BVHReference leftRef, rightRef;
split_reference(builder, leftRef, rightRef, currRef, dim, origin[dim] + binSize[dim] * (float)(i + 1)); split_reference(
storage_->bins[dim][i].bounds.grow(leftRef.bounds()); builder, leftRef, rightRef, currRef, dim, origin[dim] + binSize[dim] * (float)(i + 1));
currRef = rightRef; storage_->bins[dim][i].bounds.grow(leftRef.bounds());
} currRef = rightRef;
}
storage_->bins[dim][lastBin[dim]].bounds.grow(currRef.bounds()); storage_->bins[dim][lastBin[dim]].bounds.grow(currRef.bounds());
storage_->bins[dim][firstBin[dim]].enter++; storage_->bins[dim][firstBin[dim]].enter++;
storage_->bins[dim][lastBin[dim]].exit++; storage_->bins[dim][lastBin[dim]].exit++;
} }
} }
/* select best split plane. */ /* select best split plane. */
storage_->right_bounds.resize(BVHParams::NUM_SPATIAL_BINS); storage_->right_bounds.resize(BVHParams::NUM_SPATIAL_BINS);
for(int dim = 0; dim < 3; dim++) { for (int dim = 0; dim < 3; dim++) {
/* sweep right to left and determine bounds. */ /* sweep right to left and determine bounds. */
BoundBox right_bounds = BoundBox::empty; BoundBox right_bounds = BoundBox::empty;
for(int i = BVHParams::NUM_SPATIAL_BINS - 1; i > 0; i--) { for (int i = BVHParams::NUM_SPATIAL_BINS - 1; i > 0; i--) {
right_bounds.grow(storage_->bins[dim][i].bounds); right_bounds.grow(storage_->bins[dim][i].bounds);
storage_->right_bounds[i - 1] = right_bounds; storage_->right_bounds[i - 1] = right_bounds;
} }
/* sweep left to right and select lowest SAH. */ /* sweep left to right and select lowest SAH. */
BoundBox left_bounds = BoundBox::empty; BoundBox left_bounds = BoundBox::empty;
int leftNum = 0; int leftNum = 0;
int rightNum = range.size(); int rightNum = range.size();
for(int i = 1; i < BVHParams::NUM_SPATIAL_BINS; i++) { for (int i = 1; i < BVHParams::NUM_SPATIAL_BINS; i++) {
left_bounds.grow(storage_->bins[dim][i - 1].bounds); left_bounds.grow(storage_->bins[dim][i - 1].bounds);
leftNum += storage_->bins[dim][i - 1].enter; leftNum += storage_->bins[dim][i - 1].enter;
rightNum -= storage_->bins[dim][i - 1].exit; rightNum -= storage_->bins[dim][i - 1].exit;
float sah = nodeSAH + float sah = nodeSAH + left_bounds.safe_area() * builder.params.primitive_cost(leftNum) +
left_bounds.safe_area() * builder.params.primitive_cost(leftNum) + storage_->right_bounds[i - 1].safe_area() *
storage_->right_bounds[i - 1].safe_area() * builder.params.primitive_cost(rightNum); builder.params.primitive_cost(rightNum);
if(sah < this->sah) { if (sah < this->sah) {
this->sah = sah; this->sah = sah;
this->dim = dim; this->dim = dim;
this->pos = origin[dim] + binSize[dim] * (float)i; this->pos = origin[dim] + binSize[dim] * (float)i;
} }
} }
} }
} }
void BVHSpatialSplit::split(BVHBuild *builder, void BVHSpatialSplit::split(BVHBuild *builder,
BVHRange& left, BVHRange &left,
BVHRange& right, BVHRange &right,
const BVHRange& range) const BVHRange &range)
{ {
/* Categorize references and compute bounds. /* Categorize references and compute bounds.
* *
* Left-hand side: [left_start, left_end[ * Left-hand side: [left_start, left_end[
* Uncategorized/split: [left_end, right_start[ * Uncategorized/split: [left_end, right_start[
* Right-hand side: [right_start, refs.size()[ */ * Right-hand side: [right_start, refs.size()[ */
vector<BVHReference>& refs = *references_; vector<BVHReference> &refs = *references_;
int left_start = range.start(); int left_start = range.start();
int left_end = left_start; int left_end = left_start;
int right_start = range.end(); int right_start = range.end();
int right_end = range.end(); int right_end = range.end();
BoundBox left_bounds = BoundBox::empty; BoundBox left_bounds = BoundBox::empty;
BoundBox right_bounds = BoundBox::empty; BoundBox right_bounds = BoundBox::empty;
for(int i = left_end; i < right_start; i++) { for (int i = left_end; i < right_start; i++) {
BoundBox prim_bounds = get_prim_bounds(refs[i]); BoundBox prim_bounds = get_prim_bounds(refs[i]);
if(prim_bounds.max[this->dim] <= this->pos) { if (prim_bounds.max[this->dim] <= this->pos) {
/* entirely on the left-hand side */ /* entirely on the left-hand side */
left_bounds.grow(prim_bounds); left_bounds.grow(prim_bounds);
swap(refs[i], refs[left_end++]); swap(refs[i], refs[left_end++]);
} }
else if(prim_bounds.min[this->dim] >= this->pos) { else if (prim_bounds.min[this->dim] >= this->pos) {
/* entirely on the right-hand side */ /* entirely on the right-hand side */
right_bounds.grow(prim_bounds); right_bounds.grow(prim_bounds);
swap(refs[i--], refs[--right_start]); swap(refs[i--], refs[--right_start]);
} }
} }
/* Duplicate or unsplit references intersecting both sides. /* Duplicate or unsplit references intersecting both sides.
* *
* Duplication happens into a temporary pre-allocated vector in order to * Duplication happens into a temporary pre-allocated vector in order to
* reduce number of memmove() calls happening in vector.insert(). * reduce number of memmove() calls happening in vector.insert().
*/ */
vector<BVHReference>& new_refs = storage_->new_references; vector<BVHReference> &new_refs = storage_->new_references;
new_refs.clear(); new_refs.clear();
new_refs.reserve(right_start - left_end); new_refs.reserve(right_start - left_end);
while(left_end < right_start) { while (left_end < right_start) {
/* split reference. */ /* split reference. */
BVHReference curr_ref(get_prim_bounds(refs[left_end]), BVHReference curr_ref(get_prim_bounds(refs[left_end]),
refs[left_end].prim_index(), refs[left_end].prim_index(),
refs[left_end].prim_object(), refs[left_end].prim_object(),
refs[left_end].prim_type()); refs[left_end].prim_type());
BVHReference lref, rref; BVHReference lref, rref;
split_reference(*builder, lref, rref, curr_ref, this->dim, this->pos); split_reference(*builder, lref, rref, curr_ref, this->dim, this->pos);
/* compute SAH for duplicate/unsplit candidates. */ /* compute SAH for duplicate/unsplit candidates. */
BoundBox lub = left_bounds; // Unsplit to left: new left-hand bounds. BoundBox lub = left_bounds; // Unsplit to left: new left-hand bounds.
BoundBox rub = right_bounds; // Unsplit to right: new right-hand bounds. BoundBox rub = right_bounds; // Unsplit to right: new right-hand bounds.
BoundBox ldb = left_bounds; // Duplicate: new left-hand bounds. BoundBox ldb = left_bounds; // Duplicate: new left-hand bounds.
BoundBox rdb = right_bounds; // Duplicate: new right-hand bounds. BoundBox rdb = right_bounds; // Duplicate: new right-hand bounds.
lub.grow(curr_ref.bounds()); lub.grow(curr_ref.bounds());
rub.grow(curr_ref.bounds()); rub.grow(curr_ref.bounds());
ldb.grow(lref.bounds()); ldb.grow(lref.bounds());
rdb.grow(rref.bounds()); rdb.grow(rref.bounds());
float lac = builder->params.primitive_cost(left_end - left_start); float lac = builder->params.primitive_cost(left_end - left_start);
float rac = builder->params.primitive_cost(right_end - right_start); float rac = builder->params.primitive_cost(right_end - right_start);
float lbc = builder->params.primitive_cost(left_end - left_start + 1); float lbc = builder->params.primitive_cost(left_end - left_start + 1);
float rbc = builder->params.primitive_cost(right_end - right_start + 1); float rbc = builder->params.primitive_cost(right_end - right_start + 1);
float unsplitLeftSAH = lub.safe_area() * lbc + right_bounds.safe_area() * rac; float unsplitLeftSAH = lub.safe_area() * lbc + right_bounds.safe_area() * rac;
float unsplitRightSAH = left_bounds.safe_area() * lac + rub.safe_area() * rbc; float unsplitRightSAH = left_bounds.safe_area() * lac + rub.safe_area() * rbc;
float duplicateSAH = ldb.safe_area() * lbc + rdb.safe_area() * rbc; float duplicateSAH = ldb.safe_area() * lbc + rdb.safe_area() * rbc;
float minSAH = min(min(unsplitLeftSAH, unsplitRightSAH), duplicateSAH); float minSAH = min(min(unsplitLeftSAH, unsplitRightSAH), duplicateSAH);
if(minSAH == unsplitLeftSAH) { if (minSAH == unsplitLeftSAH) {
/* unsplit to left */ /* unsplit to left */
left_bounds = lub; left_bounds = lub;
left_end++; left_end++;
} }
else if(minSAH == unsplitRightSAH) { else if (minSAH == unsplitRightSAH) {
/* unsplit to right */ /* unsplit to right */
right_bounds = rub; right_bounds = rub;
swap(refs[left_end], refs[--right_start]); swap(refs[left_end], refs[--right_start]);
} }
else { else {
/* duplicate */ /* duplicate */
left_bounds = ldb; left_bounds = ldb;
right_bounds = rdb; right_bounds = rdb;
refs[left_end++] = lref; refs[left_end++] = lref;
new_refs.push_back(rref); new_refs.push_back(rref);
right_end++; right_end++;
} }
} }
/* Insert duplicated references into actual array in one go. */ /* Insert duplicated references into actual array in one go. */
if(new_refs.size() != 0) { if (new_refs.size() != 0) {
refs.insert(refs.begin() + (right_end - new_refs.size()), refs.insert(refs.begin() + (right_end - new_refs.size()), new_refs.begin(), new_refs.end());
new_refs.begin(), }
new_refs.end()); if (aligned_space_ != NULL) {
} left_bounds = right_bounds = BoundBox::empty;
if(aligned_space_ != NULL) { for (int i = left_start; i < left_end - left_start; ++i) {
left_bounds = right_bounds = BoundBox::empty; BoundBox prim_boundbox = references_->at(i).bounds();
for(int i = left_start; i < left_end - left_start; ++i) { left_bounds.grow(prim_boundbox);
BoundBox prim_boundbox = references_->at(i).bounds(); }
left_bounds.grow(prim_boundbox); for (int i = right_start; i < right_end - right_start; ++i) {
} BoundBox prim_boundbox = references_->at(i).bounds();
for(int i = right_start; i < right_end - right_start; ++i) { right_bounds.grow(prim_boundbox);
BoundBox prim_boundbox = references_->at(i).bounds(); }
right_bounds.grow(prim_boundbox); }
} left = BVHRange(left_bounds, left_start, left_end - left_start);
} right = BVHRange(right_bounds, right_start, right_end - right_start);
left = BVHRange(left_bounds, left_start, left_end - left_start);
right = BVHRange(right_bounds, right_start, right_end - right_start);
} }
void BVHSpatialSplit::split_triangle_primitive(const Mesh *mesh, void BVHSpatialSplit::split_triangle_primitive(const Mesh *mesh,
@@ -354,36 +346,36 @@ void BVHSpatialSplit::split_triangle_primitive(const Mesh *mesh,
int prim_index, int prim_index,
int dim, int dim,
float pos, float pos,
BoundBox& left_bounds, BoundBox &left_bounds,
BoundBox& right_bounds) BoundBox &right_bounds)
{ {
Mesh::Triangle t = mesh->get_triangle(prim_index); Mesh::Triangle t = mesh->get_triangle(prim_index);
const float3 *verts = &mesh->verts[0]; const float3 *verts = &mesh->verts[0];
float3 v1 = tfm ? transform_point(tfm, verts[t.v[2]]) : verts[t.v[2]]; float3 v1 = tfm ? transform_point(tfm, verts[t.v[2]]) : verts[t.v[2]];
v1 = get_unaligned_point(v1); v1 = get_unaligned_point(v1);
for(int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
float3 v0 = v1; float3 v0 = v1;
int vindex = t.v[i]; int vindex = t.v[i];
v1 = tfm ? transform_point(tfm, verts[vindex]) : verts[vindex]; v1 = tfm ? transform_point(tfm, verts[vindex]) : verts[vindex];
v1 = get_unaligned_point(v1); v1 = get_unaligned_point(v1);
float v0p = v0[dim]; float v0p = v0[dim];
float v1p = v1[dim]; float v1p = v1[dim];
/* insert vertex to the boxes it belongs to. */ /* insert vertex to the boxes it belongs to. */
if(v0p <= pos) if (v0p <= pos)
left_bounds.grow(v0); left_bounds.grow(v0);
if(v0p >= pos) if (v0p >= pos)
right_bounds.grow(v0); right_bounds.grow(v0);
/* edge intersects the plane => insert intersection to both boxes. */ /* edge intersects the plane => insert intersection to both boxes. */
if((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) { if ((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) {
float3 t = lerp(v0, v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f)); float3 t = lerp(v0, v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f));
left_bounds.grow(t); left_bounds.grow(t);
right_bounds.grow(t); right_bounds.grow(t);
} }
} }
} }
void BVHSpatialSplit::split_curve_primitive(const Mesh *mesh, void BVHSpatialSplit::split_curve_primitive(const Mesh *mesh,
@@ -392,163 +384,125 @@ void BVHSpatialSplit::split_curve_primitive(const Mesh *mesh,
int segment_index, int segment_index,
int dim, int dim,
float pos, float pos,
BoundBox& left_bounds, BoundBox &left_bounds,
BoundBox& right_bounds) BoundBox &right_bounds)
{ {
/* curve split: NOTE - Currently ignores curve width and needs to be fixed.*/ /* curve split: NOTE - Currently ignores curve width and needs to be fixed.*/
Mesh::Curve curve = mesh->get_curve(prim_index); Mesh::Curve curve = mesh->get_curve(prim_index);
const int k0 = curve.first_key + segment_index; const int k0 = curve.first_key + segment_index;
const int k1 = k0 + 1; const int k1 = k0 + 1;
float3 v0 = mesh->curve_keys[k0]; float3 v0 = mesh->curve_keys[k0];
float3 v1 = mesh->curve_keys[k1]; float3 v1 = mesh->curve_keys[k1];
if(tfm != NULL) { if (tfm != NULL) {
v0 = transform_point(tfm, v0); v0 = transform_point(tfm, v0);
v1 = transform_point(tfm, v1); v1 = transform_point(tfm, v1);
} }
v0 = get_unaligned_point(v0); v0 = get_unaligned_point(v0);
v1 = get_unaligned_point(v1); v1 = get_unaligned_point(v1);
float v0p = v0[dim]; float v0p = v0[dim];
float v1p = v1[dim]; float v1p = v1[dim];
/* insert vertex to the boxes it belongs to. */ /* insert vertex to the boxes it belongs to. */
if(v0p <= pos) if (v0p <= pos)
left_bounds.grow(v0); left_bounds.grow(v0);
if(v0p >= pos) if (v0p >= pos)
right_bounds.grow(v0); right_bounds.grow(v0);
if(v1p <= pos) if (v1p <= pos)
left_bounds.grow(v1); left_bounds.grow(v1);
if(v1p >= pos) if (v1p >= pos)
right_bounds.grow(v1); right_bounds.grow(v1);
/* edge intersects the plane => insert intersection to both boxes. */ /* edge intersects the plane => insert intersection to both boxes. */
if((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) { if ((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) {
float3 t = lerp(v0, v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f)); float3 t = lerp(v0, v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f));
left_bounds.grow(t); left_bounds.grow(t);
right_bounds.grow(t); right_bounds.grow(t);
} }
} }
void BVHSpatialSplit::split_triangle_reference(const BVHReference& ref, void BVHSpatialSplit::split_triangle_reference(const BVHReference &ref,
const Mesh *mesh, const Mesh *mesh,
int dim, int dim,
float pos, float pos,
BoundBox& left_bounds, BoundBox &left_bounds,
BoundBox& right_bounds) BoundBox &right_bounds)
{ {
split_triangle_primitive(mesh, split_triangle_primitive(mesh, NULL, ref.prim_index(), dim, pos, left_bounds, right_bounds);
NULL,
ref.prim_index(),
dim,
pos,
left_bounds,
right_bounds);
} }
void BVHSpatialSplit::split_curve_reference(const BVHReference& ref, void BVHSpatialSplit::split_curve_reference(const BVHReference &ref,
const Mesh *mesh, const Mesh *mesh,
int dim, int dim,
float pos, float pos,
BoundBox& left_bounds, BoundBox &left_bounds,
BoundBox& right_bounds) BoundBox &right_bounds)
{ {
split_curve_primitive(mesh, split_curve_primitive(mesh,
NULL, NULL,
ref.prim_index(), ref.prim_index(),
PRIMITIVE_UNPACK_SEGMENT(ref.prim_type()), PRIMITIVE_UNPACK_SEGMENT(ref.prim_type()),
dim, dim,
pos, pos,
left_bounds, left_bounds,
right_bounds); right_bounds);
} }
void BVHSpatialSplit::split_object_reference(const Object *object, void BVHSpatialSplit::split_object_reference(
int dim, const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds)
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds)
{ {
Mesh *mesh = object->mesh; Mesh *mesh = object->mesh;
for(int tri_idx = 0; tri_idx < mesh->num_triangles(); ++tri_idx) { for (int tri_idx = 0; tri_idx < mesh->num_triangles(); ++tri_idx) {
split_triangle_primitive(mesh, split_triangle_primitive(mesh, &object->tfm, tri_idx, dim, pos, left_bounds, right_bounds);
&object->tfm, }
tri_idx, for (int curve_idx = 0; curve_idx < mesh->num_curves(); ++curve_idx) {
dim, Mesh::Curve curve = mesh->get_curve(curve_idx);
pos, for (int segment_idx = 0; segment_idx < curve.num_keys - 1; ++segment_idx) {
left_bounds, split_curve_primitive(
right_bounds); mesh, &object->tfm, curve_idx, segment_idx, dim, pos, left_bounds, right_bounds);
} }
for(int curve_idx = 0; curve_idx < mesh->num_curves(); ++curve_idx) { }
Mesh::Curve curve = mesh->get_curve(curve_idx);
for(int segment_idx = 0;
segment_idx < curve.num_keys - 1;
++segment_idx)
{
split_curve_primitive(mesh,
&object->tfm,
curve_idx,
segment_idx,
dim,
pos,
left_bounds,
right_bounds);
}
}
} }
void BVHSpatialSplit::split_reference(const BVHBuild& builder, void BVHSpatialSplit::split_reference(const BVHBuild &builder,
BVHReference& left, BVHReference &left,
BVHReference& right, BVHReference &right,
const BVHReference& ref, const BVHReference &ref,
int dim, int dim,
float pos) float pos)
{ {
/* initialize boundboxes */ /* initialize boundboxes */
BoundBox left_bounds = BoundBox::empty; BoundBox left_bounds = BoundBox::empty;
BoundBox right_bounds = BoundBox::empty; BoundBox right_bounds = BoundBox::empty;
/* loop over vertices/edges. */ /* loop over vertices/edges. */
const Object *ob = builder.objects[ref.prim_object()]; const Object *ob = builder.objects[ref.prim_object()];
const Mesh *mesh = ob->mesh; const Mesh *mesh = ob->mesh;
if(ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) { if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) {
split_triangle_reference(ref, split_triangle_reference(ref, mesh, dim, pos, left_bounds, right_bounds);
mesh, }
dim, else if (ref.prim_type() & PRIMITIVE_ALL_CURVE) {
pos, split_curve_reference(ref, mesh, dim, pos, left_bounds, right_bounds);
left_bounds, }
right_bounds); else {
} split_object_reference(ob, dim, pos, left_bounds, right_bounds);
else if(ref.prim_type() & PRIMITIVE_ALL_CURVE) { }
split_curve_reference(ref,
mesh,
dim,
pos,
left_bounds,
right_bounds);
}
else {
split_object_reference(ob,
dim,
pos,
left_bounds,
right_bounds);
}
/* intersect with original bounds. */ /* intersect with original bounds. */
left_bounds.max[dim] = pos; left_bounds.max[dim] = pos;
right_bounds.min[dim] = pos; right_bounds.min[dim] = pos;
left_bounds.intersect(ref.bounds()); left_bounds.intersect(ref.bounds());
right_bounds.intersect(ref.bounds()); right_bounds.intersect(ref.bounds());
/* set references */ /* set references */
left = BVHReference(left_bounds, ref.prim_index(), ref.prim_object(), ref.prim_type()); left = BVHReference(left_bounds, ref.prim_index(), ref.prim_object(), ref.prim_type());
right = BVHReference(right_bounds, ref.prim_index(), ref.prim_object(), ref.prim_type()); right = BVHReference(right_bounds, ref.prim_index(), ref.prim_object(), ref.prim_type());
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -28,235 +28,211 @@ struct Transform;
/* Object Split */ /* Object Split */
class BVHObjectSplit class BVHObjectSplit {
{ public:
public: float sah;
float sah; int dim;
int dim; int num_left;
int num_left; BoundBox left_bounds;
BoundBox left_bounds; BoundBox right_bounds;
BoundBox right_bounds;
BVHObjectSplit() {} BVHObjectSplit()
BVHObjectSplit(BVHBuild *builder, {
BVHSpatialStorage *storage, }
const BVHRange& range, BVHObjectSplit(BVHBuild *builder,
vector<BVHReference> *references, BVHSpatialStorage *storage,
float nodeSAH, const BVHRange &range,
const BVHUnaligned *unaligned_heuristic = NULL, vector<BVHReference> *references,
const Transform *aligned_space = NULL); float nodeSAH,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
void split(BVHRange& left, void split(BVHRange &left, BVHRange &right, const BVHRange &range);
BVHRange& right,
const BVHRange& range);
protected: protected:
BVHSpatialStorage *storage_; BVHSpatialStorage *storage_;
vector<BVHReference> *references_; vector<BVHReference> *references_;
const BVHUnaligned *unaligned_heuristic_; const BVHUnaligned *unaligned_heuristic_;
const Transform *aligned_space_; const Transform *aligned_space_;
__forceinline BoundBox get_prim_bounds(const BVHReference& prim) const __forceinline BoundBox get_prim_bounds(const BVHReference &prim) const
{ {
if(aligned_space_ == NULL) { if (aligned_space_ == NULL) {
return prim.bounds(); return prim.bounds();
} }
else { else {
return unaligned_heuristic_->compute_aligned_prim_boundbox( return unaligned_heuristic_->compute_aligned_prim_boundbox(prim, *aligned_space_);
prim, *aligned_space_); }
} }
}
}; };
/* Spatial Split */ /* Spatial Split */
class BVHSpatialSplit class BVHSpatialSplit {
{ public:
public: float sah;
float sah; int dim;
int dim; float pos;
float pos;
BVHSpatialSplit() : sah(FLT_MAX), BVHSpatialSplit() : sah(FLT_MAX), dim(0), pos(0.0f), storage_(NULL), references_(NULL)
dim(0), {
pos(0.0f), }
storage_(NULL), BVHSpatialSplit(const BVHBuild &builder,
references_(NULL) {} BVHSpatialStorage *storage,
BVHSpatialSplit(const BVHBuild& builder, const BVHRange &range,
BVHSpatialStorage *storage, vector<BVHReference> *references,
const BVHRange& range, float nodeSAH,
vector<BVHReference> *references, const BVHUnaligned *unaligned_heuristic = NULL,
float nodeSAH, const Transform *aligned_space = NULL);
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
void split(BVHBuild *builder, void split(BVHBuild *builder, BVHRange &left, BVHRange &right, const BVHRange &range);
BVHRange& left,
BVHRange& right,
const BVHRange& range);
void split_reference(const BVHBuild& builder, void split_reference(const BVHBuild &builder,
BVHReference& left, BVHReference &left,
BVHReference& right, BVHReference &right,
const BVHReference& ref, const BVHReference &ref,
int dim, int dim,
float pos); float pos);
protected: protected:
BVHSpatialStorage *storage_; BVHSpatialStorage *storage_;
vector<BVHReference> *references_; vector<BVHReference> *references_;
const BVHUnaligned *unaligned_heuristic_; const BVHUnaligned *unaligned_heuristic_;
const Transform *aligned_space_; const Transform *aligned_space_;
/* Lower-level functions which calculates boundaries of left and right nodes /* Lower-level functions which calculates boundaries of left and right nodes
* needed for spatial split. * needed for spatial split.
* *
* Operates directly with primitive specified by it's index, reused by higher * Operates directly with primitive specified by it's index, reused by higher
* level splitting functions. * level splitting functions.
*/ */
void split_triangle_primitive(const Mesh *mesh, void split_triangle_primitive(const Mesh *mesh,
const Transform *tfm, const Transform *tfm,
int prim_index, int prim_index,
int dim, int dim,
float pos, float pos,
BoundBox& left_bounds, BoundBox &left_bounds,
BoundBox& right_bounds); BoundBox &right_bounds);
void split_curve_primitive(const Mesh *mesh, void split_curve_primitive(const Mesh *mesh,
const Transform *tfm, const Transform *tfm,
int prim_index, int prim_index,
int segment_index, int segment_index,
int dim, int dim,
float pos, float pos,
BoundBox& left_bounds, BoundBox &left_bounds,
BoundBox& right_bounds); BoundBox &right_bounds);
/* Lower-level functions which calculates boundaries of left and right nodes /* Lower-level functions which calculates boundaries of left and right nodes
* needed for spatial split. * needed for spatial split.
* *
* Operates with BVHReference, internally uses lower level API functions. * Operates with BVHReference, internally uses lower level API functions.
*/ */
void split_triangle_reference(const BVHReference& ref, void split_triangle_reference(const BVHReference &ref,
const Mesh *mesh, const Mesh *mesh,
int dim, int dim,
float pos, float pos,
BoundBox& left_bounds, BoundBox &left_bounds,
BoundBox& right_bounds); BoundBox &right_bounds);
void split_curve_reference(const BVHReference& ref, void split_curve_reference(const BVHReference &ref,
const Mesh *mesh, const Mesh *mesh,
int dim, int dim,
float pos, float pos,
BoundBox& left_bounds, BoundBox &left_bounds,
BoundBox& right_bounds); BoundBox &right_bounds);
void split_object_reference(const Object *object, void split_object_reference(
int dim, const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds);
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds);
__forceinline BoundBox get_prim_bounds(const BVHReference& prim) const __forceinline BoundBox get_prim_bounds(const BVHReference &prim) const
{ {
if(aligned_space_ == NULL) { if (aligned_space_ == NULL) {
return prim.bounds(); return prim.bounds();
} }
else { else {
return unaligned_heuristic_->compute_aligned_prim_boundbox( return unaligned_heuristic_->compute_aligned_prim_boundbox(prim, *aligned_space_);
prim, *aligned_space_); }
} }
}
__forceinline float3 get_unaligned_point(const float3& point) const __forceinline float3 get_unaligned_point(const float3 &point) const
{ {
if(aligned_space_ == NULL) { if (aligned_space_ == NULL) {
return point; return point;
} }
else { else {
return transform_point(aligned_space_, point); return transform_point(aligned_space_, point);
} }
} }
}; };
/* Mixed Object-Spatial Split */ /* Mixed Object-Spatial Split */
class BVHMixedSplit class BVHMixedSplit {
{ public:
public: BVHObjectSplit object;
BVHObjectSplit object; BVHSpatialSplit spatial;
BVHSpatialSplit spatial;
float leafSAH; float leafSAH;
float nodeSAH; float nodeSAH;
float minSAH; float minSAH;
bool no_split; bool no_split;
BoundBox bounds; BoundBox bounds;
BVHMixedSplit() {} BVHMixedSplit()
{
}
__forceinline BVHMixedSplit(BVHBuild *builder, __forceinline BVHMixedSplit(BVHBuild *builder,
BVHSpatialStorage *storage, BVHSpatialStorage *storage,
const BVHRange& range, const BVHRange &range,
vector<BVHReference> *references, vector<BVHReference> *references,
int level, int level,
const BVHUnaligned *unaligned_heuristic = NULL, const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL) const Transform *aligned_space = NULL)
{ {
if(aligned_space == NULL) { if (aligned_space == NULL) {
bounds = range.bounds(); bounds = range.bounds();
} }
else { else {
bounds = unaligned_heuristic->compute_aligned_boundbox( bounds = unaligned_heuristic->compute_aligned_boundbox(
range, range, &references->at(0), *aligned_space);
&references->at(0), }
*aligned_space); /* find split candidates. */
} float area = bounds.safe_area();
/* find split candidates. */
float area = bounds.safe_area();
leafSAH = area * builder->params.primitive_cost(range.size()); leafSAH = area * builder->params.primitive_cost(range.size());
nodeSAH = area * builder->params.node_cost(2); nodeSAH = area * builder->params.node_cost(2);
object = BVHObjectSplit(builder, object = BVHObjectSplit(
storage, builder, storage, range, references, nodeSAH, unaligned_heuristic, aligned_space);
range,
references,
nodeSAH,
unaligned_heuristic,
aligned_space);
if(builder->params.use_spatial_split && level < BVHParams::MAX_SPATIAL_DEPTH) { if (builder->params.use_spatial_split && level < BVHParams::MAX_SPATIAL_DEPTH) {
BoundBox overlap = object.left_bounds; BoundBox overlap = object.left_bounds;
overlap.intersect(object.right_bounds); overlap.intersect(object.right_bounds);
if(overlap.safe_area() >= builder->spatial_min_overlap) { if (overlap.safe_area() >= builder->spatial_min_overlap) {
spatial = BVHSpatialSplit(*builder, spatial = BVHSpatialSplit(
storage, *builder, storage, range, references, nodeSAH, unaligned_heuristic, aligned_space);
range, }
references, }
nodeSAH,
unaligned_heuristic,
aligned_space);
}
}
/* leaf SAH is the lowest => create leaf. */ /* leaf SAH is the lowest => create leaf. */
minSAH = min(min(leafSAH, object.sah), spatial.sah); minSAH = min(min(leafSAH, object.sah), spatial.sah);
no_split = (minSAH == leafSAH && no_split = (minSAH == leafSAH && builder->range_within_max_leaf_size(range, *references));
builder->range_within_max_leaf_size(range, *references)); }
}
__forceinline void split(BVHBuild *builder, __forceinline void split(BVHBuild *builder,
BVHRange& left, BVHRange &left,
BVHRange& right, BVHRange &right,
const BVHRange& range) const BVHRange &range)
{ {
if(builder->params.use_spatial_split && minSAH == spatial.sah) if (builder->params.use_spatial_split && minSAH == spatial.sah)
spatial.split(builder, left, right, range); spatial.split(builder, left, right, range);
if(!left.size() || !right.size()) if (!left.size() || !right.size())
object.split(left, right, range); object.split(left, right, range);
} }
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __BVH_SPLIT_H__ */ #endif /* __BVH_SPLIT_H__ */

View File

@@ -27,150 +27,137 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
/* Bind the helper to the list of objects the BVH is built for. */
BVHUnaligned::BVHUnaligned(const vector<Object *> &objects)
    : objects_(objects)
{
}
/* Orientation space for a binning range: taken from the first reference in
 * the range that yields a valid direction, identity when none does. */
Transform BVHUnaligned::compute_aligned_space(const BVHObjectBinning &range,
                                              const BVHReference *references) const
{
  Transform space;
  int index = range.start();
  while (index < range.end()) {
    if (compute_aligned_space(references[index], &space)) {
      return space;
    }
    ++index;
  }
  return transform_identity();
}
/* Orientation space for a plain range: taken from the first reference in
 * the range that yields a valid direction, identity when none does. */
Transform BVHUnaligned::compute_aligned_space(const BVHRange &range,
                                              const BVHReference *references) const
{
  Transform space;
  int index = range.start();
  while (index < range.end()) {
    if (compute_aligned_space(references[index], &space)) {
      return space;
    }
    ++index;
  }
  return transform_identity();
}
bool BVHUnaligned::compute_aligned_space(const BVHReference& ref, bool BVHUnaligned::compute_aligned_space(const BVHReference &ref, Transform *aligned_space) const
Transform *aligned_space) const
{ {
const Object *object = objects_[ref.prim_object()]; const Object *object = objects_[ref.prim_object()];
const int packed_type = ref.prim_type(); const int packed_type = ref.prim_type();
const int type = (packed_type & PRIMITIVE_ALL); const int type = (packed_type & PRIMITIVE_ALL);
if(type & PRIMITIVE_CURVE) { if (type & PRIMITIVE_CURVE) {
const int curve_index = ref.prim_index(); const int curve_index = ref.prim_index();
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type); const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
const Mesh *mesh = object->mesh; const Mesh *mesh = object->mesh;
const Mesh::Curve& curve = mesh->get_curve(curve_index); const Mesh::Curve &curve = mesh->get_curve(curve_index);
const int key = curve.first_key + segment; const int key = curve.first_key + segment;
const float3 v1 = mesh->curve_keys[key], const float3 v1 = mesh->curve_keys[key], v2 = mesh->curve_keys[key + 1];
v2 = mesh->curve_keys[key + 1]; float length;
float length; const float3 axis = normalize_len(v2 - v1, &length);
const float3 axis = normalize_len(v2 - v1, &length); if (length > 1e-6f) {
if(length > 1e-6f) { *aligned_space = make_transform_frame(axis);
*aligned_space = make_transform_frame(axis); return true;
return true; }
} }
} *aligned_space = transform_identity();
*aligned_space = transform_identity(); return false;
return false;
} }
/* Bounding box of one primitive reference in the given space.
 *
 * Curve segments grow an empty box from their keys and radii; any other
 * primitive type transforms the reference's existing bounds. */
BoundBox BVHUnaligned::compute_aligned_prim_boundbox(const BVHReference &prim,
                                                     const Transform &aligned_space) const
{
  const Object *object = objects_[prim.prim_object()];
  const int packed_type = prim.prim_type();
  const bool is_curve = ((packed_type & PRIMITIVE_ALL) & PRIMITIVE_CURVE) != 0;

  if (!is_curve) {
    return prim.bounds().transformed(&aligned_space);
  }

  BoundBox result = BoundBox::empty;
  const Mesh *mesh = object->mesh;
  const Mesh::Curve &curve = mesh->get_curve(prim.prim_index());
  curve.bounds_grow(PRIMITIVE_UNPACK_SEGMENT(packed_type),
                    &mesh->curve_keys[0],
                    &mesh->curve_radius[0],
                    aligned_space,
                    result);
  return result;
}
/* Union of the aligned-space bounds of every reference in a binning range.
 * When cent_bounds is given it is reset and grown by each reference's
 * center2() value. */
BoundBox BVHUnaligned::compute_aligned_boundbox(const BVHObjectBinning &range,
                                                const BVHReference *references,
                                                const Transform &aligned_space,
                                                BoundBox *cent_bounds) const
{
  const bool want_centers = (cent_bounds != NULL);
  if (want_centers) {
    *cent_bounds = BoundBox::empty;
  }

  BoundBox total = BoundBox::empty;
  int index = range.start();
  while (index < range.end()) {
    BoundBox prim_bounds = compute_aligned_prim_boundbox(references[index], aligned_space);
    total.grow(prim_bounds);
    if (want_centers) {
      cent_bounds->grow(prim_bounds.center2());
    }
    ++index;
  }
  return total;
}
/* Union of the aligned-space bounds of every reference in a plain range.
 * When cent_bounds is given it is reset and grown by each reference's
 * center2() value. */
BoundBox BVHUnaligned::compute_aligned_boundbox(const BVHRange &range,
                                                const BVHReference *references,
                                                const Transform &aligned_space,
                                                BoundBox *cent_bounds) const
{
  const bool want_centers = (cent_bounds != NULL);
  if (want_centers) {
    *cent_bounds = BoundBox::empty;
  }

  BoundBox total = BoundBox::empty;
  int index = range.start();
  while (index < range.end()) {
    BoundBox prim_bounds = compute_aligned_prim_boundbox(references[index], aligned_space);
    total.grow(prim_bounds);
    if (want_centers) {
      cent_bounds->grow(prim_bounds.center2());
    }
    ++index;
  }
  return total;
}
/* Build the transform used for node packing: the aligned space with its
 * translation shifted by the bounds minimum, then scaled by the inverse of
 * the bounds size. Each size component is clamped to at least 1e-18 so the
 * division stays finite for flat boxes. */
Transform BVHUnaligned::compute_node_transform(const BoundBox &bounds,
                                               const Transform &aligned_space)
{
  const float3 extent = bounds.max - bounds.min;
  const Transform scale = transform_scale(1.0f / max(1e-18f, extent.x),
                                          1.0f / max(1e-18f, extent.y),
                                          1.0f / max(1e-18f, extent.z));

  Transform shifted = aligned_space;
  shifted.x.w -= bounds.min.x;
  shifted.y.w -= bounds.min.y;
  shifted.z.w -= bounds.min.z;

  return scale * shifted;
}
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -30,51 +30,44 @@ class Object;
/* Helper class to perform calculations needed for unaligned nodes. */ /* Helper class to perform calculations needed for unaligned nodes. */
class BVHUnaligned { class BVHUnaligned {
public: public:
BVHUnaligned(const vector<Object*>& objects); BVHUnaligned(const vector<Object *> &objects);
/* Calculate alignment for the oriented node for a given range. */ /* Calculate alignment for the oriented node for a given range. */
Transform compute_aligned_space( Transform compute_aligned_space(const BVHObjectBinning &range,
const BVHObjectBinning& range, const BVHReference *references) const;
const BVHReference *references) const; Transform compute_aligned_space(const BVHRange &range, const BVHReference *references) const;
Transform compute_aligned_space(
const BVHRange& range,
const BVHReference *references) const;
/* Calculate alignment for the oriented node for a given reference. /* Calculate alignment for the oriented node for a given reference.
* *
* Return true when space was calculated successfully. * Return true when space was calculated successfully.
*/ */
bool compute_aligned_space(const BVHReference& ref, bool compute_aligned_space(const BVHReference &ref, Transform *aligned_space) const;
Transform *aligned_space) const;
/* Calculate primitive's bounding box in given space. */ /* Calculate primitive's bounding box in given space. */
BoundBox compute_aligned_prim_boundbox( BoundBox compute_aligned_prim_boundbox(const BVHReference &prim,
const BVHReference& prim, const Transform &aligned_space) const;
const Transform& aligned_space) const;
/* Calculate bounding box in given space. */ /* Calculate bounding box in given space. */
BoundBox compute_aligned_boundbox( BoundBox compute_aligned_boundbox(const BVHObjectBinning &range,
const BVHObjectBinning& range, const BVHReference *references,
const BVHReference *references, const Transform &aligned_space,
const Transform& aligned_space, BoundBox *cent_bounds = NULL) const;
BoundBox *cent_bounds = NULL) const; BoundBox compute_aligned_boundbox(const BVHRange &range,
BoundBox compute_aligned_boundbox( const BVHReference *references,
const BVHRange& range, const Transform &aligned_space,
const BVHReference *references, BoundBox *cent_bounds = NULL) const;
const Transform& aligned_space,
BoundBox *cent_bounds = NULL) const;
/* Calculate affine transform for node packing. /* Calculate affine transform for node packing.
* Bounds will be in the range of 0..1. * Bounds will be in the range of 0..1.
*/ */
static Transform compute_node_transform(const BoundBox& bounds, static Transform compute_node_transform(const BoundBox &bounds, const Transform &aligned_space);
const Transform& aligned_space);
protected: protected:
/* List of objects BVH is being created for. */ /* List of objects BVH is being created for. */
const vector<Object*>& objects_; const vector<Object *> &objects_;
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __BVH_UNALIGNED_H__ */ #endif /* __BVH_UNALIGNED_H__ */

View File

@@ -2,24 +2,24 @@
# Precompiled libraries tips and hints, for find_package().
# Only pulled in for the standalone repository on macOS and Windows.
if(CYCLES_STANDALONE_REPOSITORY AND (APPLE OR WIN32))
  include(precompiled_libs)
endif()
########################################################################### ###########################################################################
# GLUT
# Looked up only for the standalone application built with its GUI.

if(WITH_CYCLES_STANDALONE AND WITH_CYCLES_STANDALONE_GUI)
  set(GLUT_ROOT_PATH ${CYCLES_GLUT})

  find_package(GLUT)
  message(STATUS "GLUT_FOUND=${GLUT_FOUND}")

  include_directories(SYSTEM ${GLUT_INCLUDE_DIR})
endif()
########################################################################### ###########################################################################
@@ -27,125 +27,125 @@ endif()
# Workaround for unconventional variable name use in Blender:
# inside Blender the GLEW include directory is published as GLEW_INCLUDE_PATH.
if(NOT CYCLES_STANDALONE_REPOSITORY)
  set(GLEW_INCLUDE_DIR "${GLEW_INCLUDE_PATH}")
endif()

# The standalone application links against Blender's GLEW libraries.
if(WITH_CYCLES_STANDALONE)
  set(CYCLES_APP_GLEW_LIBRARY ${BLENDER_GLEW_LIBRARIES})
endif()
########################################################################### ###########################################################################
# CUDA
# The toolkit is needed to build CUDA binaries or to link CUDA directly
# (i.e. when it is not loaded dynamically).

if(WITH_CYCLES_CUDA_BINARIES OR NOT WITH_CUDA_DYNLOAD)
  find_package(CUDA) # Try to auto locate CUDA toolkit
  if(NOT CUDA_FOUND)
    message(STATUS "CUDA compiler not found, disabling WITH_CYCLES_CUDA_BINARIES")
    set(WITH_CYCLES_CUDA_BINARIES OFF)
    # Without a toolkit direct linking is impossible as well.
    if(NOT WITH_CUDA_DYNLOAD)
      message(STATUS "Additionally falling back to dynamic CUDA load")
      set(WITH_CUDA_DYNLOAD ON)
    endif()
  else()
    message(STATUS "CUDA nvcc = ${CUDA_NVCC_EXECUTABLE}")
  endif()
endif()
# Packages which are being found by Blender when building from inside Blender
# source code, but which we need to take care of when building Cycles from a
# standalone repository.
if(CYCLES_STANDALONE_REPOSITORY)
  # PThreads
  # TODO(sergey): Bloody exception, handled in precompiled_libs.cmake.
  if(NOT WIN32)
    set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
    find_package(Threads REQUIRED)
    set(PTHREADS_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
  endif()

  ####
  # OpenGL

  # TODO(sergey): We currently re-use the same variable name as we use
  # in Blender. Ideally we need to make it CYCLES_GL_LIBRARIES.
  find_package(OpenGL REQUIRED)
  find_package(GLEW REQUIRED)
  list(APPEND BLENDER_GL_LIBRARIES
    "${OPENGL_gl_LIBRARY}"
    "${OPENGL_glu_LIBRARY}"
    "${GLEW_LIBRARY}"
  )

  ####
  # OpenImageIO
  find_package(OpenImageIO REQUIRED)
  if(OPENIMAGEIO_PUGIXML_FOUND)
    # PugiXML bundled with OpenImageIO lives in its OpenImageIO/ include
    # sub-directory. The closing brace must come right after the variable
    # name: "${VAR/OpenImageIO}" would reference a variable literally named
    # "VAR/OpenImageIO" and expand to an empty string.
    set(PUGIXML_INCLUDE_DIR "${OPENIMAGEIO_INCLUDE_DIR}/OpenImageIO")
    set(PUGIXML_LIBRARIES "")
  else()
    find_package(PugiXML REQUIRED)
  endif()

  # OIIO usually depends on OpenEXR, so find this library
  # but don't make it required.
  find_package(OpenEXR)

  ####
  # OpenShadingLanguage
  if(WITH_CYCLES_OSL)
    find_package(OpenShadingLanguage REQUIRED)
    find_package(LLVM REQUIRED)
  endif()

  ####
  # OpenColorIO
  if(WITH_OPENCOLORIO)
    find_package(OpenColorIO REQUIRED)
  endif()

  ####
  # Boost
  set(__boost_packages filesystem regex system thread date_time)
  if(WITH_CYCLES_NETWORK)
    list(APPEND __boost_packages serialization)
  endif()
  if(WITH_CYCLES_OSL)
    # TODO(sergey): This is because of the way how our precompiled
    # libraries works, could be different for someone's else libs..
    #
    # The version variables are quoted so that the condition stays well
    # formed even when they are empty.
    if(APPLE OR MSVC)
      list(APPEND __boost_packages wave)
    elseif(NOT ("${OSL_LIBRARY_VERSION_MAJOR}" EQUAL "1" AND "${OSL_LIBRARY_VERSION_MINOR}" LESS "6"))
      list(APPEND __boost_packages wave)
    endif()
  endif()
  # First attempt is deliberately not REQUIRED: a REQUIRED lookup aborts the
  # configure step on failure, which would make the fallback below unreachable.
  find_package(Boost 1.48 COMPONENTS ${__boost_packages})
  if(NOT Boost_FOUND)
    # Try to find non-multithreaded if -mt not found, this flag
    # doesn't matter for us, it has nothing to do with thread
    # safety, but keep it to not disturb build setups.
    set(Boost_USE_MULTITHREADED OFF)
    # Boost is mandatory, so the last attempt is the one that must succeed.
    find_package(Boost 1.48 COMPONENTS ${__boost_packages} REQUIRED)
  endif()
  unset(__boost_packages)
  set(BOOST_INCLUDE_DIR ${Boost_INCLUDE_DIRS})
  set(BOOST_LIBRARIES ${Boost_LIBRARIES})
  set(BOOST_LIBPATH ${Boost_LIBRARY_DIRS})
  set(BOOST_DEFINITIONS "-DBOOST_ALL_NO_LIB")

  ####
  # embree
  if(WITH_CYCLES_EMBREE)
    find_package(embree 3.2.4 REQUIRED)
  endif()

  ####
  # Logging
  if(WITH_CYCLES_LOGGING)
    find_package(Glog REQUIRED)
    find_package(Gflags REQUIRED)
  endif()

  unset(_lib_DIR)
else()
  # Inside Blender LLVM is already located; only mirror the variable name.
  set(LLVM_LIBRARIES ${LLVM_LIBRARY})
endif()

View File

@@ -1,15 +1,15 @@
# Place `target` into an IDE solution folder that mirrors the location of the
# parent of the current source directory relative to the top-level source
# directory. Does nothing unless WINDOWS_USE_VISUAL_STUDIO_FOLDERS is enabled.
#
#   target - name of an existing target.
function(cycles_set_solution_folder target)
  if(WINDOWS_USE_VISUAL_STUDIO_FOLDERS)
    get_filename_component(folderdir "${CMAKE_CURRENT_SOURCE_DIR}" DIRECTORY)
    # Expansions are quoted: the relative folder is empty for targets defined
    # directly below the top-level directory, and an unquoted empty value
    # would drop the argument and make the commands below fail.
    string(REPLACE "${CMAKE_SOURCE_DIR}" "" folderdir "${folderdir}")
    set_target_properties(${target} PROPERTIES FOLDER "${folderdir}")
  endif()
endfunction()
# Create library `target` from the remaining arguments (forwarded to
# add_library()), link it against `library_deps` when that list is non-empty
# and assign its IDE solution folder.
macro(cycles_add_library target library_deps)
  add_library(${target} ${ARGN})
  cycles_set_solution_folder(${target})
  if(NOT "${library_deps}" STREQUAL "")
    target_link_libraries(${target} "${library_deps}")
  endif()
endmacro()

View File

@@ -1,61 +1,61 @@
# Project include directories.
set(INC
  ..
  ../../glew-mx
)

# Include directories treated as system headers.
set(INC_SYS
  ${GLEW_INCLUDE_DIR}
  ../../../extern/clew/include
)

# CUDA is either loaded at runtime through cuew or linked directly, in which
# case the toolkit headers and the nvcc location are needed.
if(WITH_CUDA_DYNLOAD)
  list(APPEND INC ../../../extern/cuew/include)
  add_definitions(-DWITH_CUDA_DYNLOAD)
else()
  list(APPEND INC_SYS ${CUDA_TOOLKIT_INCLUDE})
  add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
endif()
# Device implementation sources.
set(SRC
  device.cpp
  device_cpu.cpp
  device_cuda.cpp
  device_denoising.cpp
  device_memory.cpp
  device_multi.cpp
  device_opencl.cpp
  device_split_kernel.cpp
  device_task.cpp
)

# OpenCL specific sources and headers.
set(SRC_OPENCL
  opencl/opencl.h
  opencl/memory_manager.h

  opencl/opencl_split.cpp
  opencl/opencl_util.cpp
  opencl/memory_manager.cpp
)

# The network device is only compiled when networking is enabled.
if(WITH_CYCLES_NETWORK)
  list(APPEND SRC device_network.cpp)
endif()

# Device headers.
set(SRC_HEADERS
  device.h
  device_denoising.h
  device_memory.h
  device_intern.h
  device_network.h
  device_split_kernel.h
  device_task.h
)
set(LIB set(LIB
@@ -63,27 +63,27 @@ set(LIB
) )
# CUDA comes either from the cuew loader or from the CUDA driver library.
if(WITH_CUDA_DYNLOAD)
  list(APPEND LIB extern_cuew)
else()
  list(APPEND LIB ${CUDA_CUDA_LIBRARY})
endif()

add_definitions(${GL_DEFINITIONS})

# Feature switches exposed to the sources as preprocessor definitions.
if(WITH_CYCLES_NETWORK)
  add_definitions(-DWITH_NETWORK)
endif()

if(WITH_CYCLES_DEVICE_OPENCL)
  add_definitions(-DWITH_OPENCL)
endif()

if(WITH_CYCLES_DEVICE_CUDA)
  add_definitions(-DWITH_CUDA)
endif()

if(WITH_CYCLES_DEVICE_MULTI)
  add_definitions(-DWITH_MULTI)
endif()

include_directories(${INC})

View File

@@ -44,572 +44,577 @@ uint Device::devices_initialized_mask = 0;
/* Device Requested Features */ /* Device Requested Features */
std::ostream& operator <<(std::ostream &os, std::ostream &operator<<(std::ostream &os, const DeviceRequestedFeatures &requested_features)
const DeviceRequestedFeatures& requested_features)
{ {
os << "Experimental features: " os << "Experimental features: " << (requested_features.experimental ? "On" : "Off") << std::endl;
<< (requested_features.experimental ? "On" : "Off") << std::endl; os << "Max nodes group: " << requested_features.max_nodes_group << std::endl;
os << "Max nodes group: " << requested_features.max_nodes_group << std::endl; /* TODO(sergey): Decode bitflag into list of names. */
/* TODO(sergey): Decode bitflag into list of names. */ os << "Nodes features: " << requested_features.nodes_features << std::endl;
os << "Nodes features: " << requested_features.nodes_features << std::endl; os << "Use Hair: " << string_from_bool(requested_features.use_hair) << std::endl;
os << "Use Hair: " os << "Use Object Motion: " << string_from_bool(requested_features.use_object_motion)
<< string_from_bool(requested_features.use_hair) << std::endl; << std::endl;
os << "Use Object Motion: " os << "Use Camera Motion: " << string_from_bool(requested_features.use_camera_motion)
<< string_from_bool(requested_features.use_object_motion) << std::endl; << std::endl;
os << "Use Camera Motion: " os << "Use Baking: " << string_from_bool(requested_features.use_baking) << std::endl;
<< string_from_bool(requested_features.use_camera_motion) << std::endl; os << "Use Subsurface: " << string_from_bool(requested_features.use_subsurface) << std::endl;
os << "Use Baking: " os << "Use Volume: " << string_from_bool(requested_features.use_volume) << std::endl;
<< string_from_bool(requested_features.use_baking) << std::endl; os << "Use Branched Integrator: " << string_from_bool(requested_features.use_integrator_branched)
os << "Use Subsurface: " << std::endl;
<< string_from_bool(requested_features.use_subsurface) << std::endl; os << "Use Patch Evaluation: " << string_from_bool(requested_features.use_patch_evaluation)
os << "Use Volume: " << std::endl;
<< string_from_bool(requested_features.use_volume) << std::endl; os << "Use Transparent Shadows: " << string_from_bool(requested_features.use_transparent)
os << "Use Branched Integrator: " << std::endl;
<< string_from_bool(requested_features.use_integrator_branched) << std::endl; os << "Use Principled BSDF: " << string_from_bool(requested_features.use_principled)
os << "Use Patch Evaluation: " << std::endl;
<< string_from_bool(requested_features.use_patch_evaluation) << std::endl; os << "Use Denoising: " << string_from_bool(requested_features.use_denoising) << std::endl;
os << "Use Transparent Shadows: " os << "Use Displacement: " << string_from_bool(requested_features.use_true_displacement)
<< string_from_bool(requested_features.use_transparent) << std::endl; << std::endl;
os << "Use Principled BSDF: " os << "Use Background Light: " << string_from_bool(requested_features.use_background_light)
<< string_from_bool(requested_features.use_principled) << std::endl; << std::endl;
os << "Use Denoising: " return os;
<< string_from_bool(requested_features.use_denoising) << std::endl;
os << "Use Displacement: "
<< string_from_bool(requested_features.use_true_displacement) << std::endl;
os << "Use Background Light: "
<< string_from_bool(requested_features.use_background_light) << std::endl;
return os;
} }
/* Device */ /* Device */
/* Release the GL objects owned by the device. Nothing is released when the
 * device runs in background mode. */
Device::~Device()
{
  if (background) {
    return;
  }

  if (vertex_buffer != 0) {
    glDeleteBuffers(1, &vertex_buffer);
  }
  if (fallback_shader_program != 0) {
    glDeleteProgram(fallback_shader_program);
  }
}
/* TODO move shaders to standalone .glsl file. */

/* Vertex stage of the fallback display shader: maps `pos` from pixel
 * coordinates (relative to `fullscreen`) to clip space and passes the
 * texture coordinate through. */
const char *FALLBACK_VERTEX_SHADER =
    "#version 330\n"
    "uniform vec2 fullscreen;\n"
    "in vec2 texCoord;\n"
    "in vec2 pos;\n"
    "out vec2 texCoord_interp;\n"
    "\n"
    "vec2 normalize_coordinates()\n"
    "{\n"
    "	return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n"
    "}\n"
    "\n"
    "void main()\n"
    "{\n"
    "	gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n"
    "	texCoord_interp = texCoord;\n"
    "}\n\0";

/* Fragment stage of the fallback display shader: samples `image_texture`
 * at the interpolated texture coordinate. */
const char *FALLBACK_FRAGMENT_SHADER =
    "#version 330\n"
    "uniform sampler2D image_texture;\n"
    "in vec2 texCoord_interp;\n"
    "out vec4 fragColor;\n"
    "\n"
    "void main()\n"
    "{\n"
    "	fragColor = texture(image_texture, texCoord_interp);\n"
    "}\n\0";
static void shader_print_errors(const char *task, const char *log, const char *code) static void shader_print_errors(const char *task, const char *log, const char *code)
{ {
LOG(ERROR) << "Shader: " << task << " error:"; LOG(ERROR) << "Shader: " << task << " error:";
LOG(ERROR) << "===== shader string ===="; LOG(ERROR) << "===== shader string ====";
stringstream stream(code); stringstream stream(code);
string partial; string partial;
int line = 1; int line = 1;
while(getline(stream, partial, '\n')) { while (getline(stream, partial, '\n')) {
if(line < 10) { if (line < 10) {
LOG(ERROR) << " " << line << " " << partial; LOG(ERROR) << " " << line << " " << partial;
} }
else { else {
LOG(ERROR) << line << " " << partial; LOG(ERROR) << line << " " << partial;
} }
line++; line++;
} }
LOG(ERROR) << log; LOG(ERROR) << log;
} }
static int bind_fallback_shader(void) static int bind_fallback_shader(void)
{ {
GLint status; GLint status;
GLchar log[5000]; GLchar log[5000];
GLsizei length = 0; GLsizei length = 0;
GLuint program = 0; GLuint program = 0;
struct Shader { struct Shader {
const char *source; const char *source;
GLenum type; GLenum type;
} shaders[2] = { } shaders[2] = {{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER},
{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER}, {FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}};
{FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}
};
program = glCreateProgram(); program = glCreateProgram();
for(int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++) {
GLuint shader = glCreateShader(shaders[i].type); GLuint shader = glCreateShader(shaders[i].type);
string source_str = shaders[i].source; string source_str = shaders[i].source;
const char *c_str = source_str.c_str(); const char *c_str = source_str.c_str();
glShaderSource(shader, 1, &c_str, NULL); glShaderSource(shader, 1, &c_str, NULL);
glCompileShader(shader); glCompileShader(shader);
glGetShaderiv(shader, GL_COMPILE_STATUS, &status); glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
if(!status) { if (!status) {
glGetShaderInfoLog(shader, sizeof(log), &length, log); glGetShaderInfoLog(shader, sizeof(log), &length, log);
shader_print_errors("compile", log, c_str); shader_print_errors("compile", log, c_str);
return 0; return 0;
} }
glAttachShader(program, shader); glAttachShader(program, shader);
} }
/* Link output. */ /* Link output. */
glBindFragDataLocation(program, 0, "fragColor"); glBindFragDataLocation(program, 0, "fragColor");
/* Link and error check. */ /* Link and error check. */
glLinkProgram(program); glLinkProgram(program);
glGetProgramiv(program, GL_LINK_STATUS, &status); glGetProgramiv(program, GL_LINK_STATUS, &status);
if(!status) { if (!status) {
glGetShaderInfoLog(program, sizeof(log), &length, log); glGetShaderInfoLog(program, sizeof(log), &length, log);
shader_print_errors("linking", log, FALLBACK_VERTEX_SHADER); shader_print_errors("linking", log, FALLBACK_VERTEX_SHADER);
shader_print_errors("linking", log, FALLBACK_FRAGMENT_SHADER); shader_print_errors("linking", log, FALLBACK_FRAGMENT_SHADER);
return 0; return 0;
} }
return program; return program;
} }
bool Device::bind_fallback_display_space_shader(const float width, const float height) bool Device::bind_fallback_display_space_shader(const float width, const float height)
{ {
if(fallback_status == FALLBACK_SHADER_STATUS_ERROR) { if (fallback_status == FALLBACK_SHADER_STATUS_ERROR) {
return false; return false;
} }
if(fallback_status == FALLBACK_SHADER_STATUS_NONE) { if (fallback_status == FALLBACK_SHADER_STATUS_NONE) {
fallback_shader_program = bind_fallback_shader(); fallback_shader_program = bind_fallback_shader();
fallback_status = FALLBACK_SHADER_STATUS_ERROR; fallback_status = FALLBACK_SHADER_STATUS_ERROR;
if(fallback_shader_program == 0) { if (fallback_shader_program == 0) {
return false; return false;
} }
glUseProgram(fallback_shader_program); glUseProgram(fallback_shader_program);
image_texture_location = glGetUniformLocation(fallback_shader_program, "image_texture"); image_texture_location = glGetUniformLocation(fallback_shader_program, "image_texture");
if(image_texture_location < 0) { if (image_texture_location < 0) {
LOG(ERROR) << "Shader doesn't containt the 'image_texture' uniform."; LOG(ERROR) << "Shader doesn't containt the 'image_texture' uniform.";
return false; return false;
} }
fullscreen_location = glGetUniformLocation(fallback_shader_program, "fullscreen"); fullscreen_location = glGetUniformLocation(fallback_shader_program, "fullscreen");
if(fullscreen_location < 0) { if (fullscreen_location < 0) {
LOG(ERROR) << "Shader doesn't containt the 'fullscreen' uniform."; LOG(ERROR) << "Shader doesn't containt the 'fullscreen' uniform.";
return false; return false;
} }
fallback_status = FALLBACK_SHADER_STATUS_SUCCESS; fallback_status = FALLBACK_SHADER_STATUS_SUCCESS;
} }
/* Run this every time. */ /* Run this every time. */
glUseProgram(fallback_shader_program); glUseProgram(fallback_shader_program);
glUniform1i(image_texture_location, 0); glUniform1i(image_texture_location, 0);
glUniform2f(fullscreen_location, width, height); glUniform2f(fullscreen_location, width, height);
return true; return true;
} }
void Device::draw_pixels( void Device::draw_pixels(device_memory &rgba,
device_memory& rgba, int y, int y,
int w, int h, int width, int height, int w,
int dx, int dy, int dw, int dh, int h,
bool transparent, const DeviceDrawParams &draw_params) int width,
int height,
int dx,
int dy,
int dw,
int dh,
bool transparent,
const DeviceDrawParams &draw_params)
{ {
const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL); const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);
assert(rgba.type == MEM_PIXELS); assert(rgba.type == MEM_PIXELS);
mem_copy_from(rgba, y, w, h, rgba.memory_elements_size(1)); mem_copy_from(rgba, y, w, h, rgba.memory_elements_size(1));
GLuint texid; GLuint texid;
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);
glGenTextures(1, &texid); glGenTextures(1, &texid);
glBindTexture(GL_TEXTURE_2D, texid); glBindTexture(GL_TEXTURE_2D, texid);
if(rgba.data_type == TYPE_HALF) { if (rgba.data_type == TYPE_HALF) {
GLhalf *data_pointer = (GLhalf*)rgba.host_pointer; GLhalf *data_pointer = (GLhalf *)rgba.host_pointer;
data_pointer += 4 * y * w; data_pointer += 4 * y * w;
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, w, h, 0, GL_RGBA, GL_HALF_FLOAT, data_pointer); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, w, h, 0, GL_RGBA, GL_HALF_FLOAT, data_pointer);
} }
else { else {
uint8_t *data_pointer = (uint8_t*)rgba.host_pointer; uint8_t *data_pointer = (uint8_t *)rgba.host_pointer;
data_pointer += 4 * y * w; data_pointer += 4 * y * w;
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, data_pointer); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, data_pointer);
} }
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
if(transparent) { if (transparent) {
glEnable(GL_BLEND); glEnable(GL_BLEND);
glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA); glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
} }
GLint shader_program; GLint shader_program;
if(use_fallback_shader) { if (use_fallback_shader) {
if(!bind_fallback_display_space_shader(dw, dh)) { if (!bind_fallback_display_space_shader(dw, dh)) {
return; return;
} }
shader_program = fallback_shader_program; shader_program = fallback_shader_program;
} }
else { else {
draw_params.bind_display_space_shader_cb(); draw_params.bind_display_space_shader_cb();
glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program); glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program);
} }
if(!vertex_buffer) { if (!vertex_buffer) {
glGenBuffers(1, &vertex_buffer); glGenBuffers(1, &vertex_buffer);
} }
glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer); glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
/* invalidate old contents - avoids stalling if buffer is still waiting in queue to be rendered */ /* invalidate old contents - avoids stalling if buffer is still waiting in queue to be rendered */
glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW); glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
float *vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY); float *vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
if(vpointer) { if (vpointer) {
/* texture coordinate - vertex pair */ /* texture coordinate - vertex pair */
vpointer[0] = 0.0f; vpointer[0] = 0.0f;
vpointer[1] = 0.0f; vpointer[1] = 0.0f;
vpointer[2] = dx; vpointer[2] = dx;
vpointer[3] = dy; vpointer[3] = dy;
vpointer[4] = 1.0f; vpointer[4] = 1.0f;
vpointer[5] = 0.0f; vpointer[5] = 0.0f;
vpointer[6] = (float)width + dx; vpointer[6] = (float)width + dx;
vpointer[7] = dy; vpointer[7] = dy;
vpointer[8] = 1.0f; vpointer[8] = 1.0f;
vpointer[9] = 1.0f; vpointer[9] = 1.0f;
vpointer[10] = (float)width + dx; vpointer[10] = (float)width + dx;
vpointer[11] = (float)height + dy; vpointer[11] = (float)height + dy;
vpointer[12] = 0.0f; vpointer[12] = 0.0f;
vpointer[13] = 1.0f; vpointer[13] = 1.0f;
vpointer[14] = dx; vpointer[14] = dx;
vpointer[15] = (float)height + dy; vpointer[15] = (float)height + dy;
if(vertex_buffer) { if (vertex_buffer) {
glUnmapBuffer(GL_ARRAY_BUFFER); glUnmapBuffer(GL_ARRAY_BUFFER);
} }
} }
GLuint vertex_array_object; GLuint vertex_array_object;
GLuint position_attribute, texcoord_attribute; GLuint position_attribute, texcoord_attribute;
glGenVertexArrays(1, &vertex_array_object); glGenVertexArrays(1, &vertex_array_object);
glBindVertexArray(vertex_array_object); glBindVertexArray(vertex_array_object);
texcoord_attribute = glGetAttribLocation(shader_program, "texCoord"); texcoord_attribute = glGetAttribLocation(shader_program, "texCoord");
position_attribute = glGetAttribLocation(shader_program, "pos"); position_attribute = glGetAttribLocation(shader_program, "pos");
glEnableVertexAttribArray(texcoord_attribute); glEnableVertexAttribArray(texcoord_attribute);
glEnableVertexAttribArray(position_attribute); glEnableVertexAttribArray(position_attribute);
glVertexAttribPointer(texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0); glVertexAttribPointer(
glVertexAttribPointer(position_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)(sizeof(float) * 2)); texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
glVertexAttribPointer(position_attribute,
2,
GL_FLOAT,
GL_FALSE,
4 * sizeof(float),
(const GLvoid *)(sizeof(float) * 2));
glDrawArrays(GL_TRIANGLE_FAN, 0, 4); glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
if(vertex_buffer) { if (vertex_buffer) {
glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_ARRAY_BUFFER, 0);
} }
if(use_fallback_shader) { if (use_fallback_shader) {
glUseProgram(0); glUseProgram(0);
} }
else { else {
draw_params.unbind_display_space_shader_cb(); draw_params.unbind_display_space_shader_cb();
} }
glDeleteVertexArrays(1, &vertex_array_object); glDeleteVertexArrays(1, &vertex_array_object);
glBindTexture(GL_TEXTURE_2D, 0); glBindTexture(GL_TEXTURE_2D, 0);
glDeleteTextures(1, &texid); glDeleteTextures(1, &texid);
if(transparent) { if (transparent) {
glDisable(GL_BLEND); glDisable(GL_BLEND);
} }
} }
/* Create a device of the type named by `info.type`.
 *
 * Returns NULL when the type is not handled by this build, or when the CUDA /
 * OpenCL init call reports failure. */
Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{
  Device *created = NULL;

  switch (info.type) {
    case DEVICE_CPU:
      created = device_cpu_create(info, stats, profiler, background);
      break;
#ifdef WITH_CUDA
    case DEVICE_CUDA:
      if (device_cuda_init()) {
        created = device_cuda_create(info, stats, profiler, background);
      }
      break;
#endif
#ifdef WITH_MULTI
    case DEVICE_MULTI:
      created = device_multi_create(info, stats, profiler, background);
      break;
#endif
#ifdef WITH_NETWORK
    case DEVICE_NETWORK:
      created = device_network_create(info, stats, profiler, "127.0.0.1");
      break;
#endif
#ifdef WITH_OPENCL
    case DEVICE_OPENCL:
      if (device_opencl_init()) {
        created = device_opencl_create(info, stats, profiler, background);
      }
      break;
#endif
    default:
      /* Unknown type, or support for it is compiled out. */
      break;
  }

  return created;
}
/* Map a device type name ("CPU", "CUDA", "OPENCL", "NETWORK", "MULTI") to its
 * enum value. The comparison is exact; any other name gives DEVICE_NONE. */
DeviceType Device::type_from_string(const char *name)
{
  static const struct {
    const char *label;
    DeviceType type;
  } known_types[] = {
      {"CPU", DEVICE_CPU},
      {"CUDA", DEVICE_CUDA},
      {"OPENCL", DEVICE_OPENCL},
      {"NETWORK", DEVICE_NETWORK},
      {"MULTI", DEVICE_MULTI},
  };

  const size_t num_known_types = sizeof(known_types) / sizeof(known_types[0]);
  for (size_t i = 0; i < num_known_types; i++) {
    if (strcmp(name, known_types[i].label) == 0) {
      return known_types[i].type;
    }
  }

  return DEVICE_NONE;
}
/* Inverse of type_from_string(): the name of a device type, or an empty
 * string for any type without a name (including DEVICE_NONE). */
string Device::string_from_type(DeviceType type)
{
  switch (type) {
    case DEVICE_CPU:
      return "CPU";
    case DEVICE_CUDA:
      return "CUDA";
    case DEVICE_OPENCL:
      return "OPENCL";
    case DEVICE_NETWORK:
      return "NETWORK";
    case DEVICE_MULTI:
      return "MULTI";
    default:
      break;
  }

  return "";
}
/* List the device types this build supports: CPU always, the others only when
 * the matching WITH_* option is defined. */
vector<DeviceType> Device::available_types()
{
  vector<DeviceType> supported;

  /* The CPU device has no build-time switch. */
  supported.push_back(DEVICE_CPU);

#ifdef WITH_CUDA
  supported.push_back(DEVICE_CUDA);
#endif

#ifdef WITH_OPENCL
  supported.push_back(DEVICE_OPENCL);
#endif

#ifdef WITH_NETWORK
  supported.push_back(DEVICE_NETWORK);
#endif

  return supported;
}
/* Collect the devices whose type is selected in `mask`.
 *
 * Each backend is queried once and the result is kept in a per-backend list;
 * `devices_initialized_mask` records which backends have already been queried.
 * The returned order is OpenCL, CUDA, CPU, network. */
vector<DeviceInfo> Device::available_devices(uint mask)
{
  /* Lazy initialize devices. On some platforms OpenCL or CUDA drivers can
   * be broken and cause crashes when only trying to get device info, so
   * we don't want to do any initialization until the user chooses to. */
  thread_scoped_lock lock(device_mutex);
  vector<DeviceInfo> result;

#ifdef WITH_OPENCL
  if (mask & DEVICE_MASK_OPENCL) {
    const bool already_queried = (devices_initialized_mask & DEVICE_MASK_OPENCL) != 0;
    if (!already_queried) {
      if (device_opencl_init()) {
        device_opencl_info(opencl_devices);
      }
      /* Marked as queried even when init failed, so it is not retried. */
      devices_initialized_mask |= DEVICE_MASK_OPENCL;
    }
    result.insert(result.end(), opencl_devices.begin(), opencl_devices.end());
  }
#endif

#ifdef WITH_CUDA
  if (mask & DEVICE_MASK_CUDA) {
    const bool already_queried = (devices_initialized_mask & DEVICE_MASK_CUDA) != 0;
    if (!already_queried) {
      if (device_cuda_init()) {
        device_cuda_info(cuda_devices);
      }
      /* Marked as queried even when init failed, so it is not retried. */
      devices_initialized_mask |= DEVICE_MASK_CUDA;
    }
    result.insert(result.end(), cuda_devices.begin(), cuda_devices.end());
  }
#endif

  if (mask & DEVICE_MASK_CPU) {
    const bool already_queried = (devices_initialized_mask & DEVICE_MASK_CPU) != 0;
    if (!already_queried) {
      device_cpu_info(cpu_devices);
      devices_initialized_mask |= DEVICE_MASK_CPU;
    }
    result.insert(result.end(), cpu_devices.begin(), cpu_devices.end());
  }

#ifdef WITH_NETWORK
  if (mask & DEVICE_MASK_NETWORK) {
    const bool already_queried = (devices_initialized_mask & DEVICE_MASK_NETWORK) != 0;
    if (!already_queried) {
      device_network_info(network_devices);
      devices_initialized_mask |= DEVICE_MASK_NETWORK;
    }
    result.insert(result.end(), network_devices.begin(), network_devices.end());
  }
#endif

  return result;
}
/* Build a text report of the capabilities of the device types selected in
 * `mask`. Sections appear in the order CPU, OpenCL, CUDA; the OpenCL and CUDA
 * sections are only present when the backend is compiled in and its init call
 * succeeds. */
string Device::device_capabilities(uint mask)
{
  thread_scoped_lock lock(device_mutex);
  string report = "";

  if (mask & DEVICE_MASK_CPU) {
    report += "\nCPU device capabilities: ";
    report += device_cpu_capabilities() + "\n";
  }

#ifdef WITH_OPENCL
  /* Short-circuit keeps the init call from running for unselected types. */
  if ((mask & DEVICE_MASK_OPENCL) && device_opencl_init()) {
    report += "\nOpenCL device capabilities:\n";
    report += device_opencl_capabilities();
  }
#endif

#ifdef WITH_CUDA
  /* Short-circuit keeps the init call from running for unselected types. */
  if ((mask & DEVICE_MASK_CUDA) && device_cuda_init()) {
    report += "\nCUDA device capabilities:\n";
    report += device_cuda_capabilities();
  }
#endif

  return report;
}
/* Combine several devices into a single DEVICE_MULTI description.
 *
 * With exactly one sub-device that device is returned unchanged. Otherwise:
 * - non-CPU devices are added as they are;
 * - a CPU device is dropped for interactive (non-background) rendering;
 * - for background rendering the CPU keeps its thread count minus one thread
 *   per other sub-device, and is dropped when no thread is left.
 *
 * `threads` is the requested CPU thread count; 0 means use
 * system_cpu_thread_count(). Feature flags of the result are the logical AND
 * of the flags of the sub-devices that were kept. */
DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
                                    int threads,
                                    bool background)
{
  assert(subdevices.size() > 0);

  if (subdevices.size() == 1) {
    /* No multi device needed. */
    return subdevices.front();
  }

  DeviceInfo info;
  info.type = DEVICE_MULTI;
  info.id = "MULTI";
  info.description = "Multi Device";
  info.num = 0;

  info.has_half_images = true;
  info.has_volume_decoupled = true;
  info.has_osl = true;
  info.has_profiling = true;

  foreach (const DeviceInfo &device, subdevices) {
    /* Ensure CPU device does not slow down GPU. */
    if (device.type == DEVICE_CPU && subdevices.size() > 1) {
      if (background) {
        int orig_cpu_threads = (threads) ? threads : system_cpu_thread_count();
        /* Do the subtraction in signed arithmetic: mixing int with size_t
         * would compute in unsigned and only become negative again through
         * wraparound plus narrowing back to int. */
        const int num_other_devices = (int)subdevices.size() - 1;
        int cpu_threads = max(orig_cpu_threads - num_other_devices, 0);

        VLOG(1) << "CPU render threads reduced from " << orig_cpu_threads << " to " << cpu_threads
                << ", to dedicate to GPU.";

        if (cpu_threads >= 1) {
          DeviceInfo cpu_device = device;
          cpu_device.cpu_threads = cpu_threads;
          info.multi_devices.push_back(cpu_device);
        }
        else {
          /* No thread left for the CPU: leave it out entirely. */
          continue;
        }
      }
      else {
        VLOG(1) << "CPU render threads disabled for interactive render.";
        continue;
      }
    }
    else {
      info.multi_devices.push_back(device);
    }

    /* Accumulate device info. */
    info.has_half_images &= device.has_half_images;
    info.has_volume_decoupled &= device.has_volume_decoupled;
    info.has_osl &= device.has_osl;
    info.has_profiling &= device.has_profiling;
  }

  return info;
}
void Device::tag_update() void Device::tag_update()
{ {
free_memory(); free_memory();
} }
void Device::free_memory() void Device::free_memory()
{ {
devices_initialized_mask = 0; devices_initialized_mask = 0;
cuda_devices.free_memory(); cuda_devices.free_memory();
opencl_devices.free_memory(); opencl_devices.free_memory();
cpu_devices.free_memory(); cpu_devices.free_memory();
network_devices.free_memory(); network_devices.free_memory();
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -40,384 +40,428 @@ class RenderTile;
/* Device Types */ /* Device Types */
/* Kind of compute device. The numeric values are also used as bit positions
 * by DeviceTypeMask. */
enum DeviceType {
  DEVICE_NONE = 0,
  DEVICE_CPU = 1,
  DEVICE_OPENCL = 2,
  DEVICE_CUDA = 3,
  DEVICE_NETWORK = 4,
  DEVICE_MULTI = 5
};
enum DeviceTypeMask { enum DeviceTypeMask {
DEVICE_MASK_CPU = (1 << DEVICE_CPU), DEVICE_MASK_CPU = (1 << DEVICE_CPU),
DEVICE_MASK_OPENCL = (1 << DEVICE_OPENCL), DEVICE_MASK_OPENCL = (1 << DEVICE_OPENCL),
DEVICE_MASK_CUDA = (1 << DEVICE_CUDA), DEVICE_MASK_CUDA = (1 << DEVICE_CUDA),
DEVICE_MASK_NETWORK = (1 << DEVICE_NETWORK), DEVICE_MASK_NETWORK = (1 << DEVICE_NETWORK),
DEVICE_MASK_ALL = ~0 DEVICE_MASK_ALL = ~0
}; };
/* Status of a device's feature kernel.
 * NOTE(review): the exact meaning of each state is not visible in this file -
 * confirm against the code that reports the status. */
enum DeviceKernelStatus {
  DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL = 0,
  DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE = 1,
  DEVICE_KERNEL_USING_FEATURE_KERNEL = 2,
  DEVICE_KERNEL_FEATURE_KERNEL_INVALID = 3,
  DEVICE_KERNEL_UNKNOWN = 4,
};
/* Convert a DeviceType value into its DeviceTypeMask bit. The argument and
 * the whole expansion are parenthesized so the macro behaves as a single
 * expression for any argument and in any surrounding expression. */
#define DEVICE_MASK(type) ((DeviceTypeMask)(1 << (type)))
class DeviceInfo { class DeviceInfo {
public: public:
DeviceType type; DeviceType type;
string description; string description;
string id; /* used for user preferences, should stay fixed with changing hardware config */ string id; /* used for user preferences, should stay fixed with changing hardware config */
int num; int num;
bool display_device; /* GPU is used as a display device. */ bool display_device; /* GPU is used as a display device. */
bool has_half_images; /* Support half-float textures. */ bool has_half_images; /* Support half-float textures. */
bool has_volume_decoupled; /* Decoupled volume shading. */ bool has_volume_decoupled; /* Decoupled volume shading. */
bool has_osl; /* Support Open Shading Language. */ bool has_osl; /* Support Open Shading Language. */
bool use_split_kernel; /* Use split or mega kernel. */ bool use_split_kernel; /* Use split or mega kernel. */
bool has_profiling; /* Supports runtime collection of profiling info. */ bool has_profiling; /* Supports runtime collection of profiling info. */
int cpu_threads; int cpu_threads;
vector<DeviceInfo> multi_devices; vector<DeviceInfo> multi_devices;
DeviceInfo() DeviceInfo()
{ {
type = DEVICE_CPU; type = DEVICE_CPU;
id = "CPU"; id = "CPU";
num = 0; num = 0;
cpu_threads = 0; cpu_threads = 0;
display_device = false; display_device = false;
has_half_images = false; has_half_images = false;
has_volume_decoupled = false; has_volume_decoupled = false;
has_osl = false; has_osl = false;
use_split_kernel = false; use_split_kernel = false;
has_profiling = false; has_profiling = false;
} }
bool operator==(const DeviceInfo &info) { bool operator==(const DeviceInfo &info)
/* Multiple Devices with the same ID would be very bad. */ {
assert(id != info.id || (type == info.type && num == info.num && description == info.description)); /* Multiple Devices with the same ID would be very bad. */
return id == info.id; assert(id != info.id ||
} (type == info.type && num == info.num && description == info.description));
return id == info.id;
}
}; };
class DeviceRequestedFeatures { class DeviceRequestedFeatures {
public: public:
/* Use experimental feature set. */ /* Use experimental feature set. */
bool experimental; bool experimental;
/* Selective nodes compilation. */ /* Selective nodes compilation. */
/* Identifier of a node group up to which all the nodes needs to be /* Identifier of a node group up to which all the nodes needs to be
* compiled in. Nodes from higher group indices will be ignores. * compiled in. Nodes from higher group indices will be ignores.
*/ */
int max_nodes_group; int max_nodes_group;
/* Features bitfield indicating which features from the requested group /* Features bitfield indicating which features from the requested group
* will be compiled in. Nodes which corresponds to features which are not * will be compiled in. Nodes which corresponds to features which are not
* in this bitfield will be ignored even if they're in the requested group. * in this bitfield will be ignored even if they're in the requested group.
*/ */
int nodes_features; int nodes_features;
/* BVH/sampling kernel features. */ /* BVH/sampling kernel features. */
bool use_hair; bool use_hair;
bool use_object_motion; bool use_object_motion;
bool use_camera_motion; bool use_camera_motion;
/* Denotes whether baking functionality is needed. */ /* Denotes whether baking functionality is needed. */
bool use_baking; bool use_baking;
/* Use subsurface scattering materials. */ /* Use subsurface scattering materials. */
bool use_subsurface; bool use_subsurface;
/* Use volume materials. */ /* Use volume materials. */
bool use_volume; bool use_volume;
/* Use branched integrator. */ /* Use branched integrator. */
bool use_integrator_branched; bool use_integrator_branched;
/* Use OpenSubdiv patch evaluation */ /* Use OpenSubdiv patch evaluation */
bool use_patch_evaluation; bool use_patch_evaluation;
/* Use Transparent shadows */ /* Use Transparent shadows */
bool use_transparent; bool use_transparent;
/* Use various shadow tricks, such as shadow catcher. */ /* Use various shadow tricks, such as shadow catcher. */
bool use_shadow_tricks; bool use_shadow_tricks;
/* Per-uber shader usage flags. */ /* Per-uber shader usage flags. */
bool use_principled; bool use_principled;
/* Denoising features. */ /* Denoising features. */
bool use_denoising; bool use_denoising;
/* Use raytracing in shaders. */ /* Use raytracing in shaders. */
bool use_shader_raytrace; bool use_shader_raytrace;
/* Use true displacement */ /* Use true displacement */
bool use_true_displacement; bool use_true_displacement;
/* Use background lights */ /* Use background lights */
bool use_background_light; bool use_background_light;
DeviceRequestedFeatures() DeviceRequestedFeatures()
{ {
/* TODO(sergey): Find more meaningful defaults. */ /* TODO(sergey): Find more meaningful defaults. */
experimental = false; experimental = false;
max_nodes_group = 0; max_nodes_group = 0;
nodes_features = 0; nodes_features = 0;
use_hair = false; use_hair = false;
use_object_motion = false; use_object_motion = false;
use_camera_motion = false; use_camera_motion = false;
use_baking = false; use_baking = false;
use_subsurface = false; use_subsurface = false;
use_volume = false; use_volume = false;
use_integrator_branched = false; use_integrator_branched = false;
use_patch_evaluation = false; use_patch_evaluation = false;
use_transparent = false; use_transparent = false;
use_shadow_tricks = false; use_shadow_tricks = false;
use_principled = false; use_principled = false;
use_denoising = false; use_denoising = false;
use_shader_raytrace = false; use_shader_raytrace = false;
use_true_displacement = false; use_true_displacement = false;
use_background_light = false; use_background_light = false;
} }
bool modified(const DeviceRequestedFeatures& requested_features) bool modified(const DeviceRequestedFeatures &requested_features)
{ {
return !(experimental == requested_features.experimental && return !(experimental == requested_features.experimental &&
max_nodes_group == requested_features.max_nodes_group && max_nodes_group == requested_features.max_nodes_group &&
nodes_features == requested_features.nodes_features && nodes_features == requested_features.nodes_features &&
use_hair == requested_features.use_hair && use_hair == requested_features.use_hair &&
use_object_motion == requested_features.use_object_motion && use_object_motion == requested_features.use_object_motion &&
use_camera_motion == requested_features.use_camera_motion && use_camera_motion == requested_features.use_camera_motion &&
use_baking == requested_features.use_baking && use_baking == requested_features.use_baking &&
use_subsurface == requested_features.use_subsurface && use_subsurface == requested_features.use_subsurface &&
use_volume == requested_features.use_volume && use_volume == requested_features.use_volume &&
use_integrator_branched == requested_features.use_integrator_branched && use_integrator_branched == requested_features.use_integrator_branched &&
use_patch_evaluation == requested_features.use_patch_evaluation && use_patch_evaluation == requested_features.use_patch_evaluation &&
use_transparent == requested_features.use_transparent && use_transparent == requested_features.use_transparent &&
use_shadow_tricks == requested_features.use_shadow_tricks && use_shadow_tricks == requested_features.use_shadow_tricks &&
use_principled == requested_features.use_principled && use_principled == requested_features.use_principled &&
use_denoising == requested_features.use_denoising && use_denoising == requested_features.use_denoising &&
use_shader_raytrace == requested_features.use_shader_raytrace && use_shader_raytrace == requested_features.use_shader_raytrace &&
use_true_displacement == requested_features.use_true_displacement && use_true_displacement == requested_features.use_true_displacement &&
use_background_light == requested_features.use_background_light); use_background_light == requested_features.use_background_light);
} }
/* Convert the requested features structure to a build options, /* Convert the requested features structure to a build options,
* which could then be passed to compilers. * which could then be passed to compilers.
*/ */
string get_build_options() const string get_build_options() const
{ {
string build_options = ""; string build_options = "";
if(experimental) { if (experimental) {
build_options += "-D__KERNEL_EXPERIMENTAL__ "; build_options += "-D__KERNEL_EXPERIMENTAL__ ";
} }
build_options += "-D__NODES_MAX_GROUP__=" + build_options += "-D__NODES_MAX_GROUP__=" + string_printf("%d", max_nodes_group);
string_printf("%d", max_nodes_group); build_options += " -D__NODES_FEATURES__=" + string_printf("%d", nodes_features);
build_options += " -D__NODES_FEATURES__=" + if (!use_hair) {
string_printf("%d", nodes_features); build_options += " -D__NO_HAIR__";
if(!use_hair) { }
build_options += " -D__NO_HAIR__"; if (!use_object_motion) {
} build_options += " -D__NO_OBJECT_MOTION__";
if(!use_object_motion) { }
build_options += " -D__NO_OBJECT_MOTION__"; if (!use_camera_motion) {
} build_options += " -D__NO_CAMERA_MOTION__";
if(!use_camera_motion) { }
build_options += " -D__NO_CAMERA_MOTION__"; if (!use_baking) {
} build_options += " -D__NO_BAKING__";
if(!use_baking) { }
build_options += " -D__NO_BAKING__"; if (!use_volume) {
} build_options += " -D__NO_VOLUME__";
if(!use_volume) { }
build_options += " -D__NO_VOLUME__"; if (!use_subsurface) {
} build_options += " -D__NO_SUBSURFACE__";
if(!use_subsurface) { }
build_options += " -D__NO_SUBSURFACE__"; if (!use_integrator_branched) {
} build_options += " -D__NO_BRANCHED_PATH__";
if(!use_integrator_branched) { }
build_options += " -D__NO_BRANCHED_PATH__"; if (!use_patch_evaluation) {
} build_options += " -D__NO_PATCH_EVAL__";
if(!use_patch_evaluation) { }
build_options += " -D__NO_PATCH_EVAL__"; if (!use_transparent && !use_volume) {
} build_options += " -D__NO_TRANSPARENT__";
if(!use_transparent && !use_volume) { }
build_options += " -D__NO_TRANSPARENT__"; if (!use_shadow_tricks) {
} build_options += " -D__NO_SHADOW_TRICKS__";
if(!use_shadow_tricks) { }
build_options += " -D__NO_SHADOW_TRICKS__"; if (!use_principled) {
} build_options += " -D__NO_PRINCIPLED__";
if(!use_principled) { }
build_options += " -D__NO_PRINCIPLED__"; if (!use_denoising) {
} build_options += " -D__NO_DENOISING__";
if(!use_denoising) { }
build_options += " -D__NO_DENOISING__"; if (!use_shader_raytrace) {
} build_options += " -D__NO_SHADER_RAYTRACE__";
if(!use_shader_raytrace) { }
build_options += " -D__NO_SHADER_RAYTRACE__"; return build_options;
} }
return build_options;
}
}; };
/* Stream output for DeviceRequestedFeatures; only declared here. */
std::ostream &operator<<(std::ostream &os, const DeviceRequestedFeatures &requested_features);
/* Device */ /* Device */
struct DeviceDrawParams { struct DeviceDrawParams {
function<void()> bind_display_space_shader_cb; function<void()> bind_display_space_shader_cb;
function<void()> unbind_display_space_shader_cb; function<void()> unbind_display_space_shader_cb;
}; };
class Device { class Device {
friend class device_sub_ptr; friend class device_sub_ptr;
protected:
enum {
FALLBACK_SHADER_STATUS_NONE = 0,
FALLBACK_SHADER_STATUS_ERROR,
FALLBACK_SHADER_STATUS_SUCCESS,
};
Device(DeviceInfo& info_, Stats &stats_, Profiler &profiler_, bool background) : background(background), protected:
vertex_buffer(0), enum {
fallback_status(FALLBACK_SHADER_STATUS_NONE), fallback_shader_program(0), FALLBACK_SHADER_STATUS_NONE = 0,
info(info_), stats(stats_), profiler(profiler_) {} FALLBACK_SHADER_STATUS_ERROR,
FALLBACK_SHADER_STATUS_SUCCESS,
};
bool background; Device(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background)
string error_msg; : background(background),
vertex_buffer(0),
fallback_status(FALLBACK_SHADER_STATUS_NONE),
fallback_shader_program(0),
info(info_),
stats(stats_),
profiler(profiler_)
{
}
/* used for real time display */ bool background;
unsigned int vertex_buffer; string error_msg;
int fallback_status, fallback_shader_program;
int image_texture_location, fullscreen_location;
bool bind_fallback_display_space_shader(const float width, const float height); /* used for real time display */
unsigned int vertex_buffer;
int fallback_status, fallback_shader_program;
int image_texture_location, fullscreen_location;
virtual device_ptr mem_alloc_sub_ptr(device_memory& /*mem*/, int /*offset*/, int /*size*/) bool bind_fallback_display_space_shader(const float width, const float height);
{
/* Only required for devices that implement denoising. */
assert(false);
return (device_ptr) 0;
}
virtual void mem_free_sub_ptr(device_ptr /*ptr*/) {};
public: virtual device_ptr mem_alloc_sub_ptr(device_memory & /*mem*/, int /*offset*/, int /*size*/)
virtual ~Device(); {
/* Only required for devices that implement denoising. */
assert(false);
return (device_ptr)0;
}
virtual void mem_free_sub_ptr(device_ptr /*ptr*/){};
/* info */ public:
DeviceInfo info; virtual ~Device();
virtual const string& error_message() { return error_msg; }
bool have_error() { return !error_message().empty(); }
virtual void set_error(const string& error)
{
if(!have_error()) {
error_msg = error;
}
fprintf(stderr, "%s\n", error.c_str());
fflush(stderr);
}
virtual bool show_samples() const { return false; }
virtual BVHLayoutMask get_bvh_layout_mask() const = 0;
/* statistics */ /* info */
Stats &stats; DeviceInfo info;
Profiler &profiler; virtual const string &error_message()
{
return error_msg;
}
bool have_error()
{
return !error_message().empty();
}
virtual void set_error(const string &error)
{
if (!have_error()) {
error_msg = error;
}
fprintf(stderr, "%s\n", error.c_str());
fflush(stderr);
}
virtual bool show_samples() const
{
return false;
}
virtual BVHLayoutMask get_bvh_layout_mask() const = 0;
/* memory alignment */ /* statistics */
virtual int mem_sub_ptr_alignment() { return MIN_ALIGNMENT_CPU_DATA_TYPES; } Stats &stats;
Profiler &profiler;
/* constant memory */ /* memory alignment */
virtual void const_copy_to(const char *name, void *host, size_t size) = 0; virtual int mem_sub_ptr_alignment()
{
return MIN_ALIGNMENT_CPU_DATA_TYPES;
}
/* open shading language, only for CPU device */ /* constant memory */
virtual void *osl_memory() { return NULL; } virtual void const_copy_to(const char *name, void *host, size_t size) = 0;
/* load/compile kernels, must be called before adding tasks */ /* open shading language, only for CPU device */
virtual bool load_kernels( virtual void *osl_memory()
const DeviceRequestedFeatures& /*requested_features*/) {
{ return true; } return NULL;
}
/* Wait for device to become available to upload data and receive tasks /* load/compile kernels, must be called before adding tasks */
* This method is used by the OpenCL device to load the virtual bool load_kernels(const DeviceRequestedFeatures & /*requested_features*/)
* optimized kernels or when not (yet) available load the {
* generic kernels (only during foreground rendering) */ return true;
virtual bool wait_for_availability( }
const DeviceRequestedFeatures& /*requested_features*/)
{ return true; }
/* Check if there are 'better' kernels available to be used
* We can switch over to these kernels
* This method is used to determine if we can switch the preview kernels
* to regular kernels */
virtual DeviceKernelStatus get_active_kernel_switch_state()
{ return DEVICE_KERNEL_USING_FEATURE_KERNEL; }
/* tasks */ /* Wait for device to become available to upload data and receive tasks
virtual int get_split_task_count(DeviceTask& task) = 0; * This method is used by the OpenCL device to load the
virtual void task_add(DeviceTask& task) = 0; * optimized kernels or when not (yet) available load the
virtual void task_wait() = 0; * generic kernels (only during foreground rendering) */
virtual void task_cancel() = 0; virtual bool wait_for_availability(const DeviceRequestedFeatures & /*requested_features*/)
{
return true;
}
/* Check if there are 'better' kernels available to be used
* We can switch over to these kernels
* This method is used to determine if we can switch the preview kernels
* to regular kernels */
virtual DeviceKernelStatus get_active_kernel_switch_state()
{
return DEVICE_KERNEL_USING_FEATURE_KERNEL;
}
/* opengl drawing */ /* tasks */
virtual void draw_pixels(device_memory& mem, int y, virtual int get_split_task_count(DeviceTask &task) = 0;
int w, int h, int width, int height, virtual void task_add(DeviceTask &task) = 0;
int dx, int dy, int dw, int dh, virtual void task_wait() = 0;
bool transparent, const DeviceDrawParams &draw_params); virtual void task_cancel() = 0;
/* opengl drawing */
virtual void draw_pixels(device_memory &mem,
int y,
int w,
int h,
int width,
int height,
int dx,
int dy,
int dw,
int dh,
bool transparent,
const DeviceDrawParams &draw_params);
#ifdef WITH_NETWORK #ifdef WITH_NETWORK
/* networking */ /* networking */
void server_run(); void server_run();
#endif #endif
/* multi device */ /* multi device */
virtual void map_tile(Device * /*sub_device*/, RenderTile& /*tile*/) {} virtual void map_tile(Device * /*sub_device*/, RenderTile & /*tile*/)
virtual int device_number(Device * /*sub_device*/) { return 0; } {
virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/) {} }
virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/) {} virtual int device_number(Device * /*sub_device*/)
{
return 0;
}
virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/)
{
}
virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/)
{
}
/* static */ /* static */
static Device *create(DeviceInfo& info, Stats &stats, Profiler& profiler, bool background = true); static Device *create(DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool background = true);
static DeviceType type_from_string(const char *name); static DeviceType type_from_string(const char *name);
static string string_from_type(DeviceType type); static string string_from_type(DeviceType type);
static vector<DeviceType> available_types(); static vector<DeviceType> available_types();
static vector<DeviceInfo> available_devices(uint device_type_mask = DEVICE_MASK_ALL); static vector<DeviceInfo> available_devices(uint device_type_mask = DEVICE_MASK_ALL);
static string device_capabilities(uint device_type_mask = DEVICE_MASK_ALL); static string device_capabilities(uint device_type_mask = DEVICE_MASK_ALL);
static DeviceInfo get_multi_device(const vector<DeviceInfo>& subdevices, static DeviceInfo get_multi_device(const vector<DeviceInfo> &subdevices,
int threads, int threads,
bool background); bool background);
/* Tag devices lists for update. */ /* Tag devices lists for update. */
static void tag_update(); static void tag_update();
static void free_memory(); static void free_memory();
protected: protected:
/* Memory allocation, only accessed through device_memory. */ /* Memory allocation, only accessed through device_memory. */
friend class MultiDevice; friend class MultiDevice;
friend class DeviceServer; friend class DeviceServer;
friend class device_memory; friend class device_memory;
virtual void mem_alloc(device_memory& mem) = 0; virtual void mem_alloc(device_memory &mem) = 0;
virtual void mem_copy_to(device_memory& mem) = 0; virtual void mem_copy_to(device_memory &mem) = 0;
virtual void mem_copy_from(device_memory& mem, virtual void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) = 0;
int y, int w, int h, int elem) = 0; virtual void mem_zero(device_memory &mem) = 0;
virtual void mem_zero(device_memory& mem) = 0; virtual void mem_free(device_memory &mem) = 0;
virtual void mem_free(device_memory& mem) = 0;
private: private:
/* Indicted whether device types and devices lists were initialized. */ /* Indicted whether device types and devices lists were initialized. */
static bool need_types_update, need_devices_update; static bool need_types_update, need_devices_update;
static thread_mutex device_mutex; static thread_mutex device_mutex;
static vector<DeviceInfo> cuda_devices; static vector<DeviceInfo> cuda_devices;
static vector<DeviceInfo> opencl_devices; static vector<DeviceInfo> opencl_devices;
static vector<DeviceInfo> cpu_devices; static vector<DeviceInfo> cpu_devices;
static vector<DeviceInfo> network_devices; static vector<DeviceInfo> network_devices;
static uint devices_initialized_mask; static uint devices_initialized_mask;
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __DEVICE_H__ */ #endif /* __DEVICE_H__ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -21,314 +21,329 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
DenoisingTask::DenoisingTask(Device *device, const DeviceTask &task) DenoisingTask::DenoisingTask(Device *device, const DeviceTask &task)
: tile_info_mem(device, "denoising tile info mem", MEM_READ_WRITE), : tile_info_mem(device, "denoising tile info mem", MEM_READ_WRITE),
profiler(NULL), profiler(NULL),
storage(device), storage(device),
buffer(device), buffer(device),
device(device) device(device)
{ {
radius = task.denoising.radius; radius = task.denoising.radius;
nlm_k_2 = powf(2.0f, lerp(-5.0f, 3.0f, task.denoising.strength)); nlm_k_2 = powf(2.0f, lerp(-5.0f, 3.0f, task.denoising.strength));
if(task.denoising.relative_pca) { if (task.denoising.relative_pca) {
pca_threshold = -powf(10.0f, lerp(-8.0f, 0.0f, task.denoising.feature_strength)); pca_threshold = -powf(10.0f, lerp(-8.0f, 0.0f, task.denoising.feature_strength));
} }
else { else {
pca_threshold = powf(10.0f, lerp(-5.0f, 3.0f, task.denoising.feature_strength)); pca_threshold = powf(10.0f, lerp(-5.0f, 3.0f, task.denoising.feature_strength));
} }
render_buffer.frame_stride = task.frame_stride; render_buffer.frame_stride = task.frame_stride;
render_buffer.pass_stride = task.pass_stride; render_buffer.pass_stride = task.pass_stride;
render_buffer.offset = task.pass_denoising_data; render_buffer.offset = task.pass_denoising_data;
target_buffer.pass_stride = task.target_pass_stride; target_buffer.pass_stride = task.target_pass_stride;
target_buffer.denoising_clean_offset = task.pass_denoising_clean; target_buffer.denoising_clean_offset = task.pass_denoising_clean;
target_buffer.offset = 0; target_buffer.offset = 0;
functions.map_neighbor_tiles = function_bind(task.map_neighbor_tiles, _1, device); functions.map_neighbor_tiles = function_bind(task.map_neighbor_tiles, _1, device);
functions.unmap_neighbor_tiles = function_bind(task.unmap_neighbor_tiles, _1, device); functions.unmap_neighbor_tiles = function_bind(task.unmap_neighbor_tiles, _1, device);
tile_info = (TileInfo*) tile_info_mem.alloc(sizeof(TileInfo)/sizeof(int)); tile_info = (TileInfo *)tile_info_mem.alloc(sizeof(TileInfo) / sizeof(int));
tile_info->from_render = task.denoising_from_render? 1 : 0; tile_info->from_render = task.denoising_from_render ? 1 : 0;
tile_info->frames[0] = 0; tile_info->frames[0] = 0;
tile_info->num_frames = min(task.denoising_frames.size() + 1, DENOISE_MAX_FRAMES); tile_info->num_frames = min(task.denoising_frames.size() + 1, DENOISE_MAX_FRAMES);
for(int i = 1; i < tile_info->num_frames; i++) { for (int i = 1; i < tile_info->num_frames; i++) {
tile_info->frames[i] = task.denoising_frames[i-1]; tile_info->frames[i] = task.denoising_frames[i - 1];
} }
write_passes = task.denoising_write_passes; write_passes = task.denoising_write_passes;
do_filter = task.denoising_do_filter; do_filter = task.denoising_do_filter;
} }
DenoisingTask::~DenoisingTask() DenoisingTask::~DenoisingTask()
{ {
storage.XtWX.free(); storage.XtWX.free();
storage.XtWY.free(); storage.XtWY.free();
storage.transform.free(); storage.transform.free();
storage.rank.free(); storage.rank.free();
buffer.mem.free(); buffer.mem.free();
buffer.temporary_mem.free(); buffer.temporary_mem.free();
tile_info_mem.free(); tile_info_mem.free();
} }
void DenoisingTask::set_render_buffer(RenderTile *rtiles) void DenoisingTask::set_render_buffer(RenderTile *rtiles)
{ {
for(int i = 0; i < 9; i++) { for (int i = 0; i < 9; i++) {
tile_info->offsets[i] = rtiles[i].offset; tile_info->offsets[i] = rtiles[i].offset;
tile_info->strides[i] = rtiles[i].stride; tile_info->strides[i] = rtiles[i].stride;
tile_info->buffers[i] = rtiles[i].buffer; tile_info->buffers[i] = rtiles[i].buffer;
} }
tile_info->x[0] = rtiles[3].x; tile_info->x[0] = rtiles[3].x;
tile_info->x[1] = rtiles[4].x; tile_info->x[1] = rtiles[4].x;
tile_info->x[2] = rtiles[5].x; tile_info->x[2] = rtiles[5].x;
tile_info->x[3] = rtiles[5].x + rtiles[5].w; tile_info->x[3] = rtiles[5].x + rtiles[5].w;
tile_info->y[0] = rtiles[1].y; tile_info->y[0] = rtiles[1].y;
tile_info->y[1] = rtiles[4].y; tile_info->y[1] = rtiles[4].y;
tile_info->y[2] = rtiles[7].y; tile_info->y[2] = rtiles[7].y;
tile_info->y[3] = rtiles[7].y + rtiles[7].h; tile_info->y[3] = rtiles[7].y + rtiles[7].h;
target_buffer.offset = rtiles[9].offset; target_buffer.offset = rtiles[9].offset;
target_buffer.stride = rtiles[9].stride; target_buffer.stride = rtiles[9].stride;
target_buffer.ptr = rtiles[9].buffer; target_buffer.ptr = rtiles[9].buffer;
if(write_passes && rtiles[9].buffers) { if (write_passes && rtiles[9].buffers) {
target_buffer.denoising_output_offset = rtiles[9].buffers->params.get_denoising_prefiltered_offset(); target_buffer.denoising_output_offset =
} rtiles[9].buffers->params.get_denoising_prefiltered_offset();
else { }
target_buffer.denoising_output_offset = 0; else {
} target_buffer.denoising_output_offset = 0;
}
tile_info_mem.copy_to_device(); tile_info_mem.copy_to_device();
} }
void DenoisingTask::setup_denoising_buffer() void DenoisingTask::setup_denoising_buffer()
{ {
/* Expand filter_area by radius pixels and clamp the result to the extent of the neighboring tiles */ /* Expand filter_area by radius pixels and clamp the result to the extent of the neighboring tiles */
rect = rect_from_shape(filter_area.x, filter_area.y, filter_area.z, filter_area.w); rect = rect_from_shape(filter_area.x, filter_area.y, filter_area.z, filter_area.w);
rect = rect_expand(rect, radius); rect = rect_expand(rect, radius);
rect = rect_clip(rect, make_int4(tile_info->x[0], tile_info->y[0], tile_info->x[3], tile_info->y[3])); rect = rect_clip(rect,
make_int4(tile_info->x[0], tile_info->y[0], tile_info->x[3], tile_info->y[3]));
buffer.use_intensity = write_passes || (tile_info->num_frames > 1); buffer.use_intensity = write_passes || (tile_info->num_frames > 1);
buffer.passes = buffer.use_intensity? 15 : 14; buffer.passes = buffer.use_intensity ? 15 : 14;
buffer.width = rect.z - rect.x; buffer.width = rect.z - rect.x;
buffer.stride = align_up(buffer.width, 4); buffer.stride = align_up(buffer.width, 4);
buffer.h = rect.w - rect.y; buffer.h = rect.w - rect.y;
int alignment_floats = divide_up(device->mem_sub_ptr_alignment(), sizeof(float)); int alignment_floats = divide_up(device->mem_sub_ptr_alignment(), sizeof(float));
buffer.pass_stride = align_up(buffer.stride * buffer.h, alignment_floats); buffer.pass_stride = align_up(buffer.stride * buffer.h, alignment_floats);
buffer.frame_stride = buffer.pass_stride * buffer.passes; buffer.frame_stride = buffer.pass_stride * buffer.passes;
/* Pad the total size by four floats since the SIMD kernels might go a bit over the end. */ /* Pad the total size by four floats since the SIMD kernels might go a bit over the end. */
int mem_size = align_up(tile_info->num_frames * buffer.frame_stride + 4, alignment_floats); int mem_size = align_up(tile_info->num_frames * buffer.frame_stride + 4, alignment_floats);
buffer.mem.alloc_to_device(mem_size, false); buffer.mem.alloc_to_device(mem_size, false);
buffer.use_time = (tile_info->num_frames > 1); buffer.use_time = (tile_info->num_frames > 1);
/* CPUs process shifts sequentially while GPUs process them in parallel. */ /* CPUs process shifts sequentially while GPUs process them in parallel. */
int num_layers; int num_layers;
if(buffer.gpu_temporary_mem) { if (buffer.gpu_temporary_mem) {
/* Shadowing prefiltering uses a radius of 6, so allocate at least that much. */ /* Shadowing prefiltering uses a radius of 6, so allocate at least that much. */
int max_radius = max(radius, 6); int max_radius = max(radius, 6);
int num_shifts = (2*max_radius + 1) * (2*max_radius + 1); int num_shifts = (2 * max_radius + 1) * (2 * max_radius + 1);
num_layers = 2*num_shifts + 1; num_layers = 2 * num_shifts + 1;
} }
else { else {
num_layers = 3; num_layers = 3;
} }
/* Allocate two layers per shift as well as one for the weight accumulation. */ /* Allocate two layers per shift as well as one for the weight accumulation. */
buffer.temporary_mem.alloc_to_device(num_layers * buffer.pass_stride); buffer.temporary_mem.alloc_to_device(num_layers * buffer.pass_stride);
} }
void DenoisingTask::prefilter_shadowing() void DenoisingTask::prefilter_shadowing()
{ {
device_ptr null_ptr = (device_ptr) 0; device_ptr null_ptr = (device_ptr)0;
device_sub_ptr unfiltered_a (buffer.mem, 0, buffer.pass_stride); device_sub_ptr unfiltered_a(buffer.mem, 0, buffer.pass_stride);
device_sub_ptr unfiltered_b (buffer.mem, 1*buffer.pass_stride, buffer.pass_stride); device_sub_ptr unfiltered_b(buffer.mem, 1 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr sample_var (buffer.mem, 2*buffer.pass_stride, buffer.pass_stride); device_sub_ptr sample_var(buffer.mem, 2 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr sample_var_var (buffer.mem, 3*buffer.pass_stride, buffer.pass_stride); device_sub_ptr sample_var_var(buffer.mem, 3 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr buffer_var (buffer.mem, 5*buffer.pass_stride, buffer.pass_stride); device_sub_ptr buffer_var(buffer.mem, 5 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr filtered_var (buffer.mem, 6*buffer.pass_stride, buffer.pass_stride); device_sub_ptr filtered_var(buffer.mem, 6 * buffer.pass_stride, buffer.pass_stride);
/* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the sample variance and the buffer variance. */ /* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the sample variance and the buffer variance. */
functions.divide_shadow(*unfiltered_a, *unfiltered_b, *sample_var, *sample_var_var, *buffer_var); functions.divide_shadow(*unfiltered_a, *unfiltered_b, *sample_var, *sample_var_var, *buffer_var);
/* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */ /* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */
nlm_state.set_parameters(6, 3, 4.0f, 1.0f, false); nlm_state.set_parameters(6, 3, 4.0f, 1.0f, false);
functions.non_local_means(*buffer_var, *sample_var, *sample_var_var, *filtered_var); functions.non_local_means(*buffer_var, *sample_var, *sample_var_var, *filtered_var);
/* Reuse memory, the previous data isn't needed anymore. */ /* Reuse memory, the previous data isn't needed anymore. */
device_ptr filtered_a = *buffer_var, device_ptr filtered_a = *buffer_var, filtered_b = *sample_var;
filtered_b = *sample_var; /* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */
/* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */ nlm_state.set_parameters(5, 3, 1.0f, 0.25f, false);
nlm_state.set_parameters(5, 3, 1.0f, 0.25f, false); functions.non_local_means(*unfiltered_a, *unfiltered_b, *filtered_var, filtered_a);
functions.non_local_means(*unfiltered_a, *unfiltered_b, *filtered_var, filtered_a); functions.non_local_means(*unfiltered_b, *unfiltered_a, *filtered_var, filtered_b);
functions.non_local_means(*unfiltered_b, *unfiltered_a, *filtered_var, filtered_b);
device_ptr residual_var = *sample_var_var; device_ptr residual_var = *sample_var_var;
/* Estimate the residual variance between the two filtered halves. */ /* Estimate the residual variance between the two filtered halves. */
functions.combine_halves(filtered_a, filtered_b, null_ptr, residual_var, 2, rect); functions.combine_halves(filtered_a, filtered_b, null_ptr, residual_var, 2, rect);
device_ptr final_a = *unfiltered_a, device_ptr final_a = *unfiltered_a, final_b = *unfiltered_b;
final_b = *unfiltered_b; /* Use the residual variance for a second filter pass. */
/* Use the residual variance for a second filter pass. */ nlm_state.set_parameters(4, 2, 1.0f, 0.5f, false);
nlm_state.set_parameters(4, 2, 1.0f, 0.5f, false); functions.non_local_means(filtered_a, filtered_b, residual_var, final_a);
functions.non_local_means(filtered_a, filtered_b, residual_var, final_a); functions.non_local_means(filtered_b, filtered_a, residual_var, final_b);
functions.non_local_means(filtered_b, filtered_a, residual_var, final_b);
/* Combine the two double-filtered halves to a final shadow feature. */ /* Combine the two double-filtered halves to a final shadow feature. */
device_sub_ptr shadow_pass(buffer.mem, 4*buffer.pass_stride, buffer.pass_stride); device_sub_ptr shadow_pass(buffer.mem, 4 * buffer.pass_stride, buffer.pass_stride);
functions.combine_halves(final_a, final_b, *shadow_pass, null_ptr, 0, rect); functions.combine_halves(final_a, final_b, *shadow_pass, null_ptr, 0, rect);
} }
void DenoisingTask::prefilter_features() void DenoisingTask::prefilter_features()
{ {
device_sub_ptr unfiltered (buffer.mem, 8*buffer.pass_stride, buffer.pass_stride); device_sub_ptr unfiltered(buffer.mem, 8 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr variance (buffer.mem, 9*buffer.pass_stride, buffer.pass_stride); device_sub_ptr variance(buffer.mem, 9 * buffer.pass_stride, buffer.pass_stride);
int mean_from[] = { 0, 1, 2, 12, 6, 7, 8 }; int mean_from[] = {0, 1, 2, 12, 6, 7, 8};
int variance_from[] = { 3, 4, 5, 13, 9, 10, 11}; int variance_from[] = {3, 4, 5, 13, 9, 10, 11};
int pass_to[] = { 1, 2, 3, 0, 5, 6, 7}; int pass_to[] = {1, 2, 3, 0, 5, 6, 7};
for(int pass = 0; pass < 7; pass++) { for (int pass = 0; pass < 7; pass++) {
device_sub_ptr feature_pass(buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride); device_sub_ptr feature_pass(
/* Get the unfiltered pass and its variance from the RenderBuffers. */ buffer.mem, pass_to[pass] * buffer.pass_stride, buffer.pass_stride);
functions.get_feature(mean_from[pass], variance_from[pass], *unfiltered, *variance, 1.0f / render_buffer.samples); /* Get the unfiltered pass and its variance from the RenderBuffers. */
/* Smooth the pass and store the result in the denoising buffers. */ functions.get_feature(mean_from[pass],
nlm_state.set_parameters(2, 2, 1.0f, 0.25f, false); variance_from[pass],
functions.non_local_means(*unfiltered, *unfiltered, *variance, *feature_pass); *unfiltered,
} *variance,
1.0f / render_buffer.samples);
/* Smooth the pass and store the result in the denoising buffers. */
nlm_state.set_parameters(2, 2, 1.0f, 0.25f, false);
functions.non_local_means(*unfiltered, *unfiltered, *variance, *feature_pass);
}
} }
void DenoisingTask::prefilter_color() void DenoisingTask::prefilter_color()
{ {
int mean_from[] = {20, 21, 22}; int mean_from[] = {20, 21, 22};
int variance_from[] = {23, 24, 25}; int variance_from[] = {23, 24, 25};
int mean_to[] = { 8, 9, 10}; int mean_to[] = {8, 9, 10};
int variance_to[] = {11, 12, 13}; int variance_to[] = {11, 12, 13};
int num_color_passes = 3; int num_color_passes = 3;
device_only_memory<float> temporary_color(device, "denoising temporary color"); device_only_memory<float> temporary_color(device, "denoising temporary color");
temporary_color.alloc_to_device(3*buffer.pass_stride, false); temporary_color.alloc_to_device(3 * buffer.pass_stride, false);
for(int pass = 0; pass < num_color_passes; pass++) { for (int pass = 0; pass < num_color_passes; pass++) {
device_sub_ptr color_pass(temporary_color, pass*buffer.pass_stride, buffer.pass_stride); device_sub_ptr color_pass(temporary_color, pass * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr color_var_pass(buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride); device_sub_ptr color_var_pass(
functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass, 1.0f / render_buffer.samples); buffer.mem, variance_to[pass] * buffer.pass_stride, buffer.pass_stride);
} functions.get_feature(mean_from[pass],
variance_from[pass],
*color_pass,
*color_var_pass,
1.0f / render_buffer.samples);
}
device_sub_ptr depth_pass (buffer.mem, 0, buffer.pass_stride); device_sub_ptr depth_pass(buffer.mem, 0, buffer.pass_stride);
device_sub_ptr color_var_pass(buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride); device_sub_ptr color_var_pass(
device_sub_ptr output_pass (buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride); buffer.mem, variance_to[0] * buffer.pass_stride, 3 * buffer.pass_stride);
functions.detect_outliers(temporary_color.device_pointer, *color_var_pass, *depth_pass, *output_pass); device_sub_ptr output_pass(buffer.mem, mean_to[0] * buffer.pass_stride, 3 * buffer.pass_stride);
functions.detect_outliers(
temporary_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
if(buffer.use_intensity) { if (buffer.use_intensity) {
device_sub_ptr intensity_pass(buffer.mem, 14*buffer.pass_stride, buffer.pass_stride); device_sub_ptr intensity_pass(buffer.mem, 14 * buffer.pass_stride, buffer.pass_stride);
nlm_state.set_parameters(radius, 4, 2.0f, nlm_k_2*4.0f, true); nlm_state.set_parameters(radius, 4, 2.0f, nlm_k_2 * 4.0f, true);
functions.non_local_means(*output_pass, *output_pass, *color_var_pass, *intensity_pass); functions.non_local_means(*output_pass, *output_pass, *color_var_pass, *intensity_pass);
} }
} }
void DenoisingTask::load_buffer() void DenoisingTask::load_buffer()
{ {
device_ptr null_ptr = (device_ptr) 0; device_ptr null_ptr = (device_ptr)0;
int original_offset = render_buffer.offset; int original_offset = render_buffer.offset;
int num_passes = buffer.use_intensity? 15 : 14; int num_passes = buffer.use_intensity ? 15 : 14;
for(int i = 0; i < tile_info->num_frames; i++) { for (int i = 0; i < tile_info->num_frames; i++) {
for(int pass = 0; pass < num_passes; pass++) { for (int pass = 0; pass < num_passes; pass++) {
device_sub_ptr to_pass(buffer.mem, i*buffer.frame_stride + pass*buffer.pass_stride, buffer.pass_stride); device_sub_ptr to_pass(
bool is_variance = (pass >= 11) && (pass <= 13); buffer.mem, i * buffer.frame_stride + pass * buffer.pass_stride, buffer.pass_stride);
functions.get_feature(pass, -1, *to_pass, null_ptr, is_variance? (1.0f / render_buffer.samples) : 1.0f); bool is_variance = (pass >= 11) && (pass <= 13);
} functions.get_feature(
render_buffer.offset += render_buffer.frame_stride; pass, -1, *to_pass, null_ptr, is_variance ? (1.0f / render_buffer.samples) : 1.0f);
} }
render_buffer.offset += render_buffer.frame_stride;
}
render_buffer.offset = original_offset; render_buffer.offset = original_offset;
} }
void DenoisingTask::write_buffer() void DenoisingTask::write_buffer()
{ {
reconstruction_state.buffer_params = make_int4(target_buffer.offset, reconstruction_state.buffer_params = make_int4(target_buffer.offset,
target_buffer.stride, target_buffer.stride,
target_buffer.pass_stride, target_buffer.pass_stride,
target_buffer.denoising_clean_offset); target_buffer.denoising_clean_offset);
int num_passes = buffer.use_intensity? 15 : 14; int num_passes = buffer.use_intensity ? 15 : 14;
for(int pass = 0; pass < num_passes; pass++) { for (int pass = 0; pass < num_passes; pass++) {
device_sub_ptr from_pass(buffer.mem, pass*buffer.pass_stride, buffer.pass_stride); device_sub_ptr from_pass(buffer.mem, pass * buffer.pass_stride, buffer.pass_stride);
int out_offset = pass + target_buffer.denoising_output_offset; int out_offset = pass + target_buffer.denoising_output_offset;
functions.write_feature(out_offset, *from_pass, target_buffer.ptr); functions.write_feature(out_offset, *from_pass, target_buffer.ptr);
} }
} }
void DenoisingTask::construct_transform() void DenoisingTask::construct_transform()
{ {
storage.w = filter_area.z; storage.w = filter_area.z;
storage.h = filter_area.w; storage.h = filter_area.w;
storage.transform.alloc_to_device(storage.w*storage.h*TRANSFORM_SIZE, false); storage.transform.alloc_to_device(storage.w * storage.h * TRANSFORM_SIZE, false);
storage.rank.alloc_to_device(storage.w*storage.h, false); storage.rank.alloc_to_device(storage.w * storage.h, false);
functions.construct_transform(); functions.construct_transform();
} }
void DenoisingTask::reconstruct() void DenoisingTask::reconstruct()
{ {
storage.XtWX.alloc_to_device(storage.w*storage.h*XTWX_SIZE, false); storage.XtWX.alloc_to_device(storage.w * storage.h * XTWX_SIZE, false);
storage.XtWY.alloc_to_device(storage.w*storage.h*XTWY_SIZE, false); storage.XtWY.alloc_to_device(storage.w * storage.h * XTWY_SIZE, false);
storage.XtWX.zero_to_device(); storage.XtWX.zero_to_device();
storage.XtWY.zero_to_device(); storage.XtWY.zero_to_device();
reconstruction_state.filter_window = rect_from_shape(filter_area.x-rect.x, filter_area.y-rect.y, storage.w, storage.h); reconstruction_state.filter_window = rect_from_shape(
int tile_coordinate_offset = filter_area.y*target_buffer.stride + filter_area.x; filter_area.x - rect.x, filter_area.y - rect.y, storage.w, storage.h);
reconstruction_state.buffer_params = make_int4(target_buffer.offset + tile_coordinate_offset, int tile_coordinate_offset = filter_area.y * target_buffer.stride + filter_area.x;
target_buffer.stride, reconstruction_state.buffer_params = make_int4(target_buffer.offset + tile_coordinate_offset,
target_buffer.pass_stride, target_buffer.stride,
target_buffer.denoising_clean_offset); target_buffer.pass_stride,
reconstruction_state.source_w = rect.z-rect.x; target_buffer.denoising_clean_offset);
reconstruction_state.source_h = rect.w-rect.y; reconstruction_state.source_w = rect.z - rect.x;
reconstruction_state.source_h = rect.w - rect.y;
device_sub_ptr color_ptr (buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride); device_sub_ptr color_ptr(buffer.mem, 8 * buffer.pass_stride, 3 * buffer.pass_stride);
device_sub_ptr color_var_ptr(buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride); device_sub_ptr color_var_ptr(buffer.mem, 11 * buffer.pass_stride, 3 * buffer.pass_stride);
for(int f = 0; f < tile_info->num_frames; f++) { for (int f = 0; f < tile_info->num_frames; f++) {
device_ptr scale_ptr = 0; device_ptr scale_ptr = 0;
device_sub_ptr *scale_sub_ptr = NULL; device_sub_ptr *scale_sub_ptr = NULL;
if(tile_info->frames[f] != 0 && (tile_info->num_frames > 1)) { if (tile_info->frames[f] != 0 && (tile_info->num_frames > 1)) {
scale_sub_ptr = new device_sub_ptr(buffer.mem, 14*buffer.pass_stride, buffer.pass_stride); scale_sub_ptr = new device_sub_ptr(buffer.mem, 14 * buffer.pass_stride, buffer.pass_stride);
scale_ptr = **scale_sub_ptr; scale_ptr = **scale_sub_ptr;
} }
functions.accumulate(*color_ptr, *color_var_ptr, scale_ptr, f); functions.accumulate(*color_ptr, *color_var_ptr, scale_ptr, f);
delete scale_sub_ptr; delete scale_sub_ptr;
} }
functions.solve(target_buffer.ptr); functions.solve(target_buffer.ptr);
} }
/* Runs the denoising pipeline for a single tile: maps the neighbor tiles, sets up the
 * render and denoising buffers, prepares the input (prefilter or load), optionally
 * filters and writes the passes, and finally unmaps the neighbor tiles again. */
void DenoisingTask::run_denoising(RenderTile *tile) void DenoisingTask::run_denoising(RenderTile *tile)
{ {
RenderTile rtiles[10]; RenderTile rtiles[10];
/* The tile being denoised is copied into slot 4; the remaining slots are left for
 * map_neighbor_tiles() to fill. NOTE(review): presumably slot 4 is the center of a
 * 3x3 neighborhood -- confirm against the device implementations. */
rtiles[4] = *tile; rtiles[4] = *tile;
functions.map_neighbor_tiles(rtiles); functions.map_neighbor_tiles(rtiles);
set_render_buffer(rtiles); set_render_buffer(rtiles);
setup_denoising_buffer(); setup_denoising_buffer();
/* Input coming straight from a render is prefiltered here; otherwise the buffer
 * is loaded via load_buffer(). */
if(tile_info->from_render) { if (tile_info->from_render) {
prefilter_shadowing(); prefilter_shadowing();
prefilter_features(); prefilter_features();
prefilter_color(); prefilter_color();
} }
else { else {
load_buffer(); load_buffer();
} }
/* Filtering and writing of passes are independent, individually optional steps. */
if(do_filter) { if (do_filter) {
construct_transform(); construct_transform();
reconstruct(); reconstruct();
} }
if(write_passes) { if (write_passes) {
write_buffer(); write_buffer();
} }
/* Always paired with the map_neighbor_tiles() call above. */
functions.unmap_neighbor_tiles(rtiles); functions.unmap_neighbor_tiles(rtiles);
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -28,165 +28,169 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
class DenoisingTask { class DenoisingTask {
public: public:
/* Parameters of the denoising algorithm. */ /* Parameters of the denoising algorithm. */
int radius; int radius;
float nlm_k_2; float nlm_k_2;
float pca_threshold; float pca_threshold;
/* Parameters of the RenderBuffers. */ /* Parameters of the RenderBuffers. */
struct RenderBuffers { struct RenderBuffers {
int offset; int offset;
int pass_stride; int pass_stride;
int frame_stride; int frame_stride;
int samples; int samples;
} render_buffer; } render_buffer;
/* Pointer and parameters of the target buffer. */ /* Pointer and parameters of the target buffer. */
struct TargetBuffer { struct TargetBuffer {
int offset; int offset;
int stride; int stride;
int pass_stride; int pass_stride;
int denoising_clean_offset; int denoising_clean_offset;
int denoising_output_offset; int denoising_output_offset;
device_ptr ptr; device_ptr ptr;
} target_buffer; } target_buffer;
TileInfo *tile_info; TileInfo *tile_info;
device_vector<int> tile_info_mem; device_vector<int> tile_info_mem;
ProfilingState *profiler; ProfilingState *profiler;
int4 rect; int4 rect;
int4 filter_area; int4 filter_area;
bool write_passes; bool write_passes;
bool do_filter; bool do_filter;
struct DeviceFunctions { struct DeviceFunctions {
function<bool(device_ptr image_ptr, /* Contains the values that are smoothed. */ function<bool(
device_ptr guide_ptr, /* Contains the values that are used to calculate weights. */ device_ptr image_ptr, /* Contains the values that are smoothed. */
device_ptr variance_ptr, /* Contains the variance of the guide image. */ device_ptr guide_ptr, /* Contains the values that are used to calculate weights. */
device_ptr out_ptr /* The filtered output is written into this image. */ device_ptr variance_ptr, /* Contains the variance of the guide image. */
)> non_local_means; device_ptr out_ptr /* The filtered output is written into this image. */
function<bool(device_ptr color_ptr, )>
device_ptr color_variance_ptr, non_local_means;
device_ptr scale_ptr, function<bool(
int frame device_ptr color_ptr, device_ptr color_variance_ptr, device_ptr scale_ptr, int frame)>
)> accumulate; accumulate;
function<bool(device_ptr output_ptr)> solve; function<bool(device_ptr output_ptr)> solve;
function<bool()> construct_transform; function<bool()> construct_transform;
function<bool(device_ptr a_ptr, function<bool(device_ptr a_ptr,
device_ptr b_ptr, device_ptr b_ptr,
device_ptr mean_ptr, device_ptr mean_ptr,
device_ptr variance_ptr, device_ptr variance_ptr,
int r, int r,
int4 rect int4 rect)>
)> combine_halves; combine_halves;
function<bool(device_ptr a_ptr, function<bool(device_ptr a_ptr,
device_ptr b_ptr, device_ptr b_ptr,
device_ptr sample_variance_ptr, device_ptr sample_variance_ptr,
device_ptr sv_variance_ptr, device_ptr sv_variance_ptr,
device_ptr buffer_variance_ptr device_ptr buffer_variance_ptr)>
)> divide_shadow; divide_shadow;
function<bool(int mean_offset, function<bool(int mean_offset,
int variance_offset, int variance_offset,
device_ptr mean_ptr, device_ptr mean_ptr,
device_ptr variance_ptr, device_ptr variance_ptr,
float scale float scale)>
)> get_feature; get_feature;
function<bool(device_ptr image_ptr, function<bool(device_ptr image_ptr,
device_ptr variance_ptr, device_ptr variance_ptr,
device_ptr depth_ptr, device_ptr depth_ptr,
device_ptr output_ptr device_ptr output_ptr)>
)> detect_outliers; detect_outliers;
function<bool(int out_offset, function<bool(int out_offset, device_ptr frop_ptr, device_ptr buffer_ptr)> write_feature;
device_ptr frop_ptr, function<void(RenderTile *rtiles)> map_neighbor_tiles;
device_ptr buffer_ptr function<void(RenderTile *rtiles)> unmap_neighbor_tiles;
)> write_feature; } functions;
function<void(RenderTile *rtiles)> map_neighbor_tiles;
function<void(RenderTile *rtiles)> unmap_neighbor_tiles;
} functions;
/* Stores state of the current Reconstruction operation, /* Stores state of the current Reconstruction operation,
* which is accessed by the device in order to perform the operation. */ * which is accessed by the device in order to perform the operation. */
struct ReconstructionState { struct ReconstructionState {
int4 filter_window; int4 filter_window;
int4 buffer_params; int4 buffer_params;
int source_w; int source_w;
int source_h; int source_h;
} reconstruction_state; } reconstruction_state;
/* Stores state of the current NLM operation, /* Stores state of the current NLM operation,
* which is accessed by the device in order to perform the operation. */ * which is accessed by the device in order to perform the operation. */
struct NLMState { struct NLMState {
int r; /* Search radius of the filter. */ int r; /* Search radius of the filter. */
int f; /* Patch size of the filter. */ int f; /* Patch size of the filter. */
float a; /* Variance compensation factor in the MSE estimation. */ float a; /* Variance compensation factor in the MSE estimation. */
float k_2; /* Squared value of the k parameter of the filter. */ float k_2; /* Squared value of the k parameter of the filter. */
bool is_color; bool is_color;
/* Assigns all five fields from the matching `*_` arguments in one call.
 * NOTE(review): `a = a_, k_2 = k_2_;` joins two assignments with the comma operator
 * where a semicolon was most likely intended. Both assignments still execute, so the
 * result is the same, but it reads like a typo and is worth cleaning up. */
void set_parameters(int r_, int f_, float a_, float k_2_, bool is_color_) { r = r_; f = f_; a = a_, k_2 = k_2_; is_color = is_color_; } void set_parameters(int r_, int f_, float a_, float k_2_, bool is_color_)
} nlm_state; {
r = r_;
f = f_;
a = a_, k_2 = k_2_;
is_color = is_color_;
}
} nlm_state;
struct Storage { struct Storage {
device_only_memory<float> transform; device_only_memory<float> transform;
device_only_memory<int> rank; device_only_memory<int> rank;
device_only_memory<float> XtWX; device_only_memory<float> XtWX;
device_only_memory<float3> XtWY; device_only_memory<float3> XtWY;
int w; int w;
int h; int h;
Storage(Device *device) Storage(Device *device)
: transform(device, "denoising transform"), : transform(device, "denoising transform"),
rank(device, "denoising rank"), rank(device, "denoising rank"),
XtWX(device, "denoising XtWX"), XtWX(device, "denoising XtWX"),
XtWY(device, "denoising XtWY") XtWY(device, "denoising XtWY")
{} {
} storage; }
} storage;
DenoisingTask(Device *device, const DeviceTask &task); DenoisingTask(Device *device, const DeviceTask &task);
~DenoisingTask(); ~DenoisingTask();
void run_denoising(RenderTile *tile); void run_denoising(RenderTile *tile);
struct DenoiseBuffers { struct DenoiseBuffers {
int pass_stride; int pass_stride;
int passes; int passes;
int stride; int stride;
int h; int h;
int width; int width;
int frame_stride; int frame_stride;
device_only_memory<float> mem; device_only_memory<float> mem;
device_only_memory<float> temporary_mem; device_only_memory<float> temporary_mem;
bool use_time; bool use_time;
bool use_intensity; bool use_intensity;
bool gpu_temporary_mem; bool gpu_temporary_mem;
DenoiseBuffers(Device *device) DenoiseBuffers(Device *device)
: mem(device, "denoising pixel buffer"), : mem(device, "denoising pixel buffer"), temporary_mem(device, "denoising temporary mem")
temporary_mem(device, "denoising temporary mem") {
{} }
} buffer; } buffer;
protected: protected:
Device *device; Device *device;
void set_render_buffer(RenderTile *rtiles); void set_render_buffer(RenderTile *rtiles);
void setup_denoising_buffer(); void setup_denoising_buffer();
void prefilter_shadowing(); void prefilter_shadowing();
void prefilter_features(); void prefilter_features();
void prefilter_color(); void prefilter_color();
void construct_transform(); void construct_transform();
void reconstruct(); void reconstruct();
void load_buffer(); void load_buffer();
void write_buffer(); void write_buffer();
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __DEVICE_DENOISING_H__ */ #endif /* __DEVICE_DENOISING_H__ */

View File

@@ -21,19 +21,22 @@ CCL_NAMESPACE_BEGIN
class Device; class Device;
Device *device_cpu_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background); Device *device_cpu_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
bool device_opencl_init(); bool device_opencl_init();
Device *device_opencl_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background); Device *device_opencl_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
bool device_opencl_compile_kernel(const vector<string>& parameters); bool device_opencl_compile_kernel(const vector<string> &parameters);
bool device_cuda_init(); bool device_cuda_init();
Device *device_cuda_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background); Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
Device *device_network_create(DeviceInfo& info, Stats &stats, Profiler &profiler, const char *address); Device *device_network_create(DeviceInfo &info,
Device *device_multi_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background); Stats &stats,
Profiler &profiler,
const char *address);
Device *device_multi_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
void device_cpu_info(vector<DeviceInfo>& devices); void device_cpu_info(vector<DeviceInfo> &devices);
void device_opencl_info(vector<DeviceInfo>& devices); void device_opencl_info(vector<DeviceInfo> &devices);
void device_cuda_info(vector<DeviceInfo>& devices); void device_cuda_info(vector<DeviceInfo> &devices);
void device_network_info(vector<DeviceInfo>& devices); void device_network_info(vector<DeviceInfo> &devices);
string device_cpu_capabilities(); string device_cpu_capabilities();
string device_opencl_capabilities(); string device_opencl_capabilities();
@@ -41,4 +44,4 @@ string device_cuda_capabilities();
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __DEVICE_INTERN_H__ */ #endif /* __DEVICE_INTERN_H__ */

View File

@@ -22,21 +22,21 @@ CCL_NAMESPACE_BEGIN
/* Device Memory */ /* Device Memory */
/* Constructs an empty memory descriptor bound to `device`, with the given debug `name`
 * and memory `type`. The element type starts out as the uchar traits, all sizes are
 * zero and all pointers are null; the templated subclasses in this header overwrite
 * data_type / data_elements with the traits of their own element type. */
device_memory::device_memory(Device *device, const char *name, MemoryType type) device_memory::device_memory(Device *device, const char *name, MemoryType type)
: data_type(device_type_traits<uchar>::data_type), : data_type(device_type_traits<uchar>::data_type),
data_elements(device_type_traits<uchar>::num_elements), data_elements(device_type_traits<uchar>::num_elements),
data_size(0), data_size(0),
device_size(0), device_size(0),
data_width(0), data_width(0),
data_height(0), data_height(0),
data_depth(0), data_depth(0),
type(type), type(type),
name(name), name(name),
interpolation(INTERPOLATION_NONE), interpolation(INTERPOLATION_NONE),
extension(EXTENSION_REPEAT), extension(EXTENSION_REPEAT),
device(device), device(device),
device_pointer(0), device_pointer(0),
host_pointer(0), host_pointer(0),
shared_pointer(0) shared_pointer(0)
{ {
} }
@@ -46,95 +46,94 @@ device_memory::~device_memory()
/* Allocates `size` bytes of host memory aligned to MIN_ALIGNMENT_CPU_DATA_TYPES.
 * Returns 0 without allocating when `size` is zero. On success the allocation is
 * reported through util_guarded_mem_alloc(); on failure std::bad_alloc is thrown,
 * so a non-zero request never returns a null pointer. */
void *device_memory::host_alloc(size_t size) void *device_memory::host_alloc(size_t size)
{ {
if(!size) { if (!size) {
return 0; return 0;
} }
void *ptr = util_aligned_malloc(size, MIN_ALIGNMENT_CPU_DATA_TYPES); void *ptr = util_aligned_malloc(size, MIN_ALIGNMENT_CPU_DATA_TYPES);
if(ptr) { if (ptr) {
util_guarded_mem_alloc(size); util_guarded_mem_alloc(size);
} }
else { else {
throw std::bad_alloc(); throw std::bad_alloc();
} }
return ptr; return ptr;
} }
/* Releases the host allocation, if any, and clears host_pointer. Does nothing when
 * host_pointer is already null. */
void device_memory::host_free() void device_memory::host_free()
{ {
if(host_pointer) { if (host_pointer) {
/* The size reported as freed is memory_size(), which is derived from the current
 * data_size -- so this must run before data_size is changed for the new allocation. */
util_guarded_mem_free(memory_size()); util_guarded_mem_free(memory_size());
util_aligned_free((void*)host_pointer); util_aligned_free((void *)host_pointer);
host_pointer = 0; host_pointer = 0;
} }
} }
/* Asks the device to allocate memory for this descriptor. Asserts that no device
 * allocation exists yet and that the memory type is not MEM_TEXTURE. */
void device_memory::device_alloc() void device_memory::device_alloc()
{ {
assert(!device_pointer && type != MEM_TEXTURE); assert(!device_pointer && type != MEM_TEXTURE);
device->mem_alloc(*this); device->mem_alloc(*this);
} }
/* Asks the device to free this memory; a no-op when device_pointer is null. */
void device_memory::device_free() void device_memory::device_free()
{ {
if(device_pointer) { if (device_pointer) {
device->mem_free(*this); device->mem_free(*this);
} }
} }
/* Forwards to device->mem_copy_to() for this memory; skipped entirely when there is
 * no host allocation (host_pointer is null). */
void device_memory::device_copy_to() void device_memory::device_copy_to()
{ {
if(host_pointer) { if (host_pointer) {
device->mem_copy_to(*this); device->mem_copy_to(*this);
} }
} }
/* Forwards to device->mem_copy_from() with the given y, w, h and elem arguments.
 * Asserts that the memory type is neither MEM_TEXTURE nor MEM_READ_ONLY.
 * NOTE(review): the callers in this header pass sizeof(T) as `elem`, so it is
 * presumably the size in bytes of one element -- confirm against Device::mem_copy_from. */
void device_memory::device_copy_from(int y, int w, int h, int elem) void device_memory::device_copy_from(int y, int w, int h, int elem)
{ {
assert(type != MEM_TEXTURE && type != MEM_READ_ONLY); assert(type != MEM_TEXTURE && type != MEM_READ_ONLY);
device->mem_copy_from(*this, y, w, h, elem); device->mem_copy_from(*this, y, w, h, elem);
} }
/* Asks the device to zero this memory; skipped when data_size is 0. */
void device_memory::device_zero() void device_memory::device_zero()
{ {
if(data_size) { if (data_size) {
device->mem_zero(*this); device->mem_zero(*this);
} }
} }
/* Temporarily rebinds this descriptor to another device: the current device, device
 * size and device pointer are saved in the original_* members and replaced by the
 * given values. Undo with restore_device(). Only one set of values is saved, so a
 * second swap_device() before restore_device() overwrites the saved state. */
void device_memory::swap_device(Device *new_device, void device_memory::swap_device(Device *new_device,
size_t new_device_size, size_t new_device_size,
device_ptr new_device_ptr) device_ptr new_device_ptr)
{ {
original_device = device; original_device = device;
original_device_size = device_size; original_device_size = device_size;
original_device_ptr = device_pointer; original_device_ptr = device_pointer;
device = new_device; device = new_device;
device_size = new_device_size; device_size = new_device_size;
device_pointer = new_device_ptr; device_pointer = new_device_ptr;
} }
/* Restores the device, device size and device pointer saved by the last
 * swap_device() call. The original_* members are not reset afterwards. */
void device_memory::restore_device() void device_memory::restore_device()
{ {
device = original_device; device = original_device;
device_size = original_device_size; device_size = original_device_size;
device_pointer = original_device_ptr; device_pointer = original_device_ptr;
} }
/* Device Sub Ptr */ /* Device Sub Ptr */
/* Creates a sub-pointer into `mem` by asking mem's device for one at the given
 * `offset` and `size`. The device is remembered so the destructor can release the
 * sub-pointer on the same device. */
device_sub_ptr::device_sub_ptr(device_memory& mem, int offset, int size) device_sub_ptr::device_sub_ptr(device_memory &mem, int offset, int size) : device(mem.device)
: device(mem.device)
{ {
ptr = device->mem_alloc_sub_ptr(mem, offset, size); ptr = device->mem_alloc_sub_ptr(mem, offset, size);
} }
/* Releases the sub-pointer on the device that created it. */
device_sub_ptr::~device_sub_ptr() device_sub_ptr::~device_sub_ptr()
{ {
device->mem_free_sub_ptr(ptr); device->mem_free_sub_ptr(ptr);
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -31,152 +31,155 @@ CCL_NAMESPACE_BEGIN
class Device; class Device;
enum MemoryType { enum MemoryType { MEM_READ_ONLY, MEM_READ_WRITE, MEM_DEVICE_ONLY, MEM_TEXTURE, MEM_PIXELS };
MEM_READ_ONLY,
MEM_READ_WRITE,
MEM_DEVICE_ONLY,
MEM_TEXTURE,
MEM_PIXELS
};
/* Supported Data Types */ /* Supported Data Types */
enum DataType { enum DataType {
TYPE_UNKNOWN, TYPE_UNKNOWN,
TYPE_UCHAR, TYPE_UCHAR,
TYPE_UINT16, TYPE_UINT16,
TYPE_UINT, TYPE_UINT,
TYPE_INT, TYPE_INT,
TYPE_FLOAT, TYPE_FLOAT,
TYPE_HALF, TYPE_HALF,
TYPE_UINT64, TYPE_UINT64,
}; };
/* Returns the size in bytes of one scalar of the given DataType.
 * TYPE_UNKNOWN yields 1: the unspecialized device_type_traits pairs TYPE_UNKNOWN with
 * num_elements = sizeof(T), so element count times 1 still gives a size in bytes.
 * Any value outside the enum yields 0. */
static inline size_t datatype_size(DataType datatype) static inline size_t datatype_size(DataType datatype)
{ {
switch(datatype) { switch (datatype) {
case TYPE_UNKNOWN: return 1; case TYPE_UNKNOWN:
case TYPE_UCHAR: return sizeof(uchar); return 1;
case TYPE_FLOAT: return sizeof(float); case TYPE_UCHAR:
case TYPE_UINT: return sizeof(uint); return sizeof(uchar);
case TYPE_UINT16: return sizeof(uint16_t); case TYPE_FLOAT:
case TYPE_INT: return sizeof(int); return sizeof(float);
case TYPE_HALF: return sizeof(half); case TYPE_UINT:
case TYPE_UINT64: return sizeof(uint64_t); return sizeof(uint);
default: return 0; case TYPE_UINT16:
} return sizeof(uint16_t);
case TYPE_INT:
return sizeof(int);
case TYPE_HALF:
return sizeof(half);
case TYPE_UINT64:
return sizeof(uint64_t);
default:
return 0;
}
} }
/* Traits for data types */ /* Traits for data types */
/* Fallback traits for any type without a specialization below: the data type is
 * TYPE_UNKNOWN and the element count is sizeof(T), i.e. the type is described as
 * sizeof(T) one-byte elements (datatype_size(TYPE_UNKNOWN) is 1). */
template<typename T> struct device_type_traits { template<typename T> struct device_type_traits {
static const DataType data_type = TYPE_UNKNOWN; static const DataType data_type = TYPE_UNKNOWN;
static const int num_elements = sizeof(T); static const int num_elements = sizeof(T);
}; };
template<> struct device_type_traits<uchar> { template<> struct device_type_traits<uchar> {
static const DataType data_type = TYPE_UCHAR; static const DataType data_type = TYPE_UCHAR;
static const int num_elements = 1; static const int num_elements = 1;
}; };
template<> struct device_type_traits<uchar2> { template<> struct device_type_traits<uchar2> {
static const DataType data_type = TYPE_UCHAR; static const DataType data_type = TYPE_UCHAR;
static const int num_elements = 2; static const int num_elements = 2;
}; };
template<> struct device_type_traits<uchar3> { template<> struct device_type_traits<uchar3> {
static const DataType data_type = TYPE_UCHAR; static const DataType data_type = TYPE_UCHAR;
static const int num_elements = 3; static const int num_elements = 3;
}; };
template<> struct device_type_traits<uchar4> { template<> struct device_type_traits<uchar4> {
static const DataType data_type = TYPE_UCHAR; static const DataType data_type = TYPE_UCHAR;
static const int num_elements = 4; static const int num_elements = 4;
}; };
template<> struct device_type_traits<uint> { template<> struct device_type_traits<uint> {
static const DataType data_type = TYPE_UINT; static const DataType data_type = TYPE_UINT;
static const int num_elements = 1; static const int num_elements = 1;
}; };
template<> struct device_type_traits<uint2> { template<> struct device_type_traits<uint2> {
static const DataType data_type = TYPE_UINT; static const DataType data_type = TYPE_UINT;
static const int num_elements = 2; static const int num_elements = 2;
}; };
template<> struct device_type_traits<uint3> { template<> struct device_type_traits<uint3> {
static const DataType data_type = TYPE_UINT; static const DataType data_type = TYPE_UINT;
static const int num_elements = 3; static const int num_elements = 3;
}; };
template<> struct device_type_traits<uint4> { template<> struct device_type_traits<uint4> {
static const DataType data_type = TYPE_UINT; static const DataType data_type = TYPE_UINT;
static const int num_elements = 4; static const int num_elements = 4;
}; };
template<> struct device_type_traits<int> { template<> struct device_type_traits<int> {
static const DataType data_type = TYPE_INT; static const DataType data_type = TYPE_INT;
static const int num_elements = 1; static const int num_elements = 1;
}; };
template<> struct device_type_traits<int2> { template<> struct device_type_traits<int2> {
static const DataType data_type = TYPE_INT; static const DataType data_type = TYPE_INT;
static const int num_elements = 2; static const int num_elements = 2;
}; };
template<> struct device_type_traits<int3> { template<> struct device_type_traits<int3> {
static const DataType data_type = TYPE_INT; static const DataType data_type = TYPE_INT;
static const int num_elements = 3; static const int num_elements = 3;
}; };
template<> struct device_type_traits<int4> { template<> struct device_type_traits<int4> {
static const DataType data_type = TYPE_INT; static const DataType data_type = TYPE_INT;
static const int num_elements = 4; static const int num_elements = 4;
}; };
template<> struct device_type_traits<float> { template<> struct device_type_traits<float> {
static const DataType data_type = TYPE_FLOAT; static const DataType data_type = TYPE_FLOAT;
static const int num_elements = 1; static const int num_elements = 1;
}; };
template<> struct device_type_traits<float2> { template<> struct device_type_traits<float2> {
static const DataType data_type = TYPE_FLOAT; static const DataType data_type = TYPE_FLOAT;
static const int num_elements = 2; static const int num_elements = 2;
}; };
/* NOTE(review): float3 is declared with 4 elements, unlike uchar3 / uint3 / int3 which
 * use 3. Presumably float3 is stored padded to the size of four floats, making this
 * intentional -- confirm against the float3 definition before changing it. */
template<> struct device_type_traits<float3> { template<> struct device_type_traits<float3> {
static const DataType data_type = TYPE_FLOAT; static const DataType data_type = TYPE_FLOAT;
static const int num_elements = 4; static const int num_elements = 4;
}; };
template<> struct device_type_traits<float4> { template<> struct device_type_traits<float4> {
static const DataType data_type = TYPE_FLOAT; static const DataType data_type = TYPE_FLOAT;
static const int num_elements = 4; static const int num_elements = 4;
}; };
template<> struct device_type_traits<half> { template<> struct device_type_traits<half> {
static const DataType data_type = TYPE_HALF; static const DataType data_type = TYPE_HALF;
static const int num_elements = 1; static const int num_elements = 1;
}; };
template<> struct device_type_traits<ushort4> { template<> struct device_type_traits<ushort4> {
static const DataType data_type = TYPE_UINT16; static const DataType data_type = TYPE_UINT16;
static const int num_elements = 4; static const int num_elements = 4;
}; };
template<> struct device_type_traits<uint16_t> { template<> struct device_type_traits<uint16_t> {
static const DataType data_type = TYPE_UINT16; static const DataType data_type = TYPE_UINT16;
static const int num_elements = 1; static const int num_elements = 1;
}; };
template<> struct device_type_traits<half4> { template<> struct device_type_traits<half4> {
static const DataType data_type = TYPE_HALF; static const DataType data_type = TYPE_HALF;
static const int num_elements = 4; static const int num_elements = 4;
}; };
template<> struct device_type_traits<uint64_t> { template<> struct device_type_traits<uint64_t> {
static const DataType data_type = TYPE_UINT64; static const DataType data_type = TYPE_UINT64;
static const int num_elements = 1; static const int num_elements = 1;
}; };
/* Device Memory /* Device Memory
@@ -184,64 +187,67 @@ template<> struct device_type_traits<uint64_t> {
* Base class for all device memory. This should not be allocated directly, * Base class for all device memory. This should not be allocated directly,
* instead the appropriate subclass can be used. */ * instead the appropriate subclass can be used. */
class device_memory class device_memory {
{ public:
public: size_t memory_size()
size_t memory_size() { return data_size*data_elements*datatype_size(data_type); } {
size_t memory_elements_size(int elements) { return data_size * data_elements * datatype_size(data_type);
return elements*data_elements*datatype_size(data_type); }
} size_t memory_elements_size(int elements)
{
return elements * data_elements * datatype_size(data_type);
}
/* Data information. */ /* Data information. */
DataType data_type; DataType data_type;
int data_elements; int data_elements;
size_t data_size; size_t data_size;
size_t device_size; size_t device_size;
size_t data_width; size_t data_width;
size_t data_height; size_t data_height;
size_t data_depth; size_t data_depth;
MemoryType type; MemoryType type;
const char *name; const char *name;
InterpolationType interpolation; InterpolationType interpolation;
ExtensionType extension; ExtensionType extension;
/* Pointers. */ /* Pointers. */
Device *device; Device *device;
device_ptr device_pointer; device_ptr device_pointer;
void *host_pointer; void *host_pointer;
void *shared_pointer; void *shared_pointer;
virtual ~device_memory(); virtual ~device_memory();
void swap_device(Device *new_device, size_t new_device_size, device_ptr new_device_ptr); void swap_device(Device *new_device, size_t new_device_size, device_ptr new_device_ptr);
void restore_device(); void restore_device();
protected: protected:
friend class CUDADevice; friend class CUDADevice;
/* Only create through subclasses. */ /* Only create through subclasses. */
device_memory(Device *device, const char *name, MemoryType type); device_memory(Device *device, const char *name, MemoryType type);
/* No copying allowed. */ /* No copying allowed. */
device_memory(const device_memory&); device_memory(const device_memory &);
device_memory& operator = (const device_memory&); device_memory &operator=(const device_memory &);
/* Host allocation on the device. All host_pointer memory should be /* Host allocation on the device. All host_pointer memory should be
* allocated with these functions, for devices that support using * allocated with these functions, for devices that support using
* the same pointer for host and device. */ * the same pointer for host and device. */
void *host_alloc(size_t size); void *host_alloc(size_t size);
void host_free(); void host_free();
/* Device memory allocation and copying. */ /* Device memory allocation and copying. */
void device_alloc(); void device_alloc();
void device_free(); void device_free();
void device_copy_to(); void device_copy_to();
void device_copy_from(int y, int w, int h, int elem); void device_copy_from(int y, int w, int h, int elem);
void device_zero(); void device_zero();
device_ptr original_device_ptr; device_ptr original_device_ptr;
size_t original_device_size; size_t original_device_size;
Device *original_device; Device *original_device;
}; };
/* Device Only Memory /* Device Only Memory
@@ -249,51 +255,49 @@ protected:
* Working memory only needed by the device, with no corresponding allocation * Working memory only needed by the device, with no corresponding allocation
* on the host. Only used internally in the device implementations. */ * on the host. Only used internally in the device implementations. */
template<typename T> template<typename T> class device_only_memory : public device_memory {
class device_only_memory : public device_memory public:
{ device_only_memory(Device *device, const char *name)
public: : device_memory(device, name, MEM_DEVICE_ONLY)
device_only_memory(Device *device, const char *name) {
: device_memory(device, name, MEM_DEVICE_ONLY) data_type = device_type_traits<T>::data_type;
{ data_elements = max(device_type_traits<T>::num_elements, 1);
data_type = device_type_traits<T>::data_type; }
data_elements = max(device_type_traits<T>::num_elements, 1);
}
virtual ~device_only_memory() virtual ~device_only_memory()
{ {
free(); free();
} }
void alloc_to_device(size_t num, bool shrink_to_fit = true) void alloc_to_device(size_t num, bool shrink_to_fit = true)
{ {
size_t new_size = num; size_t new_size = num;
bool reallocate; bool reallocate;
if(shrink_to_fit) { if (shrink_to_fit) {
reallocate = (data_size != new_size); reallocate = (data_size != new_size);
} }
else { else {
reallocate = (data_size < new_size); reallocate = (data_size < new_size);
} }
if(reallocate) { if (reallocate) {
device_free(); device_free();
data_size = new_size; data_size = new_size;
device_alloc(); device_alloc();
} }
} }
void free() void free()
{ {
device_free(); device_free();
data_size = 0; data_size = 0;
} }
void zero_to_device() void zero_to_device()
{ {
device_zero(); device_zero();
} }
}; };
/* Device Vector /* Device Vector
@@ -307,135 +311,134 @@ public:
* automatically attached to kernel globals, using the provided name * automatically attached to kernel globals, using the provided name
* matching an entry in kernel_textures.h. */ * matching an entry in kernel_textures.h. */
template<typename T> class device_vector : public device_memory template<typename T> class device_vector : public device_memory {
{ public:
public: device_vector(Device *device, const char *name, MemoryType type)
device_vector(Device *device, const char *name, MemoryType type) : device_memory(device, name, type)
: device_memory(device, name, type) {
{ data_type = device_type_traits<T>::data_type;
data_type = device_type_traits<T>::data_type; data_elements = device_type_traits<T>::num_elements;
data_elements = device_type_traits<T>::num_elements;
assert(data_elements > 0); assert(data_elements > 0);
} }
virtual ~device_vector() virtual ~device_vector()
{ {
free(); free();
} }
/* Host memory allocation. */ /* Host memory allocation. */
T *alloc(size_t width, size_t height = 0, size_t depth = 0) T *alloc(size_t width, size_t height = 0, size_t depth = 0)
{ {
size_t new_size = size(width, height, depth); size_t new_size = size(width, height, depth);
if(new_size != data_size) { if (new_size != data_size) {
device_free(); device_free();
host_free(); host_free();
host_pointer = host_alloc(sizeof(T)*new_size); host_pointer = host_alloc(sizeof(T) * new_size);
assert(device_pointer == 0); assert(device_pointer == 0);
} }
data_size = new_size; data_size = new_size;
data_width = width; data_width = width;
data_height = height; data_height = height;
data_depth = depth; data_depth = depth;
return data(); return data();
} }
/* Host memory resize. Only use this if the original data needs to be /* Host memory resize. Only use this if the original data needs to be
* preserved, it is faster to call alloc() if it can be discarded. */ * preserved, it is faster to call alloc() if it can be discarded. */
T *resize(size_t width, size_t height = 0, size_t depth = 0) T *resize(size_t width, size_t height = 0, size_t depth = 0)
{ {
size_t new_size = size(width, height, depth); size_t new_size = size(width, height, depth);
if(new_size != data_size) { if (new_size != data_size) {
void *new_ptr = host_alloc(sizeof(T)*new_size); void *new_ptr = host_alloc(sizeof(T) * new_size);
if(new_size && data_size) { if (new_size && data_size) {
size_t min_size = ((new_size < data_size)? new_size: data_size); size_t min_size = ((new_size < data_size) ? new_size : data_size);
memcpy((T*)new_ptr, (T*)host_pointer, sizeof(T)*min_size); memcpy((T *)new_ptr, (T *)host_pointer, sizeof(T) * min_size);
} }
device_free(); device_free();
host_free(); host_free();
host_pointer = new_ptr; host_pointer = new_ptr;
assert(device_pointer == 0); assert(device_pointer == 0);
} }
data_size = new_size; data_size = new_size;
data_width = width; data_width = width;
data_height = height; data_height = height;
data_depth = depth; data_depth = depth;
return data(); return data();
} }
/* Take over data from an existing array. */ /* Take over data from an existing array. */
void steal_data(array<T>& from) void steal_data(array<T> &from)
{ {
device_free(); device_free();
host_free(); host_free();
data_size = from.size(); data_size = from.size();
data_width = 0; data_width = 0;
data_height = 0; data_height = 0;
data_depth = 0; data_depth = 0;
host_pointer = from.steal_pointer(); host_pointer = from.steal_pointer();
assert(device_pointer == 0); assert(device_pointer == 0);
} }
/* Free device and host memory. */ /* Free device and host memory. */
void free() void free()
{ {
device_free(); device_free();
host_free(); host_free();
data_size = 0; data_size = 0;
data_width = 0; data_width = 0;
data_height = 0; data_height = 0;
data_depth = 0; data_depth = 0;
host_pointer = 0; host_pointer = 0;
assert(device_pointer == 0); assert(device_pointer == 0);
} }
size_t size() size_t size()
{ {
return data_size; return data_size;
} }
T* data() T *data()
{ {
return (T*)host_pointer; return (T *)host_pointer;
} }
T& operator[](size_t i) T &operator[](size_t i)
{ {
assert(i < data_size); assert(i < data_size);
return data()[i]; return data()[i];
} }
void copy_to_device() void copy_to_device()
{ {
device_copy_to(); device_copy_to();
} }
void copy_from_device(int y, int w, int h) void copy_from_device(int y, int w, int h)
{ {
device_copy_from(y, w, h, sizeof(T)); device_copy_from(y, w, h, sizeof(T));
} }
void zero_to_device() void zero_to_device()
{ {
device_zero(); device_zero();
} }
protected: protected:
size_t size(size_t width, size_t height, size_t depth) size_t size(size_t width, size_t height, size_t depth)
{ {
return width * ((height == 0)? 1: height) * ((depth == 0)? 1: depth); return width * ((height == 0) ? 1 : height) * ((depth == 0) ? 1 : depth);
} }
}; };
/* Pixel Memory /* Pixel Memory
@@ -443,28 +446,26 @@ protected:
* Device memory to efficiently draw as pixels to the screen in interactive * Device memory to efficiently draw as pixels to the screen in interactive
* rendering. Only copying pixels from the device is supported, not copying to. */ * rendering. Only copying pixels from the device is supported, not copying to. */
template<typename T> class device_pixels : public device_vector<T> template<typename T> class device_pixels : public device_vector<T> {
{ public:
public: device_pixels(Device *device, const char *name) : device_vector<T>(device, name, MEM_PIXELS)
device_pixels(Device *device, const char *name) {
: device_vector<T>(device, name, MEM_PIXELS) }
{
}
void alloc_to_device(size_t width, size_t height, size_t depth = 0) void alloc_to_device(size_t width, size_t height, size_t depth = 0)
{ {
device_vector<T>::alloc(width, height, depth); device_vector<T>::alloc(width, height, depth);
if(!device_memory::device_pointer) { if (!device_memory::device_pointer) {
device_memory::device_alloc(); device_memory::device_alloc();
} }
} }
T *copy_from_device(int y, int w, int h) T *copy_from_device(int y, int w, int h)
{ {
device_memory::device_copy_from(y, w, h, sizeof(T)); device_memory::device_copy_from(y, w, h, sizeof(T));
return device_vector<T>::data(); return device_vector<T>::data();
} }
}; };
/* Device Sub Memory /* Device Sub Memory
@@ -476,25 +477,24 @@ public:
* Note: some devices require offset and size of the sub_ptr to be properly * Note: some devices require offset and size of the sub_ptr to be properly
* aligned to device->mem_address_alingment(). */ * aligned to device->mem_address_alingment(). */
class device_sub_ptr class device_sub_ptr {
{ public:
public: device_sub_ptr(device_memory &mem, int offset, int size);
device_sub_ptr(device_memory& mem, int offset, int size); ~device_sub_ptr();
~device_sub_ptr();
device_ptr operator*() const device_ptr operator*() const
{ {
return ptr; return ptr;
} }
protected: protected:
/* No copying. */ /* No copying. */
device_sub_ptr& operator = (const device_sub_ptr&); device_sub_ptr &operator=(const device_sub_ptr &);
Device *device; Device *device;
device_ptr ptr; device_ptr ptr;
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __DEVICE_MEMORY_H__ */ #endif /* __DEVICE_MEMORY_H__ */

View File

@@ -31,391 +31,406 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
class MultiDevice : public Device class MultiDevice : public Device {
{ public:
public: struct SubDevice {
struct SubDevice { explicit SubDevice(Device *device_) : device(device_)
explicit SubDevice(Device *device_) {
: device(device_) {} }
Device *device; Device *device;
map<device_ptr, device_ptr> ptr_map; map<device_ptr, device_ptr> ptr_map;
}; };
list<SubDevice> devices; list<SubDevice> devices;
device_ptr unique_key; device_ptr unique_key;
MultiDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_) MultiDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
: Device(info, stats, profiler, background_), unique_key(1) : Device(info, stats, profiler, background_), unique_key(1)
{ {
foreach(DeviceInfo& subinfo, info.multi_devices) { foreach (DeviceInfo &subinfo, info.multi_devices) {
Device *device = Device::create(subinfo, sub_stats_, profiler, background); Device *device = Device::create(subinfo, sub_stats_, profiler, background);
/* Always add CPU devices at the back since GPU devices can change /* Always add CPU devices at the back since GPU devices can change
* host memory pointers, which CPU uses as device pointer. */ * host memory pointers, which CPU uses as device pointer. */
if(subinfo.type == DEVICE_CPU) { if (subinfo.type == DEVICE_CPU) {
devices.push_back(SubDevice(device)); devices.push_back(SubDevice(device));
} }
else { else {
devices.push_front(SubDevice(device)); devices.push_front(SubDevice(device));
} }
} }
#ifdef WITH_NETWORK #ifdef WITH_NETWORK
/* try to add network devices */ /* try to add network devices */
ServerDiscovery discovery(true); ServerDiscovery discovery(true);
time_sleep(1.0); time_sleep(1.0);
vector<string> servers = discovery.get_server_list(); vector<string> servers = discovery.get_server_list();
foreach(string& server, servers) { foreach (string &server, servers) {
Device *device = device_network_create(info, stats, profiler, server.c_str()); Device *device = device_network_create(info, stats, profiler, server.c_str());
if(device) if (device)
devices.push_back(SubDevice(device)); devices.push_back(SubDevice(device));
} }
#endif #endif
} }
~MultiDevice() ~MultiDevice()
{ {
foreach(SubDevice& sub, devices) foreach (SubDevice &sub, devices)
delete sub.device; delete sub.device;
} }
const string& error_message() const string &error_message()
{ {
foreach(SubDevice& sub, devices) { foreach (SubDevice &sub, devices) {
if(sub.device->error_message() != "") { if (sub.device->error_message() != "") {
if(error_msg == "") if (error_msg == "")
error_msg = sub.device->error_message(); error_msg = sub.device->error_message();
break; break;
} }
} }
return error_msg; return error_msg;
} }
virtual bool show_samples() const virtual bool show_samples() const
{ {
if(devices.size() > 1) { if (devices.size() > 1) {
return false; return false;
} }
return devices.front().device->show_samples(); return devices.front().device->show_samples();
} }
virtual BVHLayoutMask get_bvh_layout_mask() const { virtual BVHLayoutMask get_bvh_layout_mask() const
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL; {
foreach(const SubDevice& sub_device, devices) { BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
bvh_layout_mask &= sub_device.device->get_bvh_layout_mask(); foreach (const SubDevice &sub_device, devices) {
} bvh_layout_mask &= sub_device.device->get_bvh_layout_mask();
return bvh_layout_mask; }
} return bvh_layout_mask;
}
bool load_kernels(const DeviceRequestedFeatures& requested_features) bool load_kernels(const DeviceRequestedFeatures &requested_features)
{ {
foreach(SubDevice& sub, devices) foreach (SubDevice &sub, devices)
if(!sub.device->load_kernels(requested_features)) if (!sub.device->load_kernels(requested_features))
return false; return false;
return true; return true;
} }
bool wait_for_availability(const DeviceRequestedFeatures& requested_features) bool wait_for_availability(const DeviceRequestedFeatures &requested_features)
{ {
foreach(SubDevice& sub, devices) foreach (SubDevice &sub, devices)
if(!sub.device->wait_for_availability(requested_features)) if (!sub.device->wait_for_availability(requested_features))
return false; return false;
return true; return true;
} }
DeviceKernelStatus get_active_kernel_switch_state() DeviceKernelStatus get_active_kernel_switch_state()
{ {
DeviceKernelStatus result = DEVICE_KERNEL_USING_FEATURE_KERNEL; DeviceKernelStatus result = DEVICE_KERNEL_USING_FEATURE_KERNEL;
foreach(SubDevice& sub, devices) { foreach (SubDevice &sub, devices) {
DeviceKernelStatus subresult = sub.device->get_active_kernel_switch_state(); DeviceKernelStatus subresult = sub.device->get_active_kernel_switch_state();
switch (subresult) { switch (subresult) {
case DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL: case DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL:
result = subresult; result = subresult;
break; break;
case DEVICE_KERNEL_FEATURE_KERNEL_INVALID: case DEVICE_KERNEL_FEATURE_KERNEL_INVALID:
case DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE: case DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE:
return subresult; return subresult;
case DEVICE_KERNEL_USING_FEATURE_KERNEL: case DEVICE_KERNEL_USING_FEATURE_KERNEL:
case DEVICE_KERNEL_UNKNOWN: case DEVICE_KERNEL_UNKNOWN:
break; break;
} }
} }
return result; return result;
} }
void mem_alloc(device_memory& mem) void mem_alloc(device_memory &mem)
{ {
device_ptr key = unique_key++; device_ptr key = unique_key++;
foreach(SubDevice& sub, devices) { foreach (SubDevice &sub, devices) {
mem.device = sub.device; mem.device = sub.device;
mem.device_pointer = 0; mem.device_pointer = 0;
mem.device_size = 0; mem.device_size = 0;
sub.device->mem_alloc(mem); sub.device->mem_alloc(mem);
sub.ptr_map[key] = mem.device_pointer; sub.ptr_map[key] = mem.device_pointer;
} }
mem.device = this; mem.device = this;
mem.device_pointer = key; mem.device_pointer = key;
stats.mem_alloc(mem.device_size); stats.mem_alloc(mem.device_size);
} }
void mem_copy_to(device_memory& mem) void mem_copy_to(device_memory &mem)
{ {
device_ptr existing_key = mem.device_pointer; device_ptr existing_key = mem.device_pointer;
device_ptr key = (existing_key)? existing_key: unique_key++; device_ptr key = (existing_key) ? existing_key : unique_key++;
size_t existing_size = mem.device_size; size_t existing_size = mem.device_size;
foreach(SubDevice& sub, devices) { foreach (SubDevice &sub, devices) {
mem.device = sub.device; mem.device = sub.device;
mem.device_pointer = (existing_key)? sub.ptr_map[existing_key]: 0; mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
mem.device_size = existing_size; mem.device_size = existing_size;
sub.device->mem_copy_to(mem); sub.device->mem_copy_to(mem);
sub.ptr_map[key] = mem.device_pointer; sub.ptr_map[key] = mem.device_pointer;
} }
mem.device = this; mem.device = this;
mem.device_pointer = key; mem.device_pointer = key;
stats.mem_alloc(mem.device_size - existing_size); stats.mem_alloc(mem.device_size - existing_size);
} }
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem) void mem_copy_from(device_memory &mem, int y, int w, int h, int elem)
{ {
device_ptr key = mem.device_pointer; device_ptr key = mem.device_pointer;
int i = 0, sub_h = h/devices.size(); int i = 0, sub_h = h / devices.size();
foreach(SubDevice& sub, devices) { foreach (SubDevice &sub, devices) {
int sy = y + i*sub_h; int sy = y + i * sub_h;
int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h; int sh = (i == (int)devices.size() - 1) ? h - sub_h * i : sub_h;
mem.device = sub.device; mem.device = sub.device;
mem.device_pointer = sub.ptr_map[key]; mem.device_pointer = sub.ptr_map[key];
sub.device->mem_copy_from(mem, sy, w, sh, elem); sub.device->mem_copy_from(mem, sy, w, sh, elem);
i++; i++;
} }
mem.device = this; mem.device = this;
mem.device_pointer = key; mem.device_pointer = key;
} }
void mem_zero(device_memory& mem) void mem_zero(device_memory &mem)
{ {
device_ptr existing_key = mem.device_pointer; device_ptr existing_key = mem.device_pointer;
device_ptr key = (existing_key)? existing_key: unique_key++; device_ptr key = (existing_key) ? existing_key : unique_key++;
size_t existing_size = mem.device_size; size_t existing_size = mem.device_size;
foreach(SubDevice& sub, devices) { foreach (SubDevice &sub, devices) {
mem.device = sub.device; mem.device = sub.device;
mem.device_pointer = (existing_key)? sub.ptr_map[existing_key]: 0; mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
mem.device_size = existing_size; mem.device_size = existing_size;
sub.device->mem_zero(mem); sub.device->mem_zero(mem);
sub.ptr_map[key] = mem.device_pointer; sub.ptr_map[key] = mem.device_pointer;
} }
mem.device = this; mem.device = this;
mem.device_pointer = key; mem.device_pointer = key;
stats.mem_alloc(mem.device_size - existing_size); stats.mem_alloc(mem.device_size - existing_size);
} }
void mem_free(device_memory& mem) void mem_free(device_memory &mem)
{ {
device_ptr key = mem.device_pointer; device_ptr key = mem.device_pointer;
size_t existing_size = mem.device_size; size_t existing_size = mem.device_size;
foreach(SubDevice& sub, devices) { foreach (SubDevice &sub, devices) {
mem.device = sub.device; mem.device = sub.device;
mem.device_pointer = sub.ptr_map[key]; mem.device_pointer = sub.ptr_map[key];
mem.device_size = existing_size; mem.device_size = existing_size;
sub.device->mem_free(mem); sub.device->mem_free(mem);
sub.ptr_map.erase(sub.ptr_map.find(key)); sub.ptr_map.erase(sub.ptr_map.find(key));
} }
mem.device = this; mem.device = this;
mem.device_pointer = 0; mem.device_pointer = 0;
mem.device_size = 0; mem.device_size = 0;
stats.mem_free(existing_size); stats.mem_free(existing_size);
} }
void const_copy_to(const char *name, void *host, size_t size) void const_copy_to(const char *name, void *host, size_t size)
{ {
foreach(SubDevice& sub, devices) foreach (SubDevice &sub, devices)
sub.device->const_copy_to(name, host, size); sub.device->const_copy_to(name, host, size);
} }
void draw_pixels( void draw_pixels(device_memory &rgba,
device_memory& rgba, int y, int y,
int w, int h, int width, int height, int w,
int dx, int dy, int dw, int dh, int h,
bool transparent, const DeviceDrawParams &draw_params) int width,
{ int height,
device_ptr key = rgba.device_pointer; int dx,
int i = 0, sub_h = h/devices.size(); int dy,
int sub_height = height/devices.size(); int dw,
int dh,
bool transparent,
const DeviceDrawParams &draw_params)
{
device_ptr key = rgba.device_pointer;
int i = 0, sub_h = h / devices.size();
int sub_height = height / devices.size();
foreach(SubDevice& sub, devices) { foreach (SubDevice &sub, devices) {
int sy = y + i*sub_h; int sy = y + i * sub_h;
int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h; int sh = (i == (int)devices.size() - 1) ? h - sub_h * i : sub_h;
int sheight = (i == (int)devices.size() - 1)? height - sub_height*i: sub_height; int sheight = (i == (int)devices.size() - 1) ? height - sub_height * i : sub_height;
int sdy = dy + i*sub_height; int sdy = dy + i * sub_height;
/* adjust math for w/width */ /* adjust math for w/width */
rgba.device_pointer = sub.ptr_map[key]; rgba.device_pointer = sub.ptr_map[key];
sub.device->draw_pixels(rgba, sy, w, sh, width, sheight, dx, sdy, dw, dh, transparent, draw_params); sub.device->draw_pixels(
i++; rgba, sy, w, sh, width, sheight, dx, sdy, dw, dh, transparent, draw_params);
} i++;
}
rgba.device_pointer = key; rgba.device_pointer = key;
} }
void map_tile(Device *sub_device, RenderTile& tile) void map_tile(Device *sub_device, RenderTile &tile)
{ {
foreach(SubDevice& sub, devices) { foreach (SubDevice &sub, devices) {
if(sub.device == sub_device) { if (sub.device == sub_device) {
if(tile.buffer) tile.buffer = sub.ptr_map[tile.buffer]; if (tile.buffer)
} tile.buffer = sub.ptr_map[tile.buffer];
} }
} }
}
int device_number(Device *sub_device) int device_number(Device *sub_device)
{ {
int i = 0; int i = 0;
foreach(SubDevice& sub, devices) { foreach (SubDevice &sub, devices) {
if(sub.device == sub_device) if (sub.device == sub_device)
return i; return i;
i++; i++;
} }
return -1; return -1;
} }
void map_neighbor_tiles(Device *sub_device, RenderTile *tiles) void map_neighbor_tiles(Device *sub_device, RenderTile *tiles)
{ {
for(int i = 0; i < 9; i++) { for (int i = 0; i < 9; i++) {
if(!tiles[i].buffers) { if (!tiles[i].buffers) {
continue; continue;
} }
/* If the tile was rendered on another device, copy its memory to /* If the tile was rendered on another device, copy its memory to
* to the current device now, for the duration of the denoising task. * to the current device now, for the duration of the denoising task.
* Note that this temporarily modifies the RenderBuffers and calls * Note that this temporarily modifies the RenderBuffers and calls
* the device, so this function is not thread safe. */ * the device, so this function is not thread safe. */
device_vector<float> &mem = tiles[i].buffers->buffer; device_vector<float> &mem = tiles[i].buffers->buffer;
if(mem.device != sub_device) { if (mem.device != sub_device) {
/* Only copy from device to host once. This is faster, but /* Only copy from device to host once. This is faster, but
* also required for the case where a CPU thread is denoising * also required for the case where a CPU thread is denoising
* a tile rendered on the GPU. In that case we have to avoid * a tile rendered on the GPU. In that case we have to avoid
* overwriting the buffer being denoised by the CPU thread. */ * overwriting the buffer being denoised by the CPU thread. */
if(!tiles[i].buffers->map_neighbor_copied) { if (!tiles[i].buffers->map_neighbor_copied) {
tiles[i].buffers->map_neighbor_copied = true; tiles[i].buffers->map_neighbor_copied = true;
mem.copy_from_device(0, mem.data_size, 1); mem.copy_from_device(0, mem.data_size, 1);
} }
mem.swap_device(sub_device, 0, 0); mem.swap_device(sub_device, 0, 0);
mem.copy_to_device(); mem.copy_to_device();
tiles[i].buffer = mem.device_pointer; tiles[i].buffer = mem.device_pointer;
tiles[i].device_size = mem.device_size; tiles[i].device_size = mem.device_size;
mem.restore_device(); mem.restore_device();
} }
} }
} }
void unmap_neighbor_tiles(Device * sub_device, RenderTile * tiles) void unmap_neighbor_tiles(Device *sub_device, RenderTile *tiles)
{ {
/* Copy denoised result back to the host. */ /* Copy denoised result back to the host. */
device_vector<float> &mem = tiles[9].buffers->buffer; device_vector<float> &mem = tiles[9].buffers->buffer;
mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer); mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer);
mem.copy_from_device(0, mem.data_size, 1); mem.copy_from_device(0, mem.data_size, 1);
mem.restore_device(); mem.restore_device();
/* Copy denoised result to the original device. */ /* Copy denoised result to the original device. */
mem.copy_to_device(); mem.copy_to_device();
for(int i = 0; i < 9; i++) { for (int i = 0; i < 9; i++) {
if(!tiles[i].buffers) { if (!tiles[i].buffers) {
continue; continue;
} }
device_vector<float> &mem = tiles[i].buffers->buffer; device_vector<float> &mem = tiles[i].buffers->buffer;
if(mem.device != sub_device) { if (mem.device != sub_device) {
mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer); mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer);
sub_device->mem_free(mem); sub_device->mem_free(mem);
mem.restore_device(); mem.restore_device();
} }
} }
} }
int get_split_task_count(DeviceTask& task) int get_split_task_count(DeviceTask &task)
{ {
int total_tasks = 0; int total_tasks = 0;
list<DeviceTask> tasks; list<DeviceTask> tasks;
task.split(tasks, devices.size()); task.split(tasks, devices.size());
foreach(SubDevice& sub, devices) { foreach (SubDevice &sub, devices) {
if(!tasks.empty()) { if (!tasks.empty()) {
DeviceTask subtask = tasks.front(); DeviceTask subtask = tasks.front();
tasks.pop_front(); tasks.pop_front();
total_tasks += sub.device->get_split_task_count(subtask); total_tasks += sub.device->get_split_task_count(subtask);
} }
} }
return total_tasks; return total_tasks;
} }
void task_add(DeviceTask& task) void task_add(DeviceTask &task)
{ {
list<DeviceTask> tasks; list<DeviceTask> tasks;
task.split(tasks, devices.size()); task.split(tasks, devices.size());
foreach(SubDevice& sub, devices) { foreach (SubDevice &sub, devices) {
if(!tasks.empty()) { if (!tasks.empty()) {
DeviceTask subtask = tasks.front(); DeviceTask subtask = tasks.front();
tasks.pop_front(); tasks.pop_front();
if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer]; if (task.buffer)
if(task.rgba_byte) subtask.rgba_byte = sub.ptr_map[task.rgba_byte]; subtask.buffer = sub.ptr_map[task.buffer];
if(task.rgba_half) subtask.rgba_half = sub.ptr_map[task.rgba_half]; if (task.rgba_byte)
if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input]; subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output]; if (task.rgba_half)
subtask.rgba_half = sub.ptr_map[task.rgba_half];
if (task.shader_input)
subtask.shader_input = sub.ptr_map[task.shader_input];
if (task.shader_output)
subtask.shader_output = sub.ptr_map[task.shader_output];
sub.device->task_add(subtask); sub.device->task_add(subtask);
} }
} }
} }
void task_wait() void task_wait()
{ {
foreach(SubDevice& sub, devices) foreach (SubDevice &sub, devices)
sub.device->task_wait(); sub.device->task_wait();
} }
void task_cancel() void task_cancel()
{ {
foreach(SubDevice& sub, devices) foreach (SubDevice &sub, devices)
sub.device->task_cancel(); sub.device->task_cancel();
} }
protected: protected:
Stats sub_stats_; Stats sub_stats_;
}; };
Device *device_multi_create(DeviceInfo& info, Stats &stats, Profiler& profiler, bool background) Device *device_multi_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{ {
return new MultiDevice(info, stats, profiler, background); return new MultiDevice(info, stats, profiler, background);
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

File diff suppressed because it is too large Load Diff

View File

@@ -19,35 +19,35 @@
#ifdef WITH_NETWORK #ifdef WITH_NETWORK
#include <boost/archive/text_iarchive.hpp> # include <boost/archive/text_iarchive.hpp>
#include <boost/archive/text_oarchive.hpp> # include <boost/archive/text_oarchive.hpp>
#include <boost/archive/binary_iarchive.hpp> # include <boost/archive/binary_iarchive.hpp>
#include <boost/archive/binary_oarchive.hpp> # include <boost/archive/binary_oarchive.hpp>
#include <boost/array.hpp> # include <boost/array.hpp>
#include <boost/asio.hpp> # include <boost/asio.hpp>
#include <boost/bind.hpp> # include <boost/bind.hpp>
#include <boost/serialization/vector.hpp> # include <boost/serialization/vector.hpp>
#include <boost/thread.hpp> # include <boost/thread.hpp>
#include <iostream> # include <iostream>
#include <sstream> # include <sstream>
#include <deque> # include <deque>
#include "render/buffers.h" # include "render/buffers.h"
#include "util/util_foreach.h" # include "util/util_foreach.h"
#include "util/util_list.h" # include "util/util_list.h"
#include "util/util_map.h" # include "util/util_map.h"
#include "util/util_param.h" # include "util/util_param.h"
#include "util/util_string.h" # include "util/util_string.h"
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
using std::cout;
using std::cerr; using std::cerr;
using std::cout;
using std::exception;
using std::hex; using std::hex;
using std::setw; using std::setw;
using std::exception;
using boost::asio::ip::tcp; using boost::asio::ip::tcp;
@@ -56,436 +56,435 @@ static const int DISCOVER_PORT = 5121;
static const string DISCOVER_REQUEST_MSG = "REQUEST_RENDER_SERVER_IP"; static const string DISCOVER_REQUEST_MSG = "REQUEST_RENDER_SERVER_IP";
static const string DISCOVER_REPLY_MSG = "REPLY_RENDER_SERVER_IP"; static const string DISCOVER_REPLY_MSG = "REPLY_RENDER_SERVER_IP";
#if 0 # if 0
typedef boost::archive::text_oarchive o_archive; typedef boost::archive::text_oarchive o_archive;
typedef boost::archive::text_iarchive i_archive; typedef boost::archive::text_iarchive i_archive;
#else # else
typedef boost::archive::binary_oarchive o_archive; typedef boost::archive::binary_oarchive o_archive;
typedef boost::archive::binary_iarchive i_archive; typedef boost::archive::binary_iarchive i_archive;
#endif # endif
/* Serialization of device memory */ /* Serialization of device memory */
class network_device_memory : public device_memory class network_device_memory : public device_memory {
{ public:
public: network_device_memory(Device *device) : device_memory(device, "", MEM_READ_ONLY)
network_device_memory(Device *device) {
: device_memory(device, "", MEM_READ_ONLY) }
{
}
~network_device_memory() ~network_device_memory()
{ {
device_pointer = 0; device_pointer = 0;
}; };
vector<char> local_data; vector<char> local_data;
}; };
/* Common netowrk error function / object for both DeviceNetwork and DeviceServer*/ /* Common netowrk error function / object for both DeviceNetwork and DeviceServer*/
class NetworkError { class NetworkError {
public: public:
NetworkError() { NetworkError()
error = ""; {
error_count = 0; error = "";
} error_count = 0;
}
~NetworkError() {} ~NetworkError()
{
}
void network_error(const string& message) { void network_error(const string &message)
error = message; {
error_count += 1; error = message;
} error_count += 1;
}
bool have_error() { bool have_error()
return true ? error_count > 0 : false; {
} return true ? error_count > 0 : false;
}
private: private:
string error; string error;
int error_count; int error_count;
}; };
/* Remote procedure call Send */ /* Remote procedure call Send */
class RPCSend { class RPCSend {
public: public:
RPCSend(tcp::socket& socket_, NetworkError* e, const string& name_ = "") RPCSend(tcp::socket &socket_, NetworkError *e, const string &name_ = "")
: name(name_), socket(socket_), archive(archive_stream), sent(false) : name(name_), socket(socket_), archive(archive_stream), sent(false)
{ {
archive & name_; archive &name_;
error_func = e; error_func = e;
fprintf(stderr, "rpc send %s\n", name.c_str()); fprintf(stderr, "rpc send %s\n", name.c_str());
} }
~RPCSend() ~RPCSend()
{ {
} }
void add(const device_memory& mem) void add(const device_memory &mem)
{ {
archive & mem.data_type & mem.data_elements & mem.data_size; archive &mem.data_type &mem.data_elements &mem.data_size;
archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer; archive &mem.data_width &mem.data_height &mem.data_depth &mem.device_pointer;
archive & mem.type & string(mem.name); archive &mem.type &string(mem.name);
archive & mem.interpolation & mem.extension; archive &mem.interpolation &mem.extension;
archive & mem.device_pointer; archive &mem.device_pointer;
} }
template<typename T> void add(const T& data) template<typename T> void add(const T &data)
{ {
archive & data; archive &data;
} }
void add(const DeviceTask& task) void add(const DeviceTask &task)
{ {
int type = (int)task.type; int type = (int)task.type;
archive & type & task.x & task.y & task.w & task.h; archive &type &task.x &task.y &task.w &task.h;
archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples; archive &task.rgba_byte &task.rgba_half &task.buffer &task.sample &task.num_samples;
archive & task.offset & task.stride; archive &task.offset &task.stride;
archive & task.shader_input & task.shader_output & task.shader_eval_type; archive &task.shader_input &task.shader_output &task.shader_eval_type;
archive & task.shader_x & task.shader_w; archive &task.shader_x &task.shader_w;
archive & task.need_finish_queue; archive &task.need_finish_queue;
} }
void add(const RenderTile& tile) void add(const RenderTile &tile)
{ {
archive & tile.x & tile.y & tile.w & tile.h; archive &tile.x &tile.y &tile.w &tile.h;
archive & tile.start_sample & tile.num_samples & tile.sample; archive &tile.start_sample &tile.num_samples &tile.sample;
archive & tile.resolution & tile.offset & tile.stride; archive &tile.resolution &tile.offset &tile.stride;
archive & tile.buffer; archive &tile.buffer;
} }
void write() void write()
{ {
boost::system::error_code error; boost::system::error_code error;
/* get string from stream */ /* get string from stream */
string archive_str = archive_stream.str(); string archive_str = archive_stream.str();
/* first send fixed size header with size of following data */ /* first send fixed size header with size of following data */
ostringstream header_stream; ostringstream header_stream;
header_stream << setw(8) << hex << archive_str.size(); header_stream << setw(8) << hex << archive_str.size();
string header_str = header_stream.str(); string header_str = header_stream.str();
boost::asio::write(socket, boost::asio::write(
boost::asio::buffer(header_str), socket, boost::asio::buffer(header_str), boost::asio::transfer_all(), error);
boost::asio::transfer_all(), error);
if(error.value()) if (error.value())
error_func->network_error(error.message()); error_func->network_error(error.message());
/* then send actual data */ /* then send actual data */
boost::asio::write(socket, boost::asio::write(
boost::asio::buffer(archive_str), socket, boost::asio::buffer(archive_str), boost::asio::transfer_all(), error);
boost::asio::transfer_all(), error);
if(error.value()) if (error.value())
error_func->network_error(error.message()); error_func->network_error(error.message());
sent = true; sent = true;
} }
void write_buffer(void *buffer, size_t size) void write_buffer(void *buffer, size_t size)
{ {
boost::system::error_code error; boost::system::error_code error;
boost::asio::write(socket, boost::asio::write(
boost::asio::buffer(buffer, size), socket, boost::asio::buffer(buffer, size), boost::asio::transfer_all(), error);
boost::asio::transfer_all(), error);
if(error.value()) if (error.value())
error_func->network_error(error.message()); error_func->network_error(error.message());
} }
protected: protected:
string name; string name;
tcp::socket& socket; tcp::socket &socket;
ostringstream archive_stream; ostringstream archive_stream;
o_archive archive; o_archive archive;
bool sent; bool sent;
NetworkError *error_func; NetworkError *error_func;
}; };
/* Remote procedure call Receive */ /* Remote procedure call Receive */
class RPCReceive { class RPCReceive {
public: public:
RPCReceive(tcp::socket& socket_, NetworkError* e ) RPCReceive(tcp::socket &socket_, NetworkError *e)
: socket(socket_), archive_stream(NULL), archive(NULL) : socket(socket_), archive_stream(NULL), archive(NULL)
{ {
error_func = e; error_func = e;
/* read head with fixed size */ /* read head with fixed size */
vector<char> header(8); vector<char> header(8);
boost::system::error_code error; boost::system::error_code error;
size_t len = boost::asio::read(socket, boost::asio::buffer(header), error); size_t len = boost::asio::read(socket, boost::asio::buffer(header), error);
if(error.value()) { if (error.value()) {
error_func->network_error(error.message()); error_func->network_error(error.message());
} }
/* verify if we got something */ /* verify if we got something */
if(len == header.size()) { if (len == header.size()) {
/* decode header */ /* decode header */
string header_str(&header[0], header.size()); string header_str(&header[0], header.size());
istringstream header_stream(header_str); istringstream header_stream(header_str);
size_t data_size; size_t data_size;
if((header_stream >> hex >> data_size)) { if ((header_stream >> hex >> data_size)) {
vector<char> data(data_size); vector<char> data(data_size);
size_t len = boost::asio::read(socket, boost::asio::buffer(data), error); size_t len = boost::asio::read(socket, boost::asio::buffer(data), error);
if(error.value()) if (error.value())
error_func->network_error(error.message()); error_func->network_error(error.message());
if (len == data_size) {
archive_str = (data.size()) ? string(&data[0], data.size()) : string("");
if(len == data_size) { archive_stream = new istringstream(archive_str);
archive_str = (data.size())? string(&data[0], data.size()): string(""); archive = new i_archive(*archive_stream);
archive_stream = new istringstream(archive_str); *archive &name;
archive = new i_archive(*archive_stream); fprintf(stderr, "rpc receive %s\n", name.c_str());
}
else {
error_func->network_error("Network receive error: data size doesn't match header");
}
}
else {
error_func->network_error("Network receive error: can't decode data size from header");
}
}
else {
error_func->network_error("Network receive error: invalid header size");
}
}
*archive & name; ~RPCReceive()
fprintf(stderr, "rpc receive %s\n", name.c_str()); {
} delete archive;
else { delete archive_stream;
error_func->network_error("Network receive error: data size doesn't match header"); }
}
}
else {
error_func->network_error("Network receive error: can't decode data size from header");
}
}
else {
error_func->network_error("Network receive error: invalid header size");
}
}
~RPCReceive() void read(network_device_memory &mem, string &name)
{ {
delete archive; *archive &mem.data_type &mem.data_elements &mem.data_size;
delete archive_stream; *archive &mem.data_width &mem.data_height &mem.data_depth &mem.device_pointer;
} *archive &mem.type &name;
*archive &mem.interpolation &mem.extension;
*archive &mem.device_pointer;
void read(network_device_memory& mem, string& name) mem.name = name.c_str();
{ mem.host_pointer = 0;
*archive & mem.data_type & mem.data_elements & mem.data_size;
*archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer;
*archive & mem.type & name;
*archive & mem.interpolation & mem.extension;
*archive & mem.device_pointer;
mem.name = name.c_str(); /* Can't transfer OpenGL texture over network. */
mem.host_pointer = 0; if (mem.type == MEM_PIXELS) {
mem.type = MEM_READ_WRITE;
}
}
/* Can't transfer OpenGL texture over network. */ template<typename T> void read(T &data)
if(mem.type == MEM_PIXELS) { {
mem.type = MEM_READ_WRITE; *archive &data;
} }
}
template<typename T> void read(T& data) void read_buffer(void *buffer, size_t size)
{ {
*archive & data; boost::system::error_code error;
} size_t len = boost::asio::read(socket, boost::asio::buffer(buffer, size), error);
void read_buffer(void *buffer, size_t size) if (error.value()) {
{ error_func->network_error(error.message());
boost::system::error_code error; }
size_t len = boost::asio::read(socket, boost::asio::buffer(buffer, size), error);
if(error.value()) { if (len != size)
error_func->network_error(error.message()); cout << "Network receive error: buffer size doesn't match expected size\n";
} }
if(len != size) void read(DeviceTask &task)
cout << "Network receive error: buffer size doesn't match expected size\n"; {
} int type;
void read(DeviceTask& task) *archive &type &task.x &task.y &task.w &task.h;
{ *archive &task.rgba_byte &task.rgba_half &task.buffer &task.sample &task.num_samples;
int type; *archive &task.offset &task.stride;
*archive &task.shader_input &task.shader_output &task.shader_eval_type;
*archive &task.shader_x &task.shader_w;
*archive &task.need_finish_queue;
*archive & type & task.x & task.y & task.w & task.h; task.type = (DeviceTask::Type)type;
*archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples; }
*archive & task.offset & task.stride;
*archive & task.shader_input & task.shader_output & task.shader_eval_type;
*archive & task.shader_x & task.shader_w;
*archive & task.need_finish_queue;
task.type = (DeviceTask::Type)type; void read(RenderTile &tile)
} {
*archive &tile.x &tile.y &tile.w &tile.h;
*archive &tile.start_sample &tile.num_samples &tile.sample;
*archive &tile.resolution &tile.offset &tile.stride;
*archive &tile.buffer;
void read(RenderTile& tile) tile.buffers = NULL;
{ }
*archive & tile.x & tile.y & tile.w & tile.h;
*archive & tile.start_sample & tile.num_samples & tile.sample;
*archive & tile.resolution & tile.offset & tile.stride;
*archive & tile.buffer;
tile.buffers = NULL; string name;
}
string name; protected:
tcp::socket &socket;
protected: string archive_str;
tcp::socket& socket; istringstream *archive_stream;
string archive_str; i_archive *archive;
istringstream *archive_stream; NetworkError *error_func;
i_archive *archive;
NetworkError *error_func;
}; };
/* Server auto discovery */ /* Server auto discovery */
class ServerDiscovery { class ServerDiscovery {
public: public:
explicit ServerDiscovery(bool discover = false) explicit ServerDiscovery(bool discover = false)
: listen_socket(io_service), collect_servers(false) : listen_socket(io_service), collect_servers(false)
{ {
/* setup listen socket */ /* setup listen socket */
listen_endpoint.address(boost::asio::ip::address_v4::any()); listen_endpoint.address(boost::asio::ip::address_v4::any());
listen_endpoint.port(DISCOVER_PORT); listen_endpoint.port(DISCOVER_PORT);
listen_socket.open(listen_endpoint.protocol()); listen_socket.open(listen_endpoint.protocol());
boost::asio::socket_base::reuse_address option(true); boost::asio::socket_base::reuse_address option(true);
listen_socket.set_option(option); listen_socket.set_option(option);
listen_socket.bind(listen_endpoint); listen_socket.bind(listen_endpoint);
/* setup receive callback */ /* setup receive callback */
async_receive(); async_receive();
/* start server discovery */ /* start server discovery */
if(discover) { if (discover) {
collect_servers = true; collect_servers = true;
servers.clear(); servers.clear();
broadcast_message(DISCOVER_REQUEST_MSG); broadcast_message(DISCOVER_REQUEST_MSG);
} }
/* start thread */ /* start thread */
work = new boost::asio::io_service::work(io_service); work = new boost::asio::io_service::work(io_service);
thread = new boost::thread(boost::bind(&boost::asio::io_service::run, &io_service)); thread = new boost::thread(boost::bind(&boost::asio::io_service::run, &io_service));
} }
~ServerDiscovery() ~ServerDiscovery()
{ {
io_service.stop(); io_service.stop();
thread->join(); thread->join();
delete thread; delete thread;
delete work; delete work;
} }
vector<string> get_server_list() vector<string> get_server_list()
{ {
vector<string> result; vector<string> result;
mutex.lock(); mutex.lock();
result = vector<string>(servers.begin(), servers.end()); result = vector<string>(servers.begin(), servers.end());
mutex.unlock(); mutex.unlock();
return result; return result;
} }
private: private:
void handle_receive_from(const boost::system::error_code& error, size_t size) void handle_receive_from(const boost::system::error_code &error, size_t size)
{ {
if(error) { if (error) {
cout << "Server discovery receive error: " << error.message() << "\n"; cout << "Server discovery receive error: " << error.message() << "\n";
return; return;
} }
if(size > 0) { if (size > 0) {
string msg = string(receive_buffer, size); string msg = string(receive_buffer, size);
/* handle incoming message */ /* handle incoming message */
if(collect_servers) { if (collect_servers) {
if(msg == DISCOVER_REPLY_MSG) { if (msg == DISCOVER_REPLY_MSG) {
string address = receive_endpoint.address().to_string(); string address = receive_endpoint.address().to_string();
mutex.lock(); mutex.lock();
/* add address if it's not already in the list */ /* add address if it's not already in the list */
bool found = std::find(servers.begin(), servers.end(), bool found = std::find(servers.begin(), servers.end(), address) != servers.end();
address) != servers.end();
if(!found) if (!found)
servers.push_back(address); servers.push_back(address);
mutex.unlock(); mutex.unlock();
} }
} }
else { else {
/* reply to request */ /* reply to request */
if(msg == DISCOVER_REQUEST_MSG) if (msg == DISCOVER_REQUEST_MSG)
broadcast_message(DISCOVER_REPLY_MSG); broadcast_message(DISCOVER_REPLY_MSG);
} }
} }
async_receive(); async_receive();
} }
void async_receive() void async_receive()
{ {
listen_socket.async_receive_from( listen_socket.async_receive_from(boost::asio::buffer(receive_buffer),
boost::asio::buffer(receive_buffer), receive_endpoint, receive_endpoint,
boost::bind(&ServerDiscovery::handle_receive_from, this, boost::bind(&ServerDiscovery::handle_receive_from,
boost::asio::placeholders::error, boost::asio::placeholders::bytes_transferred)); this,
} boost::asio::placeholders::error,
boost::asio::placeholders::bytes_transferred));
}
void broadcast_message(const string& msg) void broadcast_message(const string &msg)
{ {
/* setup broadcast socket */ /* setup broadcast socket */
boost::asio::ip::udp::socket socket(io_service); boost::asio::ip::udp::socket socket(io_service);
socket.open(boost::asio::ip::udp::v4()); socket.open(boost::asio::ip::udp::v4());
boost::asio::socket_base::broadcast option(true); boost::asio::socket_base::broadcast option(true);
socket.set_option(option); socket.set_option(option);
boost::asio::ip::udp::endpoint broadcast_endpoint( boost::asio::ip::udp::endpoint broadcast_endpoint(
boost::asio::ip::address::from_string("255.255.255.255"), DISCOVER_PORT); boost::asio::ip::address::from_string("255.255.255.255"), DISCOVER_PORT);
/* broadcast message */ /* broadcast message */
socket.send_to(boost::asio::buffer(msg), broadcast_endpoint); socket.send_to(boost::asio::buffer(msg), broadcast_endpoint);
} }
/* network service and socket */ /* network service and socket */
boost::asio::io_service io_service; boost::asio::io_service io_service;
boost::asio::ip::udp::endpoint listen_endpoint; boost::asio::ip::udp::endpoint listen_endpoint;
boost::asio::ip::udp::socket listen_socket; boost::asio::ip::udp::socket listen_socket;
/* threading */ /* threading */
boost::thread *thread; boost::thread *thread;
boost::asio::io_service::work *work; boost::asio::io_service::work *work;
boost::mutex mutex; boost::mutex mutex;
/* buffer and endpoint for receiving messages */ /* buffer and endpoint for receiving messages */
char receive_buffer[256]; char receive_buffer[256];
boost::asio::ip::udp::endpoint receive_endpoint; boost::asio::ip::udp::endpoint receive_endpoint;
// os, version, devices, status, host name, group name, ip as far as fields go // os, version, devices, status, host name, group name, ip as far as fields go
struct ServerInfo { struct ServerInfo {
string cycles_version; string cycles_version;
string os; string os;
int device_count; int device_count;
string status; string status;
string host_name; string host_name;
string group_name; string group_name;
string host_addr; string host_addr;
}; };
/* collection of server addresses in list */ /* collection of server addresses in list */
bool collect_servers; bool collect_servers;
vector<string> servers; vector<string> servers;
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif #endif
#endif /* __DEVICE_NETWORK_H__ */ #endif /* __DEVICE_NETWORK_H__ */

View File

@@ -16,218 +16,211 @@
#ifdef WITH_OPENCL #ifdef WITH_OPENCL
#include "device/opencl/opencl.h" # include "device/opencl/opencl.h"
#include "device/device_intern.h" # include "device/device_intern.h"
#include "util/util_foreach.h" # include "util/util_foreach.h"
#include "util/util_logging.h" # include "util/util_logging.h"
#include "util/util_set.h" # include "util/util_set.h"
#include "util/util_string.h" # include "util/util_string.h"
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
/* Creates the OpenCL device for `info`; this simply forwards all arguments
 * to opencl_create_split_device(). */
Device *device_opencl_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{
  Device *split_device = opencl_create_split_device(info, stats, profiler, background);
  return split_device;
}
bool device_opencl_init() bool device_opencl_init()
{ {
static bool initialized = false; static bool initialized = false;
static bool result = false; static bool result = false;
if(initialized) if (initialized)
return result; return result;
initialized = true; initialized = true;
if(OpenCLInfo::device_type() != 0) { if (OpenCLInfo::device_type() != 0) {
int clew_result = clewInit(); int clew_result = clewInit();
if(clew_result == CLEW_SUCCESS) { if (clew_result == CLEW_SUCCESS) {
VLOG(1) << "CLEW initialization succeeded."; VLOG(1) << "CLEW initialization succeeded.";
result = true; result = true;
} }
else { else {
VLOG(1) << "CLEW initialization failed: " VLOG(1) << "CLEW initialization failed: "
<< ((clew_result == CLEW_ERROR_ATEXIT_FAILED) << ((clew_result == CLEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" :
? "Error setting up atexit() handler" "Error opening the library");
: "Error opening the library"); }
} }
} else {
else { VLOG(1) << "Skip initializing CLEW, platform is force disabled.";
VLOG(1) << "Skip initializing CLEW, platform is force disabled."; result = false;
result = false; }
}
return result; return result;
} }
/* Queries the number of OpenCL platforms into *num_platforms and returns the
 * status of clGetPlatformIDs().
 *
 * On Windows the call is wrapped in a structured exception handler: if the
 * driver crashes, *num_platforms is set to 0 and CL_DEVICE_NOT_FOUND is
 * returned. */
static cl_int device_opencl_get_num_platforms_safe(cl_uint *num_platforms)
{
#  ifdef _WIN32
  cl_int status = CL_DEVICE_NOT_FOUND;
  __try {
    status = clGetPlatformIDs(0, NULL, num_platforms);
  }
  __except (EXCEPTION_EXECUTE_HANDLER) {
    /* Ignore crashes inside the OpenCL driver and hope we can
     * survive even with corrupted OpenCL installs. */
    fprintf(stderr, "Cycles OpenCL: driver crashed, continuing without OpenCL.\n");
    *num_platforms = 0;
    status = CL_DEVICE_NOT_FOUND;
  }
  return status;
#  else
  return clGetPlatformIDs(0, NULL, num_platforms);
#  endif
}
/* Appends one DeviceInfo to `devices` for every usable OpenCL device.
 *
 * Nothing is appended when no OpenCL platform is reported. Each entry gets an
 * id string built from the platform name, device name and hardware id; the
 * device number is appended when that string was already produced for an
 * earlier device. */
void device_opencl_info(vector<DeviceInfo> &devices)
{
  cl_uint platform_count = 0;
  device_opencl_get_num_platforms_safe(&platform_count);
  if (platform_count == 0) {
    return;
  }

  vector<OpenCLPlatformDevice> usable_devices;
  OpenCLInfo::get_usable_devices(&usable_devices);

  /* Devices are numbered consecutively across platforms. */
  int device_index = 0;
  set<string> seen_ids;

  foreach (OpenCLPlatformDevice &platform_device, usable_devices) {
    /* Compute unique ID for persistent user preferences. */
    string hardware_id = platform_device.hardware_id;
    if (hardware_id.empty()) {
      hardware_id = string_printf("ID_%d", device_index);
    }

    string id = string("OPENCL_") + platform_device.platform_name + "_" +
                platform_device.device_name + "_" + hardware_id;

    /* Hardware ID might not be unique, add device number in that case.
     * insert() reports whether the id was new; when it was not, the
     * disambiguated id is recorded instead. */
    if (!seen_ids.insert(id).second) {
      id += string_printf("_ID_%d", device_index);
      seen_ids.insert(id);
    }

    /* Create DeviceInfo. */
    DeviceInfo info;
    info.id = id;
    info.num = device_index;
    info.type = DEVICE_OPENCL;
    info.description = string_remove_trademark(string(platform_device.device_name));
    /* We don't know if it's used for display, but assume it is. */
    info.display_device = true;
    info.use_split_kernel = true;
    info.has_volume_decoupled = false;

    /* Check OpenCL extensions */
    const size_t fp16_pos = platform_device.device_extensions.find("cl_khr_fp16");
    info.has_half_images = (fp16_pos != string::npos);

    devices.push_back(info);
    device_index++;
  }
}
/* Builds a human readable, multi-line report of every OpenCL platform and
 * device (name, vendor, versions, extensions and a few limits).
 *
 * Returns a fixed message instead when OpenCL devices are force-disabled or
 * when no platform is found. */
string device_opencl_capabilities()
{
  if (OpenCLInfo::device_type() == 0) {
    return "All OpenCL devices are forced to be OFF";
  }

  string result = "";
  /* Only used by opencl_assert(), but in the future
   * it could also be nicely reported to the console. */
  string error_msg = "";

  cl_uint num_platforms = 0;
  opencl_assert(device_opencl_get_num_platforms_safe(&num_platforms));
  if (num_platforms == 0) {
    return "No OpenCL platforms found\n";
  }
  result += string_printf("Number of platforms: %u\n", num_platforms);

  vector<cl_platform_id> platform_ids;
  platform_ids.resize(num_platforms);
  opencl_assert(clGetPlatformIDs(num_platforms, &platform_ids[0], NULL));

  typedef char cl_string[1024];

/* Query a fixed-size value of `type` and append it as "name: value". */
#  define APPEND_INFO(func, id, name, what, type) \
    do { \
      type data; \
      memset(&data, 0, sizeof(data)); \
      opencl_assert(func(id, what, sizeof(data), &data, NULL)); \
      result += string_printf("%s: %s\n", name, to_string(data).c_str()); \
    } while (false)
/* Query an optional string; append it only when the query succeeds and the
 * returned string is not empty. */
#  define APPEND_STRING_EXTENSION_INFO(func, id, name, what) \
    do { \
      char data[1024] = "\0"; \
      size_t length = 0; \
      if (func(id, what, sizeof(data), &data, &length) == CL_SUCCESS) { \
        if (length != 0 && data[0] != '\0') { \
          result += string_printf("%s: %s\n", name, data); \
        } \
      } \
    } while (false)
#  define APPEND_PLATFORM_INFO(id, name, what, type) \
    APPEND_INFO(clGetPlatformInfo, id, "\tPlatform " name, what, type)
#  define APPEND_DEVICE_INFO(id, name, what, type) \
    APPEND_INFO(clGetDeviceInfo, id, "\t\t\tDevice " name, what, type)
#  define APPEND_DEVICE_STRING_EXTENSION_INFO(id, name, what) \
    APPEND_STRING_EXTENSION_INFO(clGetDeviceInfo, id, "\t\t\tDevice " name, what)

  vector<cl_device_id> device_ids;
  for (cl_uint platform = 0; platform < num_platforms; ++platform) {
    cl_platform_id platform_id = platform_ids[platform];

    result += string_printf("Platform #%u\n", platform);

    APPEND_PLATFORM_INFO(platform_id, "Name", CL_PLATFORM_NAME, cl_string);
    APPEND_PLATFORM_INFO(platform_id, "Vendor", CL_PLATFORM_VENDOR, cl_string);
    APPEND_PLATFORM_INFO(platform_id, "Version", CL_PLATFORM_VERSION, cl_string);
    APPEND_PLATFORM_INFO(platform_id, "Profile", CL_PLATFORM_PROFILE, cl_string);
    APPEND_PLATFORM_INFO(platform_id, "Extensions", CL_PLATFORM_EXTENSIONS, cl_string);

    cl_uint num_devices = 0;
    opencl_assert(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices));
    result += string_printf("\tNumber of devices: %u\n", num_devices);

    /* Nothing to list for this platform; also avoids taking the address of
     * element 0 of an empty vector below. */
    if (num_devices == 0) {
      continue;
    }

    device_ids.resize(num_devices);
    opencl_assert(
        clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, num_devices, &device_ids[0], NULL));
    for (cl_uint device = 0; device < num_devices; ++device) {
      cl_device_id device_id = device_ids[device];

      result += string_printf("\t\tDevice: #%u\n", device);

      APPEND_DEVICE_INFO(device_id, "Name", CL_DEVICE_NAME, cl_string);
      APPEND_DEVICE_STRING_EXTENSION_INFO(device_id, "Board Name", CL_DEVICE_BOARD_NAME_AMD);
      APPEND_DEVICE_INFO(device_id, "Vendor", CL_DEVICE_VENDOR, cl_string);
      APPEND_DEVICE_INFO(device_id, "OpenCL C Version", CL_DEVICE_OPENCL_C_VERSION, cl_string);
      APPEND_DEVICE_INFO(device_id, "Profile", CL_DEVICE_PROFILE, cl_string);
      APPEND_DEVICE_INFO(device_id, "Version", CL_DEVICE_VERSION, cl_string);
      APPEND_DEVICE_INFO(device_id, "Extensions", CL_DEVICE_EXTENSIONS, cl_string);
      APPEND_DEVICE_INFO(
          device_id, "Max clock frequency (MHz)", CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint);
      APPEND_DEVICE_INFO(device_id, "Max compute units", CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint);
      APPEND_DEVICE_INFO(device_id, "Max work group size", CL_DEVICE_MAX_WORK_GROUP_SIZE, size_t);
    }
  }

/* Undefine exactly the helper macros defined above so they do not leak into
 * the rest of the translation unit. */
#  undef APPEND_DEVICE_STRING_EXTENSION_INFO
#  undef APPEND_DEVICE_INFO
#  undef APPEND_PLATFORM_INFO
#  undef APPEND_STRING_EXTENSION_INFO
#  undef APPEND_INFO

  return result;
}
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* WITH_OPENCL */ #endif /* WITH_OPENCL */

View File

@@ -27,299 +27,304 @@ CCL_NAMESPACE_BEGIN
static const double alpha = 0.1; /* alpha for rolling average */ static const double alpha = 0.1; /* alpha for rolling average */
/* Sets up the device memory wrappers and clears every kernel function
 * pointer; the kernels themselves are created later by load_kernels(). */
DeviceSplitKernel::DeviceSplitKernel(Device *device)
    : device(device),
      split_data(device, "split_data"),
      ray_state(device, "ray_state", MEM_READ_WRITE),
      queue_index(device, "queue_index"),
      use_queues_flag(device, "use_queues_flag"),
      work_pool_wgs(device, "work_pool_wgs"),
      kernel_data_initialized(false)
{
  /* No timing measured yet. */
  avg_time_per_sample = 0.0;

  /* All kernels start out unloaded. */
  kernel_buffer_update = NULL;
  kernel_indirect_subsurface = NULL;
  kernel_next_iteration_setup = NULL;
  kernel_enqueue_inactive = NULL;
  kernel_shadow_blocked_dl = NULL;
  kernel_shadow_blocked_ao = NULL;
  kernel_direct_lighting = NULL;
  kernel_subsurface_scatter = NULL;
  kernel_holdout_emission_blurring_pathtermination_ao = NULL;
  kernel_shader_eval = NULL;
  kernel_shader_sort = NULL;
  kernel_shader_setup = NULL;
  kernel_indirect_background = NULL;
  kernel_queue_enqueue = NULL;
  kernel_do_volume = NULL;
  kernel_lamp_emission = NULL;
  kernel_scene_intersect = NULL;
  kernel_path_init = NULL;
}
/* Frees the device memory buffers, then destroys every kernel function
 * object held by this instance. */
DeviceSplitKernel::~DeviceSplitKernel()
{
  /* Device memory. */
  split_data.free();
  ray_state.free();
  use_queues_flag.free();
  queue_index.free();
  work_pool_wgs.free();

  /* Kernel function objects; deleting a NULL pointer is a no-op, so kernels
   * that were never loaded need no special handling. */
  delete kernel_path_init;
  delete kernel_scene_intersect;
  delete kernel_lamp_emission;
  delete kernel_do_volume;
  delete kernel_queue_enqueue;
  delete kernel_indirect_background;
  delete kernel_shader_setup;
  delete kernel_shader_sort;
  delete kernel_shader_eval;
  delete kernel_holdout_emission_blurring_pathtermination_ao;
  delete kernel_subsurface_scatter;
  delete kernel_direct_lighting;
  delete kernel_shadow_blocked_ao;
  delete kernel_shadow_blocked_dl;
  delete kernel_enqueue_inactive;
  delete kernel_next_iteration_setup;
  delete kernel_indirect_subsurface;
  delete kernel_buffer_update;
}
bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_features) bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures &requested_features)
{ {
#define LOAD_KERNEL(name) \ #define LOAD_KERNEL(name) \
kernel_##name = get_split_kernel_function(#name, requested_features); \ kernel_##name = get_split_kernel_function(#name, requested_features); \
if(!kernel_##name) { \ if (!kernel_##name) { \
device->set_error(string("Split kernel error: failed to load kernel_") + #name); \ device->set_error(string("Split kernel error: failed to load kernel_") + #name); \
return false; \ return false; \
} }
LOAD_KERNEL(path_init); LOAD_KERNEL(path_init);
LOAD_KERNEL(scene_intersect); LOAD_KERNEL(scene_intersect);
LOAD_KERNEL(lamp_emission); LOAD_KERNEL(lamp_emission);
if (requested_features.use_volume) { if (requested_features.use_volume) {
LOAD_KERNEL(do_volume); LOAD_KERNEL(do_volume);
} }
LOAD_KERNEL(queue_enqueue); LOAD_KERNEL(queue_enqueue);
LOAD_KERNEL(indirect_background); LOAD_KERNEL(indirect_background);
LOAD_KERNEL(shader_setup); LOAD_KERNEL(shader_setup);
LOAD_KERNEL(shader_sort); LOAD_KERNEL(shader_sort);
LOAD_KERNEL(shader_eval); LOAD_KERNEL(shader_eval);
LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao); LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
LOAD_KERNEL(subsurface_scatter); LOAD_KERNEL(subsurface_scatter);
LOAD_KERNEL(direct_lighting); LOAD_KERNEL(direct_lighting);
LOAD_KERNEL(shadow_blocked_ao); LOAD_KERNEL(shadow_blocked_ao);
LOAD_KERNEL(shadow_blocked_dl); LOAD_KERNEL(shadow_blocked_dl);
LOAD_KERNEL(enqueue_inactive); LOAD_KERNEL(enqueue_inactive);
LOAD_KERNEL(next_iteration_setup); LOAD_KERNEL(next_iteration_setup);
LOAD_KERNEL(indirect_subsurface); LOAD_KERNEL(indirect_subsurface);
LOAD_KERNEL(buffer_update); LOAD_KERNEL(buffer_update);
#undef LOAD_KERNEL #undef LOAD_KERNEL
/* Re-initialiaze kernel-dependent data when kernels change. */ /* Re-initialiaze kernel-dependent data when kernels change. */
kernel_data_initialized = false; kernel_data_initialized = false;
return true; return true;
} }
/* Returns how many split state elements fit into max_buffer_size bytes.
 *
 * The per-element size is obtained by asking state_buffer_size() for 1024
 * elements and dividing the answer by 1024. */
size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory &kg,
                                                           device_memory &data,
                                                           uint64_t max_buffer_size)
{
  const uint64_t probe_bytes = state_buffer_size(kg, data, 1024);
  const uint64_t bytes_per_element = probe_bytes / 1024;

  VLOG(1) << "Split state element size: " << string_human_readable_number(bytes_per_element)
          << " bytes. (" << string_human_readable_size(bytes_per_element) << ").";

  const uint64_t max_elements = max_buffer_size / bytes_per_element;
  return max_elements;
}
bool DeviceSplitKernel::path_trace(DeviceTask *task, bool DeviceSplitKernel::path_trace(DeviceTask *task,
RenderTile& tile, RenderTile &tile,
device_memory& kgbuffer, device_memory &kgbuffer,
device_memory& kernel_data) device_memory &kernel_data)
{ {
if(device->have_error()) { if (device->have_error()) {
return false; return false;
} }
/* Allocate all required global memory once. */ /* Allocate all required global memory once. */
if(!kernel_data_initialized) { if (!kernel_data_initialized) {
kernel_data_initialized = true; kernel_data_initialized = true;
/* Set local size */ /* Set local size */
int2 lsize = split_kernel_local_size(); int2 lsize = split_kernel_local_size();
local_size[0] = lsize[0]; local_size[0] = lsize[0];
local_size[1] = lsize[1]; local_size[1] = lsize[1];
/* Set global size */ /* Set global size */
int2 gsize = split_kernel_global_size(kgbuffer, kernel_data, task); int2 gsize = split_kernel_global_size(kgbuffer, kernel_data, task);
/* Make sure that set work size is a multiple of local /* Make sure that set work size is a multiple of local
* work size dimensions. * work size dimensions.
*/ */
global_size[0] = round_up(gsize[0], local_size[0]); global_size[0] = round_up(gsize[0], local_size[0]);
global_size[1] = round_up(gsize[1], local_size[1]); global_size[1] = round_up(gsize[1], local_size[1]);
int num_global_elements = global_size[0] * global_size[1]; int num_global_elements = global_size[0] * global_size[1];
assert(num_global_elements % WORK_POOL_SIZE == 0); assert(num_global_elements % WORK_POOL_SIZE == 0);
/* Calculate max groups */ /* Calculate max groups */
/* Denotes the maximum work groups possible w.r.t. current requested tile size. */ /* Denotes the maximum work groups possible w.r.t. current requested tile size. */
unsigned int work_pool_size = (device->info.type == DEVICE_CPU) ? WORK_POOL_SIZE_CPU : WORK_POOL_SIZE_GPU; unsigned int work_pool_size = (device->info.type == DEVICE_CPU) ? WORK_POOL_SIZE_CPU :
unsigned int max_work_groups = num_global_elements / work_pool_size + 1; WORK_POOL_SIZE_GPU;
unsigned int max_work_groups = num_global_elements / work_pool_size + 1;
/* Allocate work_pool_wgs memory. */ /* Allocate work_pool_wgs memory. */
work_pool_wgs.alloc_to_device(max_work_groups); work_pool_wgs.alloc_to_device(max_work_groups);
queue_index.alloc_to_device(NUM_QUEUES); queue_index.alloc_to_device(NUM_QUEUES);
use_queues_flag.alloc_to_device(1); use_queues_flag.alloc_to_device(1);
split_data.alloc_to_device(state_buffer_size(kgbuffer, kernel_data, num_global_elements)); split_data.alloc_to_device(state_buffer_size(kgbuffer, kernel_data, num_global_elements));
ray_state.alloc(num_global_elements); ray_state.alloc(num_global_elements);
} }
/* Number of elements in the global state buffer */ /* Number of elements in the global state buffer */
int num_global_elements = global_size[0] * global_size[1]; int num_global_elements = global_size[0] * global_size[1];
#define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \ #define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \
if(device->have_error()) { \ if (device->have_error()) { \
return false; \ return false; \
} \ } \
if(!kernel_##name->enqueue(KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \ if (!kernel_##name->enqueue( \
return false; \ KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \
} return false; \
}
tile.sample = tile.start_sample; tile.sample = tile.start_sample;
/* for exponential increase between tile updates */ /* for exponential increase between tile updates */
int time_multiplier = 1; int time_multiplier = 1;
while(tile.sample < tile.start_sample + tile.num_samples) { while (tile.sample < tile.start_sample + tile.num_samples) {
/* to keep track of how long it takes to run a number of samples */ /* to keep track of how long it takes to run a number of samples */
double start_time = time_dt(); double start_time = time_dt();
/* initial guess to start rolling average */ /* initial guess to start rolling average */
const int initial_num_samples = 1; const int initial_num_samples = 1;
/* approx number of samples per second */ /* approx number of samples per second */
int samples_per_second = (avg_time_per_sample > 0.0) ? int samples_per_second = (avg_time_per_sample > 0.0) ?
int(double(time_multiplier) / avg_time_per_sample) + 1 : initial_num_samples; int(double(time_multiplier) / avg_time_per_sample) + 1 :
initial_num_samples;
RenderTile subtile = tile; RenderTile subtile = tile;
subtile.start_sample = tile.sample; subtile.start_sample = tile.sample;
subtile.num_samples = min(samples_per_second, tile.start_sample + tile.num_samples - tile.sample); subtile.num_samples = min(samples_per_second,
tile.start_sample + tile.num_samples - tile.sample);
if(device->have_error()) { if (device->have_error()) {
return false; return false;
} }
/* reset state memory here as global size for data_init /* reset state memory here as global size for data_init
* kernel might not be large enough to do in kernel * kernel might not be large enough to do in kernel
*/ */
work_pool_wgs.zero_to_device(); work_pool_wgs.zero_to_device();
split_data.zero_to_device(); split_data.zero_to_device();
ray_state.zero_to_device(); ray_state.zero_to_device();
if(!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size), if (!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
subtile, subtile,
num_global_elements, num_global_elements,
kgbuffer, kgbuffer,
kernel_data, kernel_data,
split_data, split_data,
ray_state, ray_state,
queue_index, queue_index,
use_queues_flag, use_queues_flag,
work_pool_wgs)) work_pool_wgs)) {
{ return false;
return false; }
}
ENQUEUE_SPLIT_KERNEL(path_init, global_size, local_size); ENQUEUE_SPLIT_KERNEL(path_init, global_size, local_size);
bool activeRaysAvailable = true; bool activeRaysAvailable = true;
double cancel_time = DBL_MAX; double cancel_time = DBL_MAX;
while(activeRaysAvailable) { while (activeRaysAvailable) {
/* Do path-iteration in host [Enqueue Path-iteration kernels. */ /* Do path-iteration in host [Enqueue Path-iteration kernels. */
for(int PathIter = 0; PathIter < 16; PathIter++) { for (int PathIter = 0; PathIter < 16; PathIter++) {
ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size); ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size); ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size);
if (kernel_do_volume) { if (kernel_do_volume) {
ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size); ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
} }
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size); ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size); ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shader_setup, global_size, local_size); ENQUEUE_SPLIT_KERNEL(shader_setup, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shader_sort, global_size, local_size); ENQUEUE_SPLIT_KERNEL(shader_sort, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size); ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size); ENQUEUE_SPLIT_KERNEL(
ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size); holdout_emission_blurring_pathtermination_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size); ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size); ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size); ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size); ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(enqueue_inactive, global_size, local_size); ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size); ENQUEUE_SPLIT_KERNEL(enqueue_inactive, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size); ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size); ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(buffer_update, global_size, local_size); ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(buffer_update, global_size, local_size);
if(task->get_cancel() && cancel_time == DBL_MAX) { if (task->get_cancel() && cancel_time == DBL_MAX) {
/* Wait up to twice as many seconds for current samples to finish /* Wait up to twice as many seconds for current samples to finish
* to avoid artifacts in render result from ending too soon. * to avoid artifacts in render result from ending too soon.
*/ */
cancel_time = time_dt() + 2.0 * time_multiplier; cancel_time = time_dt() + 2.0 * time_multiplier;
} }
if(time_dt() > cancel_time) { if (time_dt() > cancel_time) {
return true; return true;
} }
} }
/* Decide if we should exit path-iteration in host. */ /* Decide if we should exit path-iteration in host. */
ray_state.copy_from_device(0, global_size[0] * global_size[1], 1); ray_state.copy_from_device(0, global_size[0] * global_size[1], 1);
activeRaysAvailable = false; activeRaysAvailable = false;
for(int rayStateIter = 0; rayStateIter < global_size[0] * global_size[1]; ++rayStateIter) { for (int rayStateIter = 0; rayStateIter < global_size[0] * global_size[1]; ++rayStateIter) {
if(!IS_STATE(ray_state.data(), rayStateIter, RAY_INACTIVE)) { if (!IS_STATE(ray_state.data(), rayStateIter, RAY_INACTIVE)) {
if(IS_STATE(ray_state.data(), rayStateIter, RAY_INVALID)) { if (IS_STATE(ray_state.data(), rayStateIter, RAY_INVALID)) {
/* Something went wrong, abort to avoid looping endlessly. */ /* Something went wrong, abort to avoid looping endlessly. */
device->set_error("Split kernel error: invalid ray state"); device->set_error("Split kernel error: invalid ray state");
return false; return false;
} }
/* Not all rays are RAY_INACTIVE. */ /* Not all rays are RAY_INACTIVE. */
activeRaysAvailable = true; activeRaysAvailable = true;
break; break;
} }
} }
if(time_dt() > cancel_time) { if (time_dt() > cancel_time) {
return true; return true;
} }
} }
double time_per_sample = ((time_dt()-start_time) / subtile.num_samples); double time_per_sample = ((time_dt() - start_time) / subtile.num_samples);
if(avg_time_per_sample == 0.0) { if (avg_time_per_sample == 0.0) {
/* start rolling average */ /* start rolling average */
avg_time_per_sample = time_per_sample; avg_time_per_sample = time_per_sample;
} }
else { else {
avg_time_per_sample = alpha*time_per_sample + (1.0-alpha)*avg_time_per_sample; avg_time_per_sample = alpha * time_per_sample + (1.0 - alpha) * avg_time_per_sample;
} }
#undef ENQUEUE_SPLIT_KERNEL #undef ENQUEUE_SPLIT_KERNEL
tile.sample += subtile.num_samples; tile.sample += subtile.num_samples;
task->update_progress(&tile, tile.w*tile.h*subtile.num_samples); task->update_progress(&tile, tile.w * tile.h * subtile.num_samples);
time_multiplier = min(time_multiplier << 1, 10); time_multiplier = min(time_multiplier << 1, 10);
if(task->get_cancel()) { if (task->get_cancel()) {
return true; return true;
} }
} }
return true; return true;
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -27,106 +27,115 @@ CCL_NAMESPACE_BEGIN
* Since some bytes may be needed for aligning chunks of memory; * Since some bytes may be needed for aligning chunks of memory;
* This is the amount of memory that we dedicate for that purpose. * This is the amount of memory that we dedicate for that purpose.
*/ */
#define DATA_ALLOCATION_MEM_FACTOR 5000000 /* 5 MB */
/* Types used for split kernel */ /* Types used for split kernel */
/* Bundles the two-dimensional global and local work sizes that are handed to
 * a split kernel when it is enqueued. Both arrays are copied on construction,
 * so the caller's arrays do not need to outlive this object. */
class KernelDimensions {
 public:
  size_t global_size[2];
  size_t local_size[2];

  KernelDimensions(size_t global_size_[2], size_t local_size_[2])
  {
    /* Copy both dimensions element by element. */
    for (int dim = 0; dim < 2; dim++) {
      global_size[dim] = global_size_[dim];
      local_size[dim] = local_size_[dim];
    }
  }
};
class SplitKernelFunction { class SplitKernelFunction {
public: public:
virtual ~SplitKernelFunction() {} virtual ~SplitKernelFunction()
{
}
/* enqueue the kernel, returns false if there is an error */ /* enqueue the kernel, returns false if there is an error */
virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) = 0; virtual bool enqueue(const KernelDimensions &dim, device_memory &kg, device_memory &data) = 0;
}; };
class DeviceSplitKernel { class DeviceSplitKernel {
private: private:
Device *device; Device *device;
SplitKernelFunction *kernel_path_init; SplitKernelFunction *kernel_path_init;
SplitKernelFunction *kernel_scene_intersect; SplitKernelFunction *kernel_scene_intersect;
SplitKernelFunction *kernel_lamp_emission; SplitKernelFunction *kernel_lamp_emission;
SplitKernelFunction *kernel_do_volume; SplitKernelFunction *kernel_do_volume;
SplitKernelFunction *kernel_queue_enqueue; SplitKernelFunction *kernel_queue_enqueue;
SplitKernelFunction *kernel_indirect_background; SplitKernelFunction *kernel_indirect_background;
SplitKernelFunction *kernel_shader_setup; SplitKernelFunction *kernel_shader_setup;
SplitKernelFunction *kernel_shader_sort; SplitKernelFunction *kernel_shader_sort;
SplitKernelFunction *kernel_shader_eval; SplitKernelFunction *kernel_shader_eval;
SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao; SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
SplitKernelFunction *kernel_subsurface_scatter; SplitKernelFunction *kernel_subsurface_scatter;
SplitKernelFunction *kernel_direct_lighting; SplitKernelFunction *kernel_direct_lighting;
SplitKernelFunction *kernel_shadow_blocked_ao; SplitKernelFunction *kernel_shadow_blocked_ao;
SplitKernelFunction *kernel_shadow_blocked_dl; SplitKernelFunction *kernel_shadow_blocked_dl;
SplitKernelFunction *kernel_enqueue_inactive; SplitKernelFunction *kernel_enqueue_inactive;
SplitKernelFunction *kernel_next_iteration_setup; SplitKernelFunction *kernel_next_iteration_setup;
SplitKernelFunction *kernel_indirect_subsurface; SplitKernelFunction *kernel_indirect_subsurface;
SplitKernelFunction *kernel_buffer_update; SplitKernelFunction *kernel_buffer_update;
/* Global memory variables [porting]; These memory is used for /* Global memory variables [porting]; These memory is used for
* co-operation between different kernels; Data written by one * co-operation between different kernels; Data written by one
* kernel will be available to another kernel via this global * kernel will be available to another kernel via this global
* memory. * memory.
*/ */
device_only_memory<uchar> split_data; device_only_memory<uchar> split_data;
device_vector<uchar> ray_state; device_vector<uchar> ray_state;
device_only_memory<int> queue_index; /* Array of size num_queues that tracks the size of each queue. */ device_only_memory<int>
queue_index; /* Array of size num_queues that tracks the size of each queue. */
/* Flag to make sceneintersect and lampemission kernel use queues. */ /* Flag to make sceneintersect and lampemission kernel use queues. */
device_only_memory<char> use_queues_flag; device_only_memory<char> use_queues_flag;
/* Approximate time it takes to complete one sample */ /* Approximate time it takes to complete one sample */
double avg_time_per_sample; double avg_time_per_sample;
/* Work pool with respect to each work group. */ /* Work pool with respect to each work group. */
device_only_memory<unsigned int> work_pool_wgs; device_only_memory<unsigned int> work_pool_wgs;
/* Cached kernel-dependent data, initialized once. */ /* Cached kernel-dependent data, initialized once. */
bool kernel_data_initialized; bool kernel_data_initialized;
size_t local_size[2]; size_t local_size[2];
size_t global_size[2]; size_t global_size[2];
public: public:
explicit DeviceSplitKernel(Device* device); explicit DeviceSplitKernel(Device *device);
virtual ~DeviceSplitKernel(); virtual ~DeviceSplitKernel();
bool load_kernels(const DeviceRequestedFeatures& requested_features); bool load_kernels(const DeviceRequestedFeatures &requested_features);
bool path_trace(DeviceTask *task, bool path_trace(DeviceTask *task,
RenderTile& rtile, RenderTile &rtile,
device_memory& kgbuffer, device_memory &kgbuffer,
device_memory& kernel_data); device_memory &kernel_data);
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0; virtual uint64_t state_buffer_size(device_memory &kg,
size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size); device_memory &data,
size_t num_threads) = 0;
size_t max_elements_for_max_buffer_size(device_memory &kg,
device_memory &data,
uint64_t max_buffer_size);
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim, virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim,
RenderTile& rtile, RenderTile &rtile,
int num_global_elements, int num_global_elements,
device_memory& kernel_globals, device_memory &kernel_globals,
device_memory& kernel_data_, device_memory &kernel_data_,
device_memory& split_data, device_memory &split_data,
device_memory& ray_state, device_memory &ray_state,
device_memory& queue_index, device_memory &queue_index,
device_memory& use_queues_flag, device_memory &use_queues_flag,
device_memory& work_pool_wgs) = 0; device_memory &work_pool_wgs) = 0;
virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name, virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name,
const DeviceRequestedFeatures&) = 0; const DeviceRequestedFeatures &) = 0;
virtual int2 split_kernel_local_size() = 0; virtual int2 split_kernel_local_size() = 0;
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task) = 0; virtual int2 split_kernel_global_size(device_memory &kg,
device_memory &data,
DeviceTask *task) = 0;
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __DEVICE_SPLIT_KERNEL_H__ */ #endif /* __DEVICE_SPLIT_KERNEL_H__ */

View File

@@ -29,100 +29,111 @@ CCL_NAMESPACE_BEGIN
/* Device Task */ /* Device Task */
/* Create a task of the given type with zeroed geometry, buffers and shader
 * parameters, a single sample, and the progress timer started at "now". */
DeviceTask::DeviceTask(Type type_)
    : type(type_),
      x(0),
      y(0),
      w(0),
      h(0),
      rgba_byte(0),
      rgba_half(0),
      buffer(0),
      sample(0),
      num_samples(1),
      shader_input(0),
      shader_output(0),
      shader_eval_type(0),
      shader_filter(0),
      shader_x(0),
      shader_w(0),
      /* Reference point for the one-second throttle in update_progress(). */
      last_update_time(time_dt())
{
}
int DeviceTask::get_subtask_count(int num, int max_size) int DeviceTask::get_subtask_count(int num, int max_size)
{ {
if(max_size != 0) { if (max_size != 0) {
int max_size_num; int max_size_num;
if(type == SHADER) { if (type == SHADER) {
max_size_num = (shader_w + max_size - 1)/max_size; max_size_num = (shader_w + max_size - 1) / max_size;
} }
else { else {
max_size = max(1, max_size/w); max_size = max(1, max_size / w);
max_size_num = (h + max_size - 1)/max_size; max_size_num = (h + max_size - 1) / max_size;
} }
num = max(max_size_num, num); num = max(max_size_num, num);
} }
if(type == SHADER) { if (type == SHADER) {
num = min(shader_w, num); num = min(shader_w, num);
} }
else if(type == RENDER) { else if (type == RENDER) {
} }
else { else {
num = min(h, num); num = min(h, num);
} }
return num; return num;
} }
/* Append the subtasks of this task to tasks.
 *
 * The number of subtasks comes from get_subtask_count(). RENDER tasks are
 * duplicated unchanged; SHADER tasks are cut along shader_x/shader_w; all
 * other tasks are cut into horizontal strips along y/h. In both cut cases
 * the last piece absorbs the remainder of the integer division. */
void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size)
{
  num = get_subtask_count(num, max_size);

  if (type == RENDER) {
    for (int i = 0; i < num; i++) {
      tasks.push_back(*this);
    }
    return;
  }

  if (type == SHADER) {
    for (int i = 0; i < num; i++) {
      /* Computed inside the loop on purpose: num may be 0 out here. */
      const int chunk = shader_w / num;
      const bool is_last = (i == num - 1);

      DeviceTask task = *this;
      task.shader_x = shader_x + chunk * i;
      task.shader_w = is_last ? shader_w - i * chunk : chunk;

      tasks.push_back(task);
    }
    return;
  }

  for (int i = 0; i < num; i++) {
    /* Computed inside the loop on purpose: num may be 0 out here. */
    const int rows = h / num;
    const bool is_last = (i == num - 1);

    DeviceTask task = *this;
    task.y = y + rows * i;
    task.h = is_last ? h - i * rows : rows;

    tasks.push_back(task);
  }
}
/* Report progress for RENDER and SHADER tasks; other task types are ignored.
 *
 * rtile may be NULL, in which case sample 0 is reported to
 * update_progress_sample and the tile update callback is skipped.
 * pixel_samples of -1 means "use shader_w".
 *
 * The tile update callback is throttled to at most once per second, measured
 * from last_update_time. */
void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
{
  if ((type != RENDER) && (type != SHADER)) {
    return;
  }

  if (update_progress_sample) {
    if (pixel_samples == -1) {
      pixel_samples = shader_w;
    }
    update_progress_sample(pixel_samples, rtile ? rtile->sample : 0);
  }

  /* The callback takes the tile by reference, so it can only be invoked when
   * a tile was actually passed in. Without this check a NULL rtile (which
   * the call above explicitly tolerates) would be dereferenced here. */
  if (update_tile_sample && rtile) {
    double current_time = time_dt();

    if (current_time - last_update_time >= 1.0) {
      update_tile_sample(*rtile);

      last_update_time = current_time;
    }
  }
}
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -33,87 +33,88 @@ class RenderTile;
class Tile; class Tile;
/* User-facing settings for the denoising filter, with their defaults. */
class DenoiseParams {
 public:
  /* Pixel radius for neighboring pixels to take into account. */
  int radius;
  /* Controls neighbor pixel weighting for the denoising filter. */
  float strength;
  /* Preserve more or less detail based on feature passes. */
  float feature_strength;
  /* When removing pixels that don't carry information,
   * use a relative threshold instead of an absolute one. */
  bool relative_pca;
  /* How many frames before and after the current center frame are included. */
  int neighbor_frames;
  /* Clamp the input to the range of +-1e8. Should be enough for any legitimate data. */
  bool clamp_input;

  DenoiseParams()
      : radius(8),
        strength(0.5f),
        feature_strength(0.5f),
        relative_pca(false),
        neighbor_frames(2),
        clamp_input(true)
  {
  }
};
class DeviceTask : public Task { class DeviceTask : public Task {
public: public:
typedef enum { RENDER, FILM_CONVERT, SHADER } Type; typedef enum { RENDER, FILM_CONVERT, SHADER } Type;
Type type; Type type;
int x, y, w, h; int x, y, w, h;
device_ptr rgba_byte; device_ptr rgba_byte;
device_ptr rgba_half; device_ptr rgba_half;
device_ptr buffer; device_ptr buffer;
int sample; int sample;
int num_samples; int num_samples;
int offset, stride; int offset, stride;
device_ptr shader_input; device_ptr shader_input;
device_ptr shader_output; device_ptr shader_output;
int shader_eval_type; int shader_eval_type;
int shader_filter; int shader_filter;
int shader_x, shader_w; int shader_x, shader_w;
int passes_size; int passes_size;
explicit DeviceTask(Type type = RENDER); explicit DeviceTask(Type type = RENDER);
int get_subtask_count(int num, int max_size = 0); int get_subtask_count(int num, int max_size = 0);
void split(list<DeviceTask>& tasks, int num, int max_size = 0); void split(list<DeviceTask> &tasks, int num, int max_size = 0);
void update_progress(RenderTile *rtile, int pixel_samples = -1); void update_progress(RenderTile *rtile, int pixel_samples = -1);
function<bool(Device *device, RenderTile&)> acquire_tile; function<bool(Device *device, RenderTile &)> acquire_tile;
function<void(long, int)> update_progress_sample; function<void(long, int)> update_progress_sample;
function<void(RenderTile&)> update_tile_sample; function<void(RenderTile &)> update_tile_sample;
function<void(RenderTile&)> release_tile; function<void(RenderTile &)> release_tile;
function<bool()> get_cancel; function<bool()> get_cancel;
function<void(RenderTile*, Device*)> map_neighbor_tiles; function<void(RenderTile *, Device *)> map_neighbor_tiles;
function<void(RenderTile*, Device*)> unmap_neighbor_tiles; function<void(RenderTile *, Device *)> unmap_neighbor_tiles;
DenoiseParams denoising; DenoiseParams denoising;
bool denoising_from_render; bool denoising_from_render;
vector<int> denoising_frames; vector<int> denoising_frames;
bool denoising_do_filter; bool denoising_do_filter;
bool denoising_write_passes; bool denoising_write_passes;
int pass_stride; int pass_stride;
int frame_stride; int frame_stride;
int target_pass_stride; int target_pass_stride;
int pass_denoising_data; int pass_denoising_data;
int pass_denoising_clean; int pass_denoising_clean;
bool need_finish_queue; bool need_finish_queue;
bool integrator_branched; bool integrator_branched;
int2 requested_tile_size; int2 requested_tile_size;
protected:
double last_update_time; protected:
double last_update_time;
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __DEVICE_TASK_H__ */ #endif /* __DEVICE_TASK_H__ */

View File

@@ -16,241 +16,246 @@
#ifdef WITH_OPENCL #ifdef WITH_OPENCL
#include "util/util_foreach.h" # include "util/util_foreach.h"
#include "device/opencl/opencl.h" # include "device/opencl/opencl.h"
#include "device/opencl/memory_manager.h" # include "device/opencl/memory_manager.h"
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
/* Register an allocation with this buffer. Only its address is stored, so
 * the Allocation object itself stays owned by the caller. */
void MemoryManager::DeviceBuffer::add_allocation(Allocation &allocation)
{
  Allocation *entry = &allocation;
  allocations.push_back(entry);
}
/* Bring the device-side buffer in sync with the registered allocations.
 *
 * First pass: drop allocations that were freed (no memory attached, or zero
 * size), recompute each remaining allocation's 16-byte aligned size and the
 * total. If anything was removed, added or resized, a new device buffer of
 * the total size is created and every allocation is packed into it, either
 * uploaded from the host or copied from the old device buffer. Otherwise
 * only allocations flagged needs_copy_to_device are re-uploaded in place.
 *
 * Sets a device error and returns early if the total exceeds the device's
 * CL_DEVICE_MAX_MEM_ALLOC_SIZE. */
void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDevice *device)
{
  bool need_realloc = false;

  /* Calculate total size and remove any freed. */
  size_t total_size = 0;

  /* Walk from the back so that erasing an entry never shifts one that
   * still has to be visited. */
  for (size_t index = allocations.size(); index-- > 0;) {
    Allocation *entry = allocations[index];

    /* Remove allocations that have been freed. */
    const bool is_freed = (!entry->mem || entry->mem->memory_size() == 0);
    if (is_freed) {
      entry->device_buffer = NULL;
      entry->size = 0;

      allocations.erase(allocations.begin() + index);

      need_realloc = true;
      continue;
    }

    /* Get actual size for allocation. */
    const size_t aligned_size = align_up(entry->mem->memory_size(), 16);

    if (entry->size != aligned_size) {
      /* Allocation is either new or resized. */
      entry->size = aligned_size;
      entry->needs_copy_to_device = true;

      need_realloc = true;
    }

    total_size += aligned_size;
  }

  if (!need_realloc) {
    /* Layout is unchanged: only re-upload what was flagged as dirty. */
    assert(total_size == buffer->data_size);

    size_t offset = 0;

    foreach (Allocation *allocation, allocations) {
      if (allocation->needs_copy_to_device) {
        /* Copy from host to device.
         * NOTE(review): the call is kept inline as the macro argument with the
         * original variable names, in case the macro uses the argument text
         * in its error message; confirm against the macro definition. */
        opencl_device_assert(device,
                             clEnqueueWriteBuffer(device->cqCommandQueue,
                                                  CL_MEM_PTR(buffer->device_pointer),
                                                  CL_FALSE,
                                                  offset,
                                                  allocation->mem->memory_size(),
                                                  allocation->mem->host_pointer,
                                                  0,
                                                  NULL,
                                                  NULL));

        allocation->needs_copy_to_device = false;
      }

      offset += allocation->size;
    }
  }
  else {
    cl_ulong max_buffer_size;
    clGetDeviceInfo(
        device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);

    if (total_size > max_buffer_size) {
      device->set_error("Scene too complex to fit in available memory.");
      return;
    }

    device_only_memory<uchar> *new_buffer = new device_only_memory<uchar>(device,
                                                                          "memory manager buffer");

    new_buffer->alloc_to_device(total_size);

    /* Running write position inside the new buffer. */
    size_t offset = 0;

    foreach (Allocation *allocation, allocations) {
      if (allocation->needs_copy_to_device) {
        /* Copy from host to device. */
        opencl_device_assert(device,
                             clEnqueueWriteBuffer(device->cqCommandQueue,
                                                  CL_MEM_PTR(new_buffer->device_pointer),
                                                  CL_FALSE,
                                                  offset,
                                                  allocation->mem->memory_size(),
                                                  allocation->mem->host_pointer,
                                                  0,
                                                  NULL,
                                                  NULL));

        allocation->needs_copy_to_device = false;
      }
      else {
        /* Fast copy from memory already on device. */
        opencl_device_assert(device,
                             clEnqueueCopyBuffer(device->cqCommandQueue,
                                                 CL_MEM_PTR(buffer->device_pointer),
                                                 CL_MEM_PTR(new_buffer->device_pointer),
                                                 allocation->desc.offset,
                                                 offset,
                                                 allocation->mem->memory_size(),
                                                 0,
                                                 NULL,
                                                 NULL));
      }

      /* Record the new position only after the old one was used as the
       * copy source above. */
      allocation->desc.offset = offset;
      offset += allocation->size;
    }

    /* Swap in the freshly packed buffer. */
    delete buffer;

    buffer = new_buffer;
  }

  /* Not really necessary, but seems to improve responsiveness for some reason. */
  clFinish(device->cqCommandQueue);
}
/* Free the memory held by this buffer. The device argument is unused. */
void MemoryManager::DeviceBuffer::free(OpenCLDevice * /*device*/)
{
  buffer->free();
}
/* Return the device buffer with the smallest tracked size; on ties the
 * earliest one in device_buffers wins. */
MemoryManager::DeviceBuffer *MemoryManager::smallest_device_buffer()
{
  /* The first buffer is the initial candidate. */
  DeviceBuffer *best = device_buffers;

  foreach (DeviceBuffer &candidate, device_buffers) {
    if (candidate.size < best->size) {
      best = &candidate;
    }
  }

  return best;
}
/* Create the manager for the given device and give every device buffer slot
 * its own (still empty) backing memory object. */
MemoryManager::MemoryManager(OpenCLDevice *device) : device(device), need_update(false)
{
  foreach (DeviceBuffer &slot, device_buffers) {
    slot.buffer = new device_only_memory<uchar>(device, "memory manager buffer");
  }
}
void MemoryManager::free() void MemoryManager::free()
{ {
foreach(DeviceBuffer& device_buffer, device_buffers) { foreach (DeviceBuffer &device_buffer, device_buffers) {
device_buffer.free(device); device_buffer.free(device);
} }
} }
void MemoryManager::alloc(const char *name, device_memory& mem) void MemoryManager::alloc(const char *name, device_memory &mem)
{ {
Allocation& allocation = allocations[name]; Allocation &allocation = allocations[name];
allocation.mem = &mem; allocation.mem = &mem;
allocation.needs_copy_to_device = true; allocation.needs_copy_to_device = true;
if(!allocation.device_buffer) { if (!allocation.device_buffer) {
DeviceBuffer* device_buffer = smallest_device_buffer(); DeviceBuffer *device_buffer = smallest_device_buffer();
allocation.device_buffer = device_buffer; allocation.device_buffer = device_buffer;
allocation.desc.device_buffer = device_buffer - device_buffers; allocation.desc.device_buffer = device_buffer - device_buffers;
device_buffer->add_allocation(allocation); device_buffer->add_allocation(allocation);
device_buffer->size += mem.memory_size(); device_buffer->size += mem.memory_size();
} }
need_update = true; need_update = true;
} }
bool MemoryManager::free(device_memory& mem) bool MemoryManager::free(device_memory &mem)
{ {
foreach(AllocationsMap::value_type& value, allocations) { foreach (AllocationsMap::value_type &value, allocations) {
Allocation& allocation = value.second; Allocation &allocation = value.second;
if(allocation.mem == &mem) { if (allocation.mem == &mem) {
allocation.device_buffer->size -= mem.memory_size(); allocation.device_buffer->size -= mem.memory_size();
allocation.mem = NULL; allocation.mem = NULL;
allocation.needs_copy_to_device = false; allocation.needs_copy_to_device = false;
need_update = true; need_update = true;
return true; return true;
} }
} }
return false; return false;
} }
MemoryManager::BufferDescriptor MemoryManager::get_descriptor(string name) MemoryManager::BufferDescriptor MemoryManager::get_descriptor(string name)
{ {
update_device_memory(); update_device_memory();
Allocation& allocation = allocations[name]; Allocation &allocation = allocations[name];
return allocation.desc; return allocation.desc;
} }
void MemoryManager::update_device_memory() void MemoryManager::update_device_memory()
{ {
if(!need_update) { if (!need_update) {
return; return;
} }
need_update = false; need_update = false;
foreach(DeviceBuffer& device_buffer, device_buffers) { foreach (DeviceBuffer &device_buffer, device_buffers) {
device_buffer.update_device_memory(device); device_buffer.update_device_memory(device);
} }
} }
void MemoryManager::set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg) void MemoryManager::set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg)
{ {
update_device_memory(); update_device_memory();
foreach(DeviceBuffer& device_buffer, device_buffers) { foreach (DeviceBuffer &device_buffer, device_buffers) {
if(device_buffer.buffer->device_pointer) { if (device_buffer.buffer->device_pointer) {
device->kernel_set_args(kernel, (*narg)++, *device_buffer.buffer); device->kernel_set_args(kernel, (*narg)++, *device_buffer.buffer);
} }
else { else {
device->kernel_set_args(kernel, (*narg)++, device->null_mem); device->kernel_set_args(kernel, (*narg)++, device->null_mem);
} }
} }
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* WITH_OPENCL */ #endif /* WITH_OPENCL */

View File

@@ -29,78 +29,77 @@ CCL_NAMESPACE_BEGIN
class OpenCLDevice; class OpenCLDevice;
class MemoryManager { class MemoryManager {
public: public:
static const int NUM_DEVICE_BUFFERS = 8; static const int NUM_DEVICE_BUFFERS = 8;
struct BufferDescriptor { struct BufferDescriptor {
uint device_buffer; uint device_buffer;
cl_ulong offset; cl_ulong offset;
}; };
private: private:
struct DeviceBuffer; struct DeviceBuffer;
struct Allocation { struct Allocation {
device_memory *mem; device_memory *mem;
DeviceBuffer *device_buffer; DeviceBuffer *device_buffer;
size_t size; /* Size of actual allocation, may be larger than requested. */ size_t size; /* Size of actual allocation, may be larger than requested. */
BufferDescriptor desc; BufferDescriptor desc;
bool needs_copy_to_device; bool needs_copy_to_device;
Allocation() : mem(NULL), device_buffer(NULL), size(0), needs_copy_to_device(false) Allocation() : mem(NULL), device_buffer(NULL), size(0), needs_copy_to_device(false)
{ {
} }
}; };
struct DeviceBuffer { struct DeviceBuffer {
device_only_memory<uchar> *buffer; device_only_memory<uchar> *buffer;
vector<Allocation*> allocations; vector<Allocation *> allocations;
size_t size; /* Size of all allocations. */ size_t size; /* Size of all allocations. */
DeviceBuffer() DeviceBuffer() : buffer(NULL), size(0)
: buffer(NULL), size(0) {
{ }
}
~DeviceBuffer() ~DeviceBuffer()
{ {
delete buffer; delete buffer;
buffer = NULL; buffer = NULL;
} }
void add_allocation(Allocation& allocation); void add_allocation(Allocation &allocation);
void update_device_memory(OpenCLDevice *device); void update_device_memory(OpenCLDevice *device);
void free(OpenCLDevice *device); void free(OpenCLDevice *device);
}; };
OpenCLDevice *device; OpenCLDevice *device;
DeviceBuffer device_buffers[NUM_DEVICE_BUFFERS]; DeviceBuffer device_buffers[NUM_DEVICE_BUFFERS];
typedef unordered_map<string, Allocation> AllocationsMap; typedef unordered_map<string, Allocation> AllocationsMap;
AllocationsMap allocations; AllocationsMap allocations;
bool need_update; bool need_update;
DeviceBuffer* smallest_device_buffer(); DeviceBuffer *smallest_device_buffer();
public: public:
MemoryManager(OpenCLDevice *device); MemoryManager(OpenCLDevice *device);
void free(); /* Free all memory. */ void free(); /* Free all memory. */
void alloc(const char *name, device_memory& mem); void alloc(const char *name, device_memory &mem);
bool free(device_memory& mem); bool free(device_memory &mem);
BufferDescriptor get_descriptor(string name); BufferDescriptor get_descriptor(string name);
void update_device_memory(); void update_device_memory();
void set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg); void set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg);
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,11 +1,11 @@
set(LICENSES set(LICENSES
Apache_2.0.txt Apache_2.0.txt
ILM.txt ILM.txt
NVidia.txt NVidia.txt
OSL.txt OSL.txt
Sobol.txt Sobol.txt
readme.txt readme.txt
) )
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${LICENSES}" ${CYCLES_INSTALL_PATH}/license) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${LICENSES}" ${CYCLES_INSTALL_PATH}/license)

View File

@@ -1,19 +1,19 @@
set(INC set(INC
.. ..
) )
set(SRC set(SRC
node.cpp node.cpp
node_type.cpp node_type.cpp
node_xml.cpp node_xml.cpp
) )
set(SRC_HEADERS set(SRC_HEADERS
node.h node.h
node_enum.h node_enum.h
node_type.h node_type.h
node_xml.h node_xml.h
) )
set(LIB set(LIB

View File

@@ -26,550 +26,645 @@ CCL_NAMESPACE_BEGIN
/* Node Type */ /* Node Type */
Node::Node(const NodeType *type_, ustring name_) Node::Node(const NodeType *type_, ustring name_) : name(name_), type(type_)
: name(name_), type(type_)
{ {
assert(type); assert(type);
/* assign non-empty name, convenient for debugging */ /* assign non-empty name, convenient for debugging */
if(name.empty()) { if (name.empty()) {
name = type->name; name = type->name;
} }
/* initialize default values */ /* initialize default values */
foreach(const SocketType& socket, type->inputs) { foreach (const SocketType &socket, type->inputs) {
set_default_value(socket); set_default_value(socket);
} }
} }
Node::~Node() Node::~Node()
{ {
} }
template<typename T> template<typename T> static T &get_socket_value(const Node *node, const SocketType &socket)
static T& get_socket_value(const Node *node, const SocketType& socket)
{ {
return (T&)*(((char*)node) + socket.struct_offset); return (T &)*(((char *)node) + socket.struct_offset);
} }
#ifndef NDEBUG #ifndef NDEBUG
static bool is_socket_float3(const SocketType& socket) static bool is_socket_float3(const SocketType &socket)
{ {
return socket.type == SocketType::COLOR || return socket.type == SocketType::COLOR || socket.type == SocketType::POINT ||
socket.type == SocketType::POINT || socket.type == SocketType::VECTOR || socket.type == SocketType::NORMAL;
socket.type == SocketType::VECTOR ||
socket.type == SocketType::NORMAL;
} }
static bool is_socket_array_float3(const SocketType& socket) static bool is_socket_array_float3(const SocketType &socket)
{ {
return socket.type == SocketType::COLOR_ARRAY || return socket.type == SocketType::COLOR_ARRAY || socket.type == SocketType::POINT_ARRAY ||
socket.type == SocketType::POINT_ARRAY || socket.type == SocketType::VECTOR_ARRAY || socket.type == SocketType::NORMAL_ARRAY;
socket.type == SocketType::VECTOR_ARRAY ||
socket.type == SocketType::NORMAL_ARRAY;
} }
#endif #endif
/* set values */ /* set values */
void Node::set(const SocketType& input, bool value) void Node::set(const SocketType &input, bool value)
{ {
assert(input.type == SocketType::BOOLEAN); assert(input.type == SocketType::BOOLEAN);
get_socket_value<bool>(this, input) = value; get_socket_value<bool>(this, input) = value;
} }
void Node::set(const SocketType& input, int value) void Node::set(const SocketType &input, int value)
{ {
assert((input.type == SocketType::INT || input.type == SocketType::ENUM)); assert((input.type == SocketType::INT || input.type == SocketType::ENUM));
get_socket_value<int>(this, input) = value; get_socket_value<int>(this, input) = value;
} }
void Node::set(const SocketType& input, uint value) void Node::set(const SocketType &input, uint value)
{ {
assert(input.type == SocketType::UINT); assert(input.type == SocketType::UINT);
get_socket_value<uint>(this, input) = value; get_socket_value<uint>(this, input) = value;
} }
void Node::set(const SocketType& input, float value) void Node::set(const SocketType &input, float value)
{ {
assert(input.type == SocketType::FLOAT); assert(input.type == SocketType::FLOAT);
get_socket_value<float>(this, input) = value; get_socket_value<float>(this, input) = value;
} }
void Node::set(const SocketType& input, float2 value) void Node::set(const SocketType &input, float2 value)
{ {
assert(input.type == SocketType::FLOAT); assert(input.type == SocketType::FLOAT);
get_socket_value<float2>(this, input) = value; get_socket_value<float2>(this, input) = value;
} }
void Node::set(const SocketType& input, float3 value) void Node::set(const SocketType &input, float3 value)
{ {
assert(is_socket_float3(input)); assert(is_socket_float3(input));
get_socket_value<float3>(this, input) = value; get_socket_value<float3>(this, input) = value;
} }
void Node::set(const SocketType& input, const char *value) void Node::set(const SocketType &input, const char *value)
{ {
set(input, ustring(value)); set(input, ustring(value));
} }
void Node::set(const SocketType& input, ustring value) void Node::set(const SocketType &input, ustring value)
{ {
if(input.type == SocketType::STRING) { if (input.type == SocketType::STRING) {
get_socket_value<ustring>(this, input) = value; get_socket_value<ustring>(this, input) = value;
} }
else if(input.type == SocketType::ENUM) { else if (input.type == SocketType::ENUM) {
const NodeEnum& enm = *input.enum_values; const NodeEnum &enm = *input.enum_values;
if(enm.exists(value)) { if (enm.exists(value)) {
get_socket_value<int>(this, input) = enm[value]; get_socket_value<int>(this, input) = enm[value];
} }
else { else {
assert(0); assert(0);
} }
} }
else { else {
assert(0); assert(0);
} }
} }
void Node::set(const SocketType& input, const Transform& value) void Node::set(const SocketType &input, const Transform &value)
{ {
assert(input.type == SocketType::TRANSFORM); assert(input.type == SocketType::TRANSFORM);
get_socket_value<Transform>(this, input) = value; get_socket_value<Transform>(this, input) = value;
} }
void Node::set(const SocketType& input, Node *value) void Node::set(const SocketType &input, Node *value)
{ {
assert(input.type == SocketType::TRANSFORM); assert(input.type == SocketType::TRANSFORM);
get_socket_value<Node*>(this, input) = value; get_socket_value<Node *>(this, input) = value;
} }
/* set array values */ /* set array values */
void Node::set(const SocketType& input, array<bool>& value) void Node::set(const SocketType &input, array<bool> &value)
{ {
assert(input.type == SocketType::BOOLEAN_ARRAY); assert(input.type == SocketType::BOOLEAN_ARRAY);
get_socket_value<array<bool> >(this, input).steal_data(value); get_socket_value<array<bool>>(this, input).steal_data(value);
} }
void Node::set(const SocketType& input, array<int>& value) void Node::set(const SocketType &input, array<int> &value)
{ {
assert(input.type == SocketType::INT_ARRAY); assert(input.type == SocketType::INT_ARRAY);
get_socket_value<array<int> >(this, input).steal_data(value); get_socket_value<array<int>>(this, input).steal_data(value);
} }
void Node::set(const SocketType& input, array<float>& value) void Node::set(const SocketType &input, array<float> &value)
{ {
assert(input.type == SocketType::FLOAT_ARRAY); assert(input.type == SocketType::FLOAT_ARRAY);
get_socket_value<array<float> >(this, input).steal_data(value); get_socket_value<array<float>>(this, input).steal_data(value);
} }
void Node::set(const SocketType& input, array<float2>& value) void Node::set(const SocketType &input, array<float2> &value)
{ {
assert(input.type == SocketType::FLOAT_ARRAY); assert(input.type == SocketType::FLOAT_ARRAY);
get_socket_value<array<float2> >(this, input).steal_data(value); get_socket_value<array<float2>>(this, input).steal_data(value);
} }
void Node::set(const SocketType& input, array<float3>& value) void Node::set(const SocketType &input, array<float3> &value)
{ {
assert(is_socket_array_float3(input)); assert(is_socket_array_float3(input));
get_socket_value<array<float3> >(this, input).steal_data(value); get_socket_value<array<float3>>(this, input).steal_data(value);
} }
void Node::set(const SocketType& input, array<ustring>& value) void Node::set(const SocketType &input, array<ustring> &value)
{ {
assert(input.type == SocketType::STRING_ARRAY); assert(input.type == SocketType::STRING_ARRAY);
get_socket_value<array<ustring> >(this, input).steal_data(value); get_socket_value<array<ustring>>(this, input).steal_data(value);
} }
void Node::set(const SocketType& input, array<Transform>& value) void Node::set(const SocketType &input, array<Transform> &value)
{ {
assert(input.type == SocketType::TRANSFORM_ARRAY); assert(input.type == SocketType::TRANSFORM_ARRAY);
get_socket_value<array<Transform> >(this, input).steal_data(value); get_socket_value<array<Transform>>(this, input).steal_data(value);
} }
void Node::set(const SocketType& input, array<Node*>& value) void Node::set(const SocketType &input, array<Node *> &value)
{ {
assert(input.type == SocketType::TRANSFORM_ARRAY); assert(input.type == SocketType::TRANSFORM_ARRAY);
get_socket_value<array<Node*> >(this, input).steal_data(value); get_socket_value<array<Node *>>(this, input).steal_data(value);
} }
/* get values */ /* get values */
bool Node::get_bool(const SocketType& input) const bool Node::get_bool(const SocketType &input) const
{ {
assert(input.type == SocketType::BOOLEAN); assert(input.type == SocketType::BOOLEAN);
return get_socket_value<bool>(this, input); return get_socket_value<bool>(this, input);
} }
int Node::get_int(const SocketType& input) const int Node::get_int(const SocketType &input) const
{ {
assert(input.type == SocketType::INT || input.type == SocketType::ENUM); assert(input.type == SocketType::INT || input.type == SocketType::ENUM);
return get_socket_value<int>(this, input); return get_socket_value<int>(this, input);
} }
uint Node::get_uint(const SocketType& input) const uint Node::get_uint(const SocketType &input) const
{ {
assert(input.type == SocketType::UINT); assert(input.type == SocketType::UINT);
return get_socket_value<uint>(this, input); return get_socket_value<uint>(this, input);
} }
float Node::get_float(const SocketType& input) const float Node::get_float(const SocketType &input) const
{ {
assert(input.type == SocketType::FLOAT); assert(input.type == SocketType::FLOAT);
return get_socket_value<float>(this, input); return get_socket_value<float>(this, input);
} }
float2 Node::get_float2(const SocketType& input) const float2 Node::get_float2(const SocketType &input) const
{ {
assert(input.type == SocketType::FLOAT); assert(input.type == SocketType::FLOAT);
return get_socket_value<float2>(this, input); return get_socket_value<float2>(this, input);
} }
float3 Node::get_float3(const SocketType& input) const float3 Node::get_float3(const SocketType &input) const
{ {
assert(is_socket_float3(input)); assert(is_socket_float3(input));
return get_socket_value<float3>(this, input); return get_socket_value<float3>(this, input);
} }
ustring Node::get_string(const SocketType& input) const ustring Node::get_string(const SocketType &input) const
{ {
if(input.type == SocketType::STRING) { if (input.type == SocketType::STRING) {
return get_socket_value<ustring>(this, input); return get_socket_value<ustring>(this, input);
} }
else if(input.type == SocketType::ENUM) { else if (input.type == SocketType::ENUM) {
const NodeEnum& enm = *input.enum_values; const NodeEnum &enm = *input.enum_values;
int intvalue = get_socket_value<int>(this, input); int intvalue = get_socket_value<int>(this, input);
return (enm.exists(intvalue)) ? enm[intvalue] : ustring(); return (enm.exists(intvalue)) ? enm[intvalue] : ustring();
} }
else { else {
assert(0); assert(0);
return ustring(); return ustring();
} }
} }
Transform Node::get_transform(const SocketType& input) const Transform Node::get_transform(const SocketType &input) const
{ {
assert(input.type == SocketType::TRANSFORM); assert(input.type == SocketType::TRANSFORM);
return get_socket_value<Transform>(this, input); return get_socket_value<Transform>(this, input);
} }
Node *Node::get_node(const SocketType& input) const Node *Node::get_node(const SocketType &input) const
{ {
assert(input.type == SocketType::NODE); assert(input.type == SocketType::NODE);
return get_socket_value<Node*>(this, input); return get_socket_value<Node *>(this, input);
} }
/* get array values */ /* get array values */
const array<bool>& Node::get_bool_array(const SocketType& input) const const array<bool> &Node::get_bool_array(const SocketType &input) const
{ {
assert(input.type == SocketType::BOOLEAN_ARRAY); assert(input.type == SocketType::BOOLEAN_ARRAY);
return get_socket_value<array<bool> >(this, input); return get_socket_value<array<bool>>(this, input);
} }
const array<int>& Node::get_int_array(const SocketType& input) const const array<int> &Node::get_int_array(const SocketType &input) const
{ {
assert(input.type == SocketType::INT_ARRAY); assert(input.type == SocketType::INT_ARRAY);
return get_socket_value<array<int> >(this, input); return get_socket_value<array<int>>(this, input);
} }
const array<float>& Node::get_float_array(const SocketType& input) const const array<float> &Node::get_float_array(const SocketType &input) const
{ {
assert(input.type == SocketType::FLOAT_ARRAY); assert(input.type == SocketType::FLOAT_ARRAY);
return get_socket_value<array<float> >(this, input); return get_socket_value<array<float>>(this, input);
} }
const array<float2>& Node::get_float2_array(const SocketType& input) const const array<float2> &Node::get_float2_array(const SocketType &input) const
{ {
assert(input.type == SocketType::FLOAT_ARRAY); assert(input.type == SocketType::FLOAT_ARRAY);
return get_socket_value<array<float2> >(this, input); return get_socket_value<array<float2>>(this, input);
} }
const array<float3>& Node::get_float3_array(const SocketType& input) const const array<float3> &Node::get_float3_array(const SocketType &input) const
{ {
assert(is_socket_array_float3(input)); assert(is_socket_array_float3(input));
return get_socket_value<array<float3> >(this, input); return get_socket_value<array<float3>>(this, input);
} }
const array<ustring>& Node::get_string_array(const SocketType& input) const const array<ustring> &Node::get_string_array(const SocketType &input) const
{ {
assert(input.type == SocketType::STRING_ARRAY); assert(input.type == SocketType::STRING_ARRAY);
return get_socket_value<array<ustring> >(this, input); return get_socket_value<array<ustring>>(this, input);
} }
const array<Transform>& Node::get_transform_array(const SocketType& input) const const array<Transform> &Node::get_transform_array(const SocketType &input) const
{ {
assert(input.type == SocketType::TRANSFORM_ARRAY); assert(input.type == SocketType::TRANSFORM_ARRAY);
return get_socket_value<array<Transform> >(this, input); return get_socket_value<array<Transform>>(this, input);
} }
const array<Node*>& Node::get_node_array(const SocketType& input) const const array<Node *> &Node::get_node_array(const SocketType &input) const
{ {
assert(input.type == SocketType::NODE_ARRAY); assert(input.type == SocketType::NODE_ARRAY);
return get_socket_value<array<Node*> >(this, input); return get_socket_value<array<Node *>>(this, input);
} }
/* generic value operations */ /* generic value operations */
bool Node::has_default_value(const SocketType& input) const bool Node::has_default_value(const SocketType &input) const
{ {
const void *src = input.default_value; const void *src = input.default_value;
void *dst = &get_socket_value<char>(this, input); void *dst = &get_socket_value<char>(this, input);
return memcmp(dst, src, input.size()) == 0; return memcmp(dst, src, input.size()) == 0;
} }
void Node::set_default_value(const SocketType& socket) void Node::set_default_value(const SocketType &socket)
{ {
const void *src = socket.default_value; const void *src = socket.default_value;
void *dst = ((char*)this) + socket.struct_offset; void *dst = ((char *)this) + socket.struct_offset;
memcpy(dst, src, socket.size()); memcpy(dst, src, socket.size());
} }
template<typename T> template<typename T>
static void copy_array(const Node *node, const SocketType& socket, const Node *other, const SocketType& other_socket) static void copy_array(const Node *node,
const SocketType &socket,
const Node *other,
const SocketType &other_socket)
{ {
const array<T>* src = (const array<T>*)(((char*)other) + other_socket.struct_offset); const array<T> *src = (const array<T> *)(((char *)other) + other_socket.struct_offset);
array<T>* dst = (array<T>*)(((char*)node) + socket.struct_offset); array<T> *dst = (array<T> *)(((char *)node) + socket.struct_offset);
*dst = *src; *dst = *src;
} }
void Node::copy_value(const SocketType& socket, const Node& other, const SocketType& other_socket) void Node::copy_value(const SocketType &socket, const Node &other, const SocketType &other_socket)
{ {
assert(socket.type == other_socket.type); assert(socket.type == other_socket.type);
if(socket.is_array()) { if (socket.is_array()) {
switch(socket.type) { switch (socket.type) {
case SocketType::BOOLEAN_ARRAY: copy_array<bool>(this, socket, &other, other_socket); break; case SocketType::BOOLEAN_ARRAY:
case SocketType::FLOAT_ARRAY: copy_array<float>(this, socket, &other, other_socket); break; copy_array<bool>(this, socket, &other, other_socket);
case SocketType::INT_ARRAY: copy_array<int>(this, socket, &other, other_socket); break; break;
case SocketType::COLOR_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break; case SocketType::FLOAT_ARRAY:
case SocketType::VECTOR_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break; copy_array<float>(this, socket, &other, other_socket);
case SocketType::POINT_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break; break;
case SocketType::NORMAL_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break; case SocketType::INT_ARRAY:
case SocketType::POINT2_ARRAY: copy_array<float2>(this, socket, &other, other_socket); break; copy_array<int>(this, socket, &other, other_socket);
case SocketType::STRING_ARRAY: copy_array<ustring>(this, socket, &other, other_socket); break; break;
case SocketType::TRANSFORM_ARRAY: copy_array<Transform>(this, socket, &other, other_socket); break; case SocketType::COLOR_ARRAY:
case SocketType::NODE_ARRAY: copy_array<void*>(this, socket, &other, other_socket); break; copy_array<float3>(this, socket, &other, other_socket);
default: assert(0); break; break;
} case SocketType::VECTOR_ARRAY:
} copy_array<float3>(this, socket, &other, other_socket);
else { break;
const void *src = ((char*)&other) + other_socket.struct_offset; case SocketType::POINT_ARRAY:
void *dst = ((char*)this) + socket.struct_offset; copy_array<float3>(this, socket, &other, other_socket);
memcpy(dst, src, socket.size()); break;
} case SocketType::NORMAL_ARRAY:
copy_array<float3>(this, socket, &other, other_socket);
break;
case SocketType::POINT2_ARRAY:
copy_array<float2>(this, socket, &other, other_socket);
break;
case SocketType::STRING_ARRAY:
copy_array<ustring>(this, socket, &other, other_socket);
break;
case SocketType::TRANSFORM_ARRAY:
copy_array<Transform>(this, socket, &other, other_socket);
break;
case SocketType::NODE_ARRAY:
copy_array<void *>(this, socket, &other, other_socket);
break;
default:
assert(0);
break;
}
}
else {
const void *src = ((char *)&other) + other_socket.struct_offset;
void *dst = ((char *)this) + socket.struct_offset;
memcpy(dst, src, socket.size());
}
} }
template<typename T> template<typename T>
static bool is_array_equal(const Node *node, const Node *other, const SocketType& socket) static bool is_array_equal(const Node *node, const Node *other, const SocketType &socket)
{ {
const array<T>* a = (const array<T>*)(((char*)node) + socket.struct_offset); const array<T> *a = (const array<T> *)(((char *)node) + socket.struct_offset);
const array<T>* b = (const array<T>*)(((char*)other) + socket.struct_offset); const array<T> *b = (const array<T> *)(((char *)other) + socket.struct_offset);
return *a == *b; return *a == *b;
} }
template<typename T> template<typename T>
static bool is_value_equal(const Node *node, const Node *other, const SocketType& socket) static bool is_value_equal(const Node *node, const Node *other, const SocketType &socket)
{ {
const T *a = (const T*)(((char*)node) + socket.struct_offset); const T *a = (const T *)(((char *)node) + socket.struct_offset);
const T *b = (const T*)(((char*)other) + socket.struct_offset); const T *b = (const T *)(((char *)other) + socket.struct_offset);
return *a == *b; return *a == *b;
} }
bool Node::equals_value(const Node& other, const SocketType& socket) const bool Node::equals_value(const Node &other, const SocketType &socket) const
{ {
switch(socket.type) { switch (socket.type) {
case SocketType::BOOLEAN: return is_value_equal<bool>(this, &other, socket); case SocketType::BOOLEAN:
case SocketType::FLOAT: return is_value_equal<float>(this, &other, socket); return is_value_equal<bool>(this, &other, socket);
case SocketType::INT: return is_value_equal<int>(this, &other, socket); case SocketType::FLOAT:
case SocketType::UINT: return is_value_equal<uint>(this, &other, socket); return is_value_equal<float>(this, &other, socket);
case SocketType::COLOR: return is_value_equal<float3>(this, &other, socket); case SocketType::INT:
case SocketType::VECTOR: return is_value_equal<float3>(this, &other, socket); return is_value_equal<int>(this, &other, socket);
case SocketType::POINT: return is_value_equal<float3>(this, &other, socket); case SocketType::UINT:
case SocketType::NORMAL: return is_value_equal<float3>(this, &other, socket); return is_value_equal<uint>(this, &other, socket);
case SocketType::POINT2: return is_value_equal<float2>(this, &other, socket); case SocketType::COLOR:
case SocketType::CLOSURE: return true; return is_value_equal<float3>(this, &other, socket);
case SocketType::STRING: return is_value_equal<ustring>(this, &other, socket); case SocketType::VECTOR:
case SocketType::ENUM: return is_value_equal<int>(this, &other, socket); return is_value_equal<float3>(this, &other, socket);
case SocketType::TRANSFORM: return is_value_equal<Transform>(this, &other, socket); case SocketType::POINT:
case SocketType::NODE: return is_value_equal<void*>(this, &other, socket); return is_value_equal<float3>(this, &other, socket);
case SocketType::NORMAL:
return is_value_equal<float3>(this, &other, socket);
case SocketType::POINT2:
return is_value_equal<float2>(this, &other, socket);
case SocketType::CLOSURE:
return true;
case SocketType::STRING:
return is_value_equal<ustring>(this, &other, socket);
case SocketType::ENUM:
return is_value_equal<int>(this, &other, socket);
case SocketType::TRANSFORM:
return is_value_equal<Transform>(this, &other, socket);
case SocketType::NODE:
return is_value_equal<void *>(this, &other, socket);
case SocketType::BOOLEAN_ARRAY: return is_array_equal<bool>(this, &other, socket); case SocketType::BOOLEAN_ARRAY:
case SocketType::FLOAT_ARRAY: return is_array_equal<float>(this, &other, socket); return is_array_equal<bool>(this, &other, socket);
case SocketType::INT_ARRAY: return is_array_equal<int>(this, &other, socket); case SocketType::FLOAT_ARRAY:
case SocketType::COLOR_ARRAY: return is_array_equal<float3>(this, &other, socket); return is_array_equal<float>(this, &other, socket);
case SocketType::VECTOR_ARRAY: return is_array_equal<float3>(this, &other, socket); case SocketType::INT_ARRAY:
case SocketType::POINT_ARRAY: return is_array_equal<float3>(this, &other, socket); return is_array_equal<int>(this, &other, socket);
case SocketType::NORMAL_ARRAY: return is_array_equal<float3>(this, &other, socket); case SocketType::COLOR_ARRAY:
case SocketType::POINT2_ARRAY: return is_array_equal<float2>(this, &other, socket); return is_array_equal<float3>(this, &other, socket);
case SocketType::STRING_ARRAY: return is_array_equal<ustring>(this, &other, socket); case SocketType::VECTOR_ARRAY:
case SocketType::TRANSFORM_ARRAY: return is_array_equal<Transform>(this, &other, socket); return is_array_equal<float3>(this, &other, socket);
case SocketType::NODE_ARRAY: return is_array_equal<void*>(this, &other, socket); case SocketType::POINT_ARRAY:
return is_array_equal<float3>(this, &other, socket);
case SocketType::NORMAL_ARRAY:
return is_array_equal<float3>(this, &other, socket);
case SocketType::POINT2_ARRAY:
return is_array_equal<float2>(this, &other, socket);
case SocketType::STRING_ARRAY:
return is_array_equal<ustring>(this, &other, socket);
case SocketType::TRANSFORM_ARRAY:
return is_array_equal<Transform>(this, &other, socket);
case SocketType::NODE_ARRAY:
return is_array_equal<void *>(this, &other, socket);
case SocketType::UNDEFINED: return true; case SocketType::UNDEFINED:
} return true;
}
return true; return true;
} }
/* equals */ /* equals */
bool Node::equals(const Node& other) const bool Node::equals(const Node &other) const
{ {
assert(type == other.type); assert(type == other.type);
foreach(const SocketType& socket, type->inputs) { foreach (const SocketType &socket, type->inputs) {
if(!equals_value(other, socket)) if (!equals_value(other, socket))
return false; return false;
} }
return true; return true;
} }
/* Hash */ /* Hash */
namespace { namespace {
template<typename T> template<typename T> void value_hash(const Node *node, const SocketType &socket, MD5Hash &md5)
void value_hash(const Node *node, const SocketType& socket, MD5Hash& md5)
{ {
md5.append(((uint8_t*)node) + socket.struct_offset, socket.size()); md5.append(((uint8_t *)node) + socket.struct_offset, socket.size());
} }
void float3_hash(const Node *node, const SocketType& socket, MD5Hash& md5) void float3_hash(const Node *node, const SocketType &socket, MD5Hash &md5)
{ {
/* Don't compare 4th element used for padding. */ /* Don't compare 4th element used for padding. */
md5.append(((uint8_t*)node) + socket.struct_offset, sizeof(float) * 3); md5.append(((uint8_t *)node) + socket.struct_offset, sizeof(float) * 3);
} }
template<typename T> template<typename T> void array_hash(const Node *node, const SocketType &socket, MD5Hash &md5)
void array_hash(const Node *node, const SocketType& socket, MD5Hash& md5)
{ {
const array<T>& a = *(const array<T>*)(((char*)node) + socket.struct_offset); const array<T> &a = *(const array<T> *)(((char *)node) + socket.struct_offset);
for(size_t i = 0; i < a.size(); i++) { for (size_t i = 0; i < a.size(); i++) {
md5.append((uint8_t*)&a[i], sizeof(T)); md5.append((uint8_t *)&a[i], sizeof(T));
} }
} }
void float3_array_hash(const Node *node, const SocketType& socket, MD5Hash& md5) void float3_array_hash(const Node *node, const SocketType &socket, MD5Hash &md5)
{ {
/* Don't compare 4th element used for padding. */ /* Don't compare 4th element used for padding. */
const array<float3>& a = *(const array<float3>*)(((char*)node) + socket.struct_offset); const array<float3> &a = *(const array<float3> *)(((char *)node) + socket.struct_offset);
for(size_t i = 0; i < a.size(); i++) { for (size_t i = 0; i < a.size(); i++) {
md5.append((uint8_t*)&a[i], sizeof(float) * 3); md5.append((uint8_t *)&a[i], sizeof(float) * 3);
} }
} }
} // namespace } // namespace
/* Feed this node into `md5`: first the node type name, then for every input
 * socket its name followed by its stored value.
 *
 * Values are hashed from their in-memory bytes, so STRING and NODE sockets
 * contribute the bytes of the stored ustring / pointer object rather than
 * the data they refer to. CLOSURE and UNDEFINED sockets contribute only
 * their name. */
void Node::hash(MD5Hash &md5)
{
  md5.append(type->name.string());

  foreach (const SocketType &socket, type->inputs) {
    md5.append(socket.name.string());

    switch (socket.type) {
      /* Nothing stored to hash for these. */
      case SocketType::UNDEFINED:
      case SocketType::CLOSURE:
        break;

      /* Single values, hashed over socket.size() bytes. */
      case SocketType::BOOLEAN:
        value_hash<bool>(this, socket, md5);
        break;
      case SocketType::FLOAT:
        value_hash<float>(this, socket, md5);
        break;
      case SocketType::INT:
        value_hash<int>(this, socket, md5);
        break;
      case SocketType::UINT:
        value_hash<uint>(this, socket, md5);
        break;
      case SocketType::POINT2:
        value_hash<float2>(this, socket, md5);
        break;
      case SocketType::STRING:
        value_hash<ustring>(this, socket, md5);
        break;
      case SocketType::ENUM:
        value_hash<int>(this, socket, md5);
        break;
      case SocketType::TRANSFORM:
        value_hash<Transform>(this, socket, md5);
        break;
      case SocketType::NODE:
        value_hash<void *>(this, socket, md5);
        break;

      /* float3 values, hashed without the padding element. */
      case SocketType::COLOR:
      case SocketType::VECTOR:
      case SocketType::POINT:
      case SocketType::NORMAL:
        float3_hash(this, socket, md5);
        break;

      /* Arrays, hashed element by element. */
      case SocketType::BOOLEAN_ARRAY:
        array_hash<bool>(this, socket, md5);
        break;
      case SocketType::FLOAT_ARRAY:
        array_hash<float>(this, socket, md5);
        break;
      case SocketType::INT_ARRAY:
        array_hash<int>(this, socket, md5);
        break;
      case SocketType::POINT2_ARRAY:
        array_hash<float2>(this, socket, md5);
        break;
      case SocketType::STRING_ARRAY:
        array_hash<ustring>(this, socket, md5);
        break;
      case SocketType::TRANSFORM_ARRAY:
        array_hash<Transform>(this, socket, md5);
        break;
      case SocketType::NODE_ARRAY:
        array_hash<void *>(this, socket, md5);
        break;

      /* float3 arrays, hashed without the padding element. */
      case SocketType::COLOR_ARRAY:
      case SocketType::VECTOR_ARRAY:
      case SocketType::POINT_ARRAY:
      case SocketType::NORMAL_ARRAY:
        float3_array_hash(this, socket, md5);
        break;
    }
  }
}
namespace { namespace {
template<typename T> template<typename T> size_t array_size_in_bytes(const Node *node, const SocketType &socket)
size_t array_size_in_bytes(const Node *node, const SocketType& socket)
{ {
const array<T>& a = *(const array<T>*)(((char*)node) + socket.struct_offset); const array<T> &a = *(const array<T> *)(((char *)node) + socket.struct_offset);
return a.size() * sizeof(T); return a.size() * sizeof(T);
} }
} // namespace } // namespace
size_t Node::get_total_size_in_bytes() const size_t Node::get_total_size_in_bytes() const
{ {
size_t total_size = 0; size_t total_size = 0;
foreach(const SocketType& socket, type->inputs) { foreach (const SocketType &socket, type->inputs) {
switch(socket.type) { switch (socket.type) {
case SocketType::BOOLEAN: case SocketType::BOOLEAN:
case SocketType::FLOAT: case SocketType::FLOAT:
case SocketType::INT: case SocketType::INT:
case SocketType::UINT: case SocketType::UINT:
case SocketType::COLOR: case SocketType::COLOR:
case SocketType::VECTOR: case SocketType::VECTOR:
case SocketType::POINT: case SocketType::POINT:
case SocketType::NORMAL: case SocketType::NORMAL:
case SocketType::POINT2: case SocketType::POINT2:
case SocketType::CLOSURE: case SocketType::CLOSURE:
case SocketType::STRING: case SocketType::STRING:
case SocketType::ENUM: case SocketType::ENUM:
case SocketType::TRANSFORM: case SocketType::TRANSFORM:
case SocketType::NODE: case SocketType::NODE:
total_size += socket.size(); total_size += socket.size();
break; break;
case SocketType::BOOLEAN_ARRAY: case SocketType::BOOLEAN_ARRAY:
total_size += array_size_in_bytes<bool>(this, socket); total_size += array_size_in_bytes<bool>(this, socket);
break; break;
case SocketType::FLOAT_ARRAY: case SocketType::FLOAT_ARRAY:
total_size += array_size_in_bytes<float>(this, socket); total_size += array_size_in_bytes<float>(this, socket);
break; break;
case SocketType::INT_ARRAY: case SocketType::INT_ARRAY:
total_size += array_size_in_bytes<int>(this, socket); total_size += array_size_in_bytes<int>(this, socket);
break; break;
case SocketType::COLOR_ARRAY: case SocketType::COLOR_ARRAY:
total_size += array_size_in_bytes<float3>(this, socket); total_size += array_size_in_bytes<float3>(this, socket);
break; break;
case SocketType::VECTOR_ARRAY: case SocketType::VECTOR_ARRAY:
total_size += array_size_in_bytes<float3>(this, socket); total_size += array_size_in_bytes<float3>(this, socket);
break; break;
case SocketType::POINT_ARRAY: case SocketType::POINT_ARRAY:
total_size += array_size_in_bytes<float3>(this, socket); total_size += array_size_in_bytes<float3>(this, socket);
break; break;
case SocketType::NORMAL_ARRAY: case SocketType::NORMAL_ARRAY:
total_size += array_size_in_bytes<float3>(this, socket); total_size += array_size_in_bytes<float3>(this, socket);
break; break;
case SocketType::POINT2_ARRAY: case SocketType::POINT2_ARRAY:
total_size += array_size_in_bytes<float2>(this, socket); total_size += array_size_in_bytes<float2>(this, socket);
break; break;
case SocketType::STRING_ARRAY: case SocketType::STRING_ARRAY:
total_size += array_size_in_bytes<ustring>(this, socket); total_size += array_size_in_bytes<ustring>(this, socket);
break; break;
case SocketType::TRANSFORM_ARRAY: case SocketType::TRANSFORM_ARRAY:
total_size += array_size_in_bytes<Transform>(this, socket); total_size += array_size_in_bytes<Transform>(this, socket);
break; break;
case SocketType::NODE_ARRAY: case SocketType::NODE_ARRAY:
total_size += array_size_in_bytes<void*>(this, socket); total_size += array_size_in_bytes<void *>(this, socket);
break; break;
case SocketType::UNDEFINED: break; case SocketType::UNDEFINED:
} break;
} }
return total_size; }
return total_size;
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -31,72 +31,71 @@ struct Transform;
/* Node */
struct Node struct Node {
{ explicit Node(const NodeType *type, ustring name = ustring());
explicit Node(const NodeType *type, ustring name = ustring()); virtual ~Node();
virtual ~Node();
/* set values */ /* set values */
void set(const SocketType& input, bool value); void set(const SocketType &input, bool value);
void set(const SocketType& input, int value); void set(const SocketType &input, int value);
void set(const SocketType& input, uint value); void set(const SocketType &input, uint value);
void set(const SocketType& input, float value); void set(const SocketType &input, float value);
void set(const SocketType& input, float2 value); void set(const SocketType &input, float2 value);
void set(const SocketType& input, float3 value); void set(const SocketType &input, float3 value);
void set(const SocketType& input, const char *value); void set(const SocketType &input, const char *value);
void set(const SocketType& input, ustring value); void set(const SocketType &input, ustring value);
void set(const SocketType& input, const Transform& value); void set(const SocketType &input, const Transform &value);
void set(const SocketType& input, Node *value); void set(const SocketType &input, Node *value);
/* set array values. the memory from the input array will taken over /* set array values. the memory from the input array will taken over
* by the node and the input array will be empty after return */ * by the node and the input array will be empty after return */
void set(const SocketType& input, array<bool>& value); void set(const SocketType &input, array<bool> &value);
void set(const SocketType& input, array<int>& value); void set(const SocketType &input, array<int> &value);
void set(const SocketType& input, array<float>& value); void set(const SocketType &input, array<float> &value);
void set(const SocketType& input, array<float2>& value); void set(const SocketType &input, array<float2> &value);
void set(const SocketType& input, array<float3>& value); void set(const SocketType &input, array<float3> &value);
void set(const SocketType& input, array<ustring>& value); void set(const SocketType &input, array<ustring> &value);
void set(const SocketType& input, array<Transform>& value); void set(const SocketType &input, array<Transform> &value);
void set(const SocketType& input, array<Node*>& value); void set(const SocketType &input, array<Node *> &value);
/* get values */ /* get values */
bool get_bool(const SocketType& input) const; bool get_bool(const SocketType &input) const;
int get_int(const SocketType& input) const; int get_int(const SocketType &input) const;
uint get_uint(const SocketType& input) const; uint get_uint(const SocketType &input) const;
float get_float(const SocketType& input) const; float get_float(const SocketType &input) const;
float2 get_float2(const SocketType& input) const; float2 get_float2(const SocketType &input) const;
float3 get_float3(const SocketType& input) const; float3 get_float3(const SocketType &input) const;
ustring get_string(const SocketType& input) const; ustring get_string(const SocketType &input) const;
Transform get_transform(const SocketType& input) const; Transform get_transform(const SocketType &input) const;
Node *get_node(const SocketType& input) const; Node *get_node(const SocketType &input) const;
/* get array values */ /* get array values */
const array<bool>& get_bool_array(const SocketType& input) const; const array<bool> &get_bool_array(const SocketType &input) const;
const array<int>& get_int_array(const SocketType& input) const; const array<int> &get_int_array(const SocketType &input) const;
const array<float>& get_float_array(const SocketType& input) const; const array<float> &get_float_array(const SocketType &input) const;
const array<float2>& get_float2_array(const SocketType& input) const; const array<float2> &get_float2_array(const SocketType &input) const;
const array<float3>& get_float3_array(const SocketType& input) const; const array<float3> &get_float3_array(const SocketType &input) const;
const array<ustring>& get_string_array(const SocketType& input) const; const array<ustring> &get_string_array(const SocketType &input) const;
const array<Transform>& get_transform_array(const SocketType& input) const; const array<Transform> &get_transform_array(const SocketType &input) const;
const array<Node*>& get_node_array(const SocketType& input) const; const array<Node *> &get_node_array(const SocketType &input) const;
/* generic values operations */ /* generic values operations */
bool has_default_value(const SocketType& input) const; bool has_default_value(const SocketType &input) const;
void set_default_value(const SocketType& input); void set_default_value(const SocketType &input);
bool equals_value(const Node& other, const SocketType& input) const; bool equals_value(const Node &other, const SocketType &input) const;
void copy_value(const SocketType& input, const Node& other, const SocketType& other_input); void copy_value(const SocketType &input, const Node &other, const SocketType &other_input);
/* equals */ /* equals */
bool equals(const Node& other) const; bool equals(const Node &other) const;
/* compute hash of node and its socket values */ /* compute hash of node and its socket values */
void hash(MD5Hash& md5); void hash(MD5Hash &md5);
/* Get total size of this node. */ /* Get total size of this node. */
size_t get_total_size_in_bytes() const; size_t get_total_size_in_bytes() const;
ustring name; ustring name;
const NodeType *type; const NodeType *type;
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -26,25 +26,50 @@ CCL_NAMESPACE_BEGIN
 * Utility class for enum values. */
struct NodeEnum { struct NodeEnum {
bool empty() const { return left.empty(); } bool empty() const
void insert(const char *x, int y) { {
left[ustring(x)] = y; return left.empty();
right[y] = ustring(x); }
} void insert(const char *x, int y)
{
left[ustring(x)] = y;
right[y] = ustring(x);
}
bool exists(ustring x) const { return left.find(x) != left.end(); } bool exists(ustring x) const
bool exists(int y) const { return right.find(y) != right.end(); } {
return left.find(x) != left.end();
}
bool exists(int y) const
{
return right.find(y) != right.end();
}
int operator[](const char *x) const { return left.find(ustring(x))->second; } int operator[](const char *x) const
int operator[](ustring x) const { return left.find(x)->second; } {
ustring operator[](int y) const { return right.find(y)->second; } return left.find(ustring(x))->second;
}
int operator[](ustring x) const
{
return left.find(x)->second;
}
ustring operator[](int y) const
{
return right.find(y)->second;
}
unordered_map<ustring, int, ustringHash>::const_iterator begin() const { return left.begin(); } unordered_map<ustring, int, ustringHash>::const_iterator begin() const
unordered_map<ustring, int, ustringHash>::const_iterator end() const { return left.end(); } {
return left.begin();
}
unordered_map<ustring, int, ustringHash>::const_iterator end() const
{
return left.end();
}
private: private:
unordered_map<ustring, int, ustringHash> left; unordered_map<ustring, int, ustringHash> left;
unordered_map<int, ustring> right; unordered_map<int, ustring> right;
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -24,107 +24,118 @@ CCL_NAMESPACE_BEGIN
size_t SocketType::size() const size_t SocketType::size() const
{ {
return size(type); return size(type);
} }
bool SocketType::is_array() const bool SocketType::is_array() const
{ {
return (type >= BOOLEAN_ARRAY); return (type >= BOOLEAN_ARRAY);
} }
/* Size in bytes of the value stored for a socket of the given type.
 * UNDEFINED and CLOSURE report zero. A value not matching any enumerator
 * trips the assert and returns zero. */
size_t SocketType::size(Type type)
{
  switch (type) {
    /* Types reported with a size of zero. */
    case UNDEFINED:
    case CLOSURE:
      return 0;

    /* Scalars. */
    case BOOLEAN:
      return sizeof(bool);
    case FLOAT:
      return sizeof(float);
    case INT:
    case ENUM:
      return sizeof(int);
    case UINT:
      return sizeof(uint);

    /* Vector-like values. */
    case COLOR:
    case VECTOR:
    case POINT:
    case NORMAL:
      return sizeof(float3);
    case POINT2:
      return sizeof(float2);

    /* Other single values. */
    case STRING:
      return sizeof(ustring);
    case TRANSFORM:
      return sizeof(Transform);
    case NODE:
      return sizeof(void *);

    /* Arrays: the size of the array object itself, not of its elements. */
    case BOOLEAN_ARRAY:
      return sizeof(array<bool>);
    case FLOAT_ARRAY:
      return sizeof(array<float>);
    case INT_ARRAY:
      return sizeof(array<int>);
    case COLOR_ARRAY:
    case VECTOR_ARRAY:
    case POINT_ARRAY:
    case NORMAL_ARRAY:
      return sizeof(array<float3>);
    case POINT2_ARRAY:
      return sizeof(array<float2>);
    case STRING_ARRAY:
      return sizeof(array<ustring>);
    case TRANSFORM_ARRAY:
      return sizeof(array<Transform>);
    case NODE_ARRAY:
      return sizeof(array<void *>);
  }

  assert(0);
  return 0;
}
size_t SocketType::max_size() size_t SocketType::max_size()
{ {
return sizeof(Transform); return sizeof(Transform);
} }
void *SocketType::zero_default_value() void *SocketType::zero_default_value()
{ {
static Transform zero_transform = {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}; static Transform zero_transform = {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
return &zero_transform; return &zero_transform;
} }
/* Name of a socket type, looked up in a table indexed by the enum value.
 *
 * The table must list one name per Type enumerator, in declaration order.
 * The static_assert catches a table/enum length mismatch at compile time,
 * and the assert catches an out-of-range value in debug builds instead of
 * reading past the table. */
ustring SocketType::type_name(Type type)
{
  static ustring names[] = {ustring("undefined"),

                            ustring("boolean"),       ustring("float"),
                            ustring("int"),           ustring("uint"),
                            ustring("color"),         ustring("vector"),
                            ustring("point"),         ustring("normal"),
                            ustring("point2"),        ustring("closure"),
                            ustring("string"),        ustring("enum"),
                            ustring("transform"),     ustring("node"),

                            ustring("array_boolean"), ustring("array_float"),
                            ustring("array_int"),     ustring("array_color"),
                            ustring("array_vector"),  ustring("array_point"),
                            ustring("array_normal"),  ustring("array_point2"),
                            ustring("array_string"),  ustring("array_transform"),
                            ustring("array_node")};

  /* NODE_ARRAY is the last enumerator, so the table needs NODE_ARRAY + 1 entries. */
  static_assert(sizeof(names) / sizeof(names[0]) == (size_t)NODE_ARRAY + 1,
                "SocketType::type_name table is out of sync with SocketType::Type");

  /* Casting to size_t also rejects negative values. */
  assert((size_t)type < sizeof(names) / sizeof(names[0]));

  return names[(int)type];
}
/* True for COLOR, VECTOR, POINT and NORMAL, the single-value types for
 * which size() reports sizeof(float3). */
bool SocketType::is_float3(Type type)
{
  switch (type) {
    case COLOR:
    case VECTOR:
    case POINT:
    case NORMAL:
      return true;
    default:
      return false;
  }
}
/* Node Type */
/* Construct an empty node type of the given kind.
 *
 * `create` is set to NULL so a freshly constructed NodeType never carries an
 * indeterminate function pointer; NodeType::add() copies a temporary
 * NodeType into the registry before assigning the real create function. */
NodeType::NodeType(Type type_) : type(type_), create(NULL)
{
}
@@ -132,88 +143,94 @@ NodeType::~NodeType()
{ {
} }
/* Append an input socket description to this node type.
 *
 * All arguments are stored in the new socket as given, except that `flags`
 * and `extra_flags` are OR-ed together into socket.flags. */
void NodeType::register_input(ustring name,
                              ustring ui_name,
                              SocketType::Type type,
                              int struct_offset,
                              const void *default_value,
                              const NodeEnum *enum_values,
                              const NodeType **node_type,
                              int flags,
                              int extra_flags)
{
  SocketType input;

  /* Identification. */
  input.name = name;
  input.ui_name = ui_name;
  input.type = type;

  /* Storage location and default. */
  input.struct_offset = struct_offset;
  input.default_value = default_value;

  /* Optional type-specific metadata. */
  input.enum_values = enum_values;
  input.node_type = node_type;

  input.flags = flags | extra_flags;

  inputs.push_back(input);
}
/* Append an output socket description to this node type.
 *
 * Outputs get a zero struct offset, no default value, no enum or node type
 * metadata, and the LINKABLE flag. */
void NodeType::register_output(ustring name, ustring ui_name, SocketType::Type type)
{
  SocketType output;

  output.name = name;
  output.ui_name = ui_name;
  output.type = type;

  output.struct_offset = 0;
  output.default_value = NULL;
  output.enum_values = NULL;
  output.node_type = NULL;
  output.flags = SocketType::LINKABLE;

  outputs.push_back(output);
}
/* Return the first input socket whose name equals `name`, or NULL when
 * there is none. The pointer refers to an element of `inputs`. */
const SocketType *NodeType::find_input(ustring name) const
{
  for (size_t index = 0; index < inputs.size(); index++) {
    const SocketType &candidate = inputs[index];
    if (candidate.name == name) {
      return &candidate;
    }
  }

  return NULL;
}
/* Return the first output socket whose name equals `name`, or NULL when
 * there is none. The pointer refers to an element of `outputs`. */
const SocketType *NodeType::find_output(ustring name) const
{
  for (size_t index = 0; index < outputs.size(); index++) {
    const SocketType &candidate = outputs[index];
    if (candidate.name == name) {
      return &candidate;
    }
  }

  return NULL;
}
/* Node Type Registry */
/* Access the registry of node types, keyed by name. The map is a
 * function-local static, so it is constructed on first call. */
unordered_map<ustring, NodeType, ustringHash> &NodeType::types()
{
  static unordered_map<ustring, NodeType, ustringHash> registry;
  return registry;
}
/* Register a new node type under `name_` and return a pointer to the
 * registry entry.
 *
 * If the name is already registered, an error is printed to stderr, the
 * assert fires in debug builds, and NULL is returned without modifying the
 * existing entry. */
NodeType *NodeType::add(const char *name_, CreateFunc create_, Type type_)
{
  const ustring name(name_);
  unordered_map<ustring, NodeType, ustringHash> &registry = types();

  if (registry.find(name) != registry.end()) {
    fprintf(stderr, "Node type %s registered twice!\n", name_);
    assert(0);
    return NULL;
  }

  /* operator[] inserts a default-constructed entry, which is then replaced. */
  NodeType &entry = registry[name];
  entry = NodeType(type_);
  entry.name = name;
  entry.create = create_;

  return &entry;
}
/* Look up a registered node type by name; NULL when it is not registered. */
const NodeType *NodeType::find(ustring name)
{
  unordered_map<ustring, NodeType, ustringHash> &registry = types();
  unordered_map<ustring, NodeType, ustringHash>::iterator entry = registry.find(name);

  if (entry == registry.end()) {
    return NULL;
  }
  return &entry->second;
}
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -30,236 +30,349 @@ struct NodeType;
/* Socket Type */
struct SocketType struct SocketType {
{ enum Type {
enum Type UNDEFINED,
{
UNDEFINED,
BOOLEAN, BOOLEAN,
FLOAT, FLOAT,
INT, INT,
UINT, UINT,
COLOR, COLOR,
VECTOR, VECTOR,
POINT, POINT,
NORMAL, NORMAL,
POINT2, POINT2,
CLOSURE, CLOSURE,
STRING, STRING,
ENUM, ENUM,
TRANSFORM, TRANSFORM,
NODE, NODE,
BOOLEAN_ARRAY, BOOLEAN_ARRAY,
FLOAT_ARRAY, FLOAT_ARRAY,
INT_ARRAY, INT_ARRAY,
COLOR_ARRAY, COLOR_ARRAY,
VECTOR_ARRAY, VECTOR_ARRAY,
POINT_ARRAY, POINT_ARRAY,
NORMAL_ARRAY, NORMAL_ARRAY,
POINT2_ARRAY, POINT2_ARRAY,
STRING_ARRAY, STRING_ARRAY,
TRANSFORM_ARRAY, TRANSFORM_ARRAY,
NODE_ARRAY, NODE_ARRAY,
}; };
enum Flags { enum Flags {
LINKABLE = (1 << 0), LINKABLE = (1 << 0),
ANIMATABLE = (1 << 1), ANIMATABLE = (1 << 1),
SVM_INTERNAL = (1 << 2), SVM_INTERNAL = (1 << 2),
OSL_INTERNAL = (1 << 3), OSL_INTERNAL = (1 << 3),
INTERNAL = (1 << 2) | (1 << 3), INTERNAL = (1 << 2) | (1 << 3),
LINK_TEXTURE_GENERATED = (1 << 4), LINK_TEXTURE_GENERATED = (1 << 4),
LINK_TEXTURE_NORMAL = (1 << 5), LINK_TEXTURE_NORMAL = (1 << 5),
LINK_TEXTURE_UV = (1 << 6), LINK_TEXTURE_UV = (1 << 6),
LINK_INCOMING = (1 << 7), LINK_INCOMING = (1 << 7),
LINK_NORMAL = (1 << 8), LINK_NORMAL = (1 << 8),
LINK_POSITION = (1 << 9), LINK_POSITION = (1 << 9),
LINK_TANGENT = (1 << 10), LINK_TANGENT = (1 << 10),
DEFAULT_LINK_MASK = (1 << 4) | (1 << 5) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10) DEFAULT_LINK_MASK = (1 << 4) | (1 << 5) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10)
}; };
ustring name; ustring name;
Type type; Type type;
int struct_offset; int struct_offset;
const void *default_value; const void *default_value;
const NodeEnum *enum_values; const NodeEnum *enum_values;
const NodeType **node_type; const NodeType **node_type;
int flags; int flags;
ustring ui_name; ustring ui_name;
size_t size() const; size_t size() const;
bool is_array() const; bool is_array() const;
static size_t size(Type type); static size_t size(Type type);
static size_t max_size(); static size_t max_size();
static ustring type_name(Type type); static ustring type_name(Type type);
static void *zero_default_value(); static void *zero_default_value();
static bool is_float3(Type type); static bool is_float3(Type type);
}; };
/* Node Type */
struct NodeType struct NodeType {
{ enum Type { NONE, SHADER };
enum Type {
NONE,
SHADER
};
explicit NodeType(Type type = NONE); explicit NodeType(Type type = NONE);
~NodeType(); ~NodeType();
void register_input(ustring name, ustring ui_name, SocketType::Type type, void register_input(ustring name,
int struct_offset, const void *default_value, ustring ui_name,
const NodeEnum *enum_values = NULL, SocketType::Type type,
const NodeType **node_type = NULL, int struct_offset,
int flags = 0, int extra_flags = 0); const void *default_value,
void register_output(ustring name, ustring ui_name, SocketType::Type type); const NodeEnum *enum_values = NULL,
const NodeType **node_type = NULL,
int flags = 0,
int extra_flags = 0);
void register_output(ustring name, ustring ui_name, SocketType::Type type);
const SocketType *find_input(ustring name) const; const SocketType *find_input(ustring name) const;
const SocketType *find_output(ustring name) const; const SocketType *find_output(ustring name) const;
typedef Node *(*CreateFunc)(const NodeType *type); typedef Node *(*CreateFunc)(const NodeType *type);
ustring name; ustring name;
Type type; Type type;
vector<SocketType, std::allocator<SocketType> > inputs; vector<SocketType, std::allocator<SocketType>> inputs;
vector<SocketType, std::allocator<SocketType> > outputs; vector<SocketType, std::allocator<SocketType>> outputs;
CreateFunc create; CreateFunc create;
static NodeType *add(const char *name, CreateFunc create, Type type = NONE); static NodeType *add(const char *name, CreateFunc create, Type type = NONE);
static const NodeType *find(ustring name); static const NodeType *find(ustring name);
static unordered_map<ustring, NodeType, ustringHash>& types(); static unordered_map<ustring, NodeType, ustringHash> &types();
}; };
/* Node Definition Macros */
/* Declarations to place inside a node struct: a templated register_type(),
 * a static create() factory and the static node_type pointer. The matching
 * definitions are produced by NODE_DEFINE. */
#define NODE_DECLARE \
  template<typename T> static const NodeType *register_type(); \
  static Node *create(const NodeType *type); \
  static const NodeType *node_type;
/* Definitions matching NODE_DECLARE for `structname`:
 * - node_type is initialized by calling register_type<structname>(),
 * - create() returns a new default-constructed structname,
 * - the macro ends with the signature of register_type(), so the function
 *   body must follow the macro invocation. */
#define NODE_DEFINE(structname) \
  const NodeType *structname::node_type = structname::register_type<structname>(); \
  Node *structname::create(const NodeType *) \
  { \
    return new structname(); \
  } \
  template<typename T> const NodeType *structname::register_type()
/* Socket Definition Macros */
/* Byte offset of member `name` within T, computed from a fake object at
 * address 1 rather than with offsetof(). */
#define SOCKET_OFFSETOF(T, name) (((char *)&(((T *)1)->name)) - (char *)1)
/* Size in bytes of member `name` of T. The operand of sizeof is not
 * evaluated. */
#define SOCKET_SIZEOF(T, name) (sizeof(((T *)1)->name))
/* Register member `name` of T as an input socket of kind TYPE.
 *
 * Expects `T` (the node struct) and `type` (the NodeType being filled in)
 * to be in scope at the expansion site. The default value is kept in a
 * block-local static so its address stays valid after the block ends.
 * CHECK_TYPE is applied to the member and `datatype`. Any trailing
 * arguments are forwarded after `flags`. */
#define SOCKET_DEFINE(name, ui_name, default_value, datatype, TYPE, flags, ...) \
  { \
    static datatype defval = default_value; \
    CHECK_TYPE(((T *)1)->name, datatype); \
    type->register_input(ustring(#name), \
                         ustring(ui_name), \
                         TYPE, \
                         SOCKET_OFFSETOF(T, name), \
                         &defval, \
                         NULL, \
                         NULL, \
                         flags, \
                         ##__VA_ARGS__); \
  }
/* Single-value socket shorthands. Each forwards to SOCKET_DEFINE with the
 * C++ storage type and SocketType enumerator filled in and `flags` set to
 * 0; trailing arguments are passed through unchanged. */

/* Scalars. */
#define SOCKET_BOOLEAN(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, bool, SocketType::BOOLEAN, 0, ##__VA_ARGS__)
#define SOCKET_INT(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, int, SocketType::INT, 0, ##__VA_ARGS__)
#define SOCKET_UINT(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, uint, SocketType::UINT, 0, ##__VA_ARGS__)
#define SOCKET_FLOAT(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, float, SocketType::FLOAT, 0, ##__VA_ARGS__)

/* float3-backed values. */
#define SOCKET_COLOR(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::COLOR, 0, ##__VA_ARGS__)
#define SOCKET_VECTOR(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::VECTOR, 0, ##__VA_ARGS__)
#define SOCKET_POINT(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::POINT, 0, ##__VA_ARGS__)
#define SOCKET_NORMAL(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::NORMAL, 0, ##__VA_ARGS__)

/* Other value types. */
#define SOCKET_POINT2(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, float2, SocketType::POINT2, 0, ##__VA_ARGS__)
#define SOCKET_STRING(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, ustring, SocketType::STRING, 0, ##__VA_ARGS__)
#define SOCKET_TRANSFORM(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, Transform, SocketType::TRANSFORM, 0, ##__VA_ARGS__)
/* Register an enum input socket.
 *
 * Asserts that member `name` of T has the size of an int, keeps the default in
 * a block-scoped static int, and passes the addresses of that default and of
 * `values` to type->register_input(). Trailing arguments are forwarded. */
#define SOCKET_ENUM(name, ui_name, values, default_value, ...) \
  { \
    static int defval = default_value; \
    assert(SOCKET_SIZEOF(T, name) == sizeof(int)); \
    type->register_input(ustring(#name), ustring(ui_name), \
                         SocketType::ENUM, SOCKET_OFFSETOF(T, name), \
                         &defval, &values, NULL, \
                         ##__VA_ARGS__); \
  }
/* Register an input socket that references another Node.
 *
 * Asserts that member `name` of T is pointer-sized, uses a block-scoped static
 * NULL pointer as the default, and passes `node_type` as the last fixed
 * argument of type->register_input(). Trailing arguments are forwarded. */
#define SOCKET_NODE(name, ui_name, node_type, ...) \
  { \
    static Node *defval = NULL; \
    assert(SOCKET_SIZEOF(T, name) == sizeof(Node *)); \
    type->register_input(ustring(#name), ustring(ui_name), \
                         SocketType::NODE, SOCKET_OFFSETOF(T, name), \
                         &defval, NULL, node_type, \
                         ##__VA_ARGS__); \
  }
/* Array-valued property sockets.
 *
 * Same forwarding scheme as the scalar property macros, but the storage type
 * handed to SOCKET_DEFINE is array<...> and the SocketType tag is the
 * corresponding *_ARRAY value. The sixth argument stays 0 and trailing
 * arguments are forwarded unchanged. */
#define SOCKET_BOOLEAN_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, \
                array<bool>, SocketType::BOOLEAN_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_INT_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, \
                array<int>, SocketType::INT_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_FLOAT_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, \
                array<float>, SocketType::FLOAT_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_COLOR_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, \
                array<float3>, SocketType::COLOR_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_VECTOR_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, \
                array<float3>, SocketType::VECTOR_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_POINT_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, \
                array<float3>, SocketType::POINT_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_NORMAL_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, \
                array<float3>, SocketType::NORMAL_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_POINT2_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, \
                array<float2>, SocketType::POINT2_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_STRING_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, \
                array<ustring>, SocketType::STRING_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_TRANSFORM_ARRAY(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, \
                array<Transform>, SocketType::TRANSFORM_ARRAY, 0, ##__VA_ARGS__)
/* Register an input socket holding an array of Node references.
 *
 * Registers with SocketType::NODE_ARRAY and passes `node_type` as the last
 * fixed argument of type->register_input(). Trailing arguments are forwarded.
 *
 * NOTE(review): the size assertion and the static default both use a single
 * `Node *`, whereas the other *_ARRAY macros hand array<...> to SOCKET_DEFINE
 * as the storage type. Confirm whether array<Node *> was intended here. */
#define SOCKET_NODE_ARRAY(name, ui_name, node_type, ...) \
  { \
    static Node *defval = NULL; \
    assert(SOCKET_SIZEOF(T, name) == sizeof(Node *)); \
    type->register_input(ustring(#name), ustring(ui_name), \
                         SocketType::NODE_ARRAY, SOCKET_OFFSETOF(T, name), \
                         &defval, NULL, node_type, \
                         ##__VA_ARGS__); \
  }
/* Input sockets registered with SocketType::LINKABLE.
 *
 * These mirror the property macros but pass SocketType::LINKABLE instead of 0
 * as the sixth SOCKET_DEFINE argument. Trailing arguments are forwarded. */
#define SOCKET_IN_BOOLEAN(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, \
                bool, SocketType::BOOLEAN, SocketType::LINKABLE, ##__VA_ARGS__)
#define SOCKET_IN_INT(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, \
                int, SocketType::INT, SocketType::LINKABLE, ##__VA_ARGS__)
#define SOCKET_IN_FLOAT(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, \
                float, SocketType::FLOAT, SocketType::LINKABLE, ##__VA_ARGS__)
#define SOCKET_IN_COLOR(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, \
                float3, SocketType::COLOR, SocketType::LINKABLE, ##__VA_ARGS__)
#define SOCKET_IN_VECTOR(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, \
                float3, SocketType::VECTOR, SocketType::LINKABLE, ##__VA_ARGS__)
#define SOCKET_IN_POINT(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, \
                float3, SocketType::POINT, SocketType::LINKABLE, ##__VA_ARGS__)
#define SOCKET_IN_NORMAL(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, \
                float3, SocketType::NORMAL, SocketType::LINKABLE, ##__VA_ARGS__)
#define SOCKET_IN_STRING(name, ui_name, default_value, ...) \
  SOCKET_DEFINE(name, ui_name, default_value, \
                ustring, SocketType::STRING, SocketType::LINKABLE, ##__VA_ARGS__)

/* Closure inputs have no backing member: offset 0 and NULL for the default,
 * enum-values and node-type arguments are passed to register_input directly.
 * Unlike the brace-wrapped macros, this one expands to a bare expression. */
#define SOCKET_IN_CLOSURE(name, ui_name, ...) \
  type->register_input(ustring(#name), ustring(ui_name), \
                       SocketType::CLOSURE, 0, \
                       NULL, NULL, NULL, \
                       SocketType::LINKABLE, ##__VA_ARGS__)
/* Output sockets.
 *
 * Each macro expands to a braced call of type->register_output() with the
 * stringified member name, the UI name and the SocketType tag. Outputs take
 * no default value and no extra arguments. */
#define SOCKET_OUT_BOOLEAN(name, ui_name) \
  { type->register_output(ustring(#name), ustring(ui_name), SocketType::BOOLEAN); }
#define SOCKET_OUT_INT(name, ui_name) \
  { type->register_output(ustring(#name), ustring(ui_name), SocketType::INT); }
#define SOCKET_OUT_FLOAT(name, ui_name) \
  { type->register_output(ustring(#name), ustring(ui_name), SocketType::FLOAT); }
#define SOCKET_OUT_COLOR(name, ui_name) \
  { type->register_output(ustring(#name), ustring(ui_name), SocketType::COLOR); }
#define SOCKET_OUT_VECTOR(name, ui_name) \
  { type->register_output(ustring(#name), ustring(ui_name), SocketType::VECTOR); }
#define SOCKET_OUT_POINT(name, ui_name) \
  { type->register_output(ustring(#name), ustring(ui_name), SocketType::POINT); }
#define SOCKET_OUT_NORMAL(name, ui_name) \
  { type->register_output(ustring(#name), ustring(ui_name), SocketType::NORMAL); }
#define SOCKET_OUT_CLOSURE(name, ui_name) \
  { type->register_output(ustring(#name), ustring(ui_name), SocketType::CLOSURE); }
#define SOCKET_OUT_STRING(name, ui_name) \
  { type->register_output(ustring(#name), ustring(ui_name), SocketType::STRING); }
#define SOCKET_OUT_ENUM(name, ui_name) \
  { type->register_output(ustring(#name), ustring(ui_name), SocketType::ENUM); }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -24,437 +24,409 @@ CCL_NAMESPACE_BEGIN
static bool xml_read_boolean(const char *value) static bool xml_read_boolean(const char *value)
{ {
return string_iequals(value, "true") || (atoi(value) != 0); return string_iequals(value, "true") || (atoi(value) != 0);
} }
/* Text form of a boolean for XML output: "true" or "false".
 * The returned pointer refers to a string literal. */
static const char *xml_write_boolean(bool value)
{
  if (value) {
    return "true";
  }
  return "false";
}
template<int VECTOR_SIZE, typename T> template<int VECTOR_SIZE, typename T>
static void xml_read_float_array(T& value, xml_attribute attr) static void xml_read_float_array(T &value, xml_attribute attr)
{ {
vector<string> tokens; vector<string> tokens;
string_split(tokens, attr.value()); string_split(tokens, attr.value());
if(tokens.size() % VECTOR_SIZE != 0) { if (tokens.size() % VECTOR_SIZE != 0) {
return; return;
} }
value.resize(tokens.size() / VECTOR_SIZE); value.resize(tokens.size() / VECTOR_SIZE);
for(size_t i = 0; i < value.size(); i++) { for (size_t i = 0; i < value.size(); i++) {
float *value_float = (float*)&value[i]; float *value_float = (float *)&value[i];
for(size_t j = 0; j < VECTOR_SIZE; j++) for (size_t j = 0; j < VECTOR_SIZE; j++)
value_float[j] = (float)atof(tokens[i * VECTOR_SIZE + j].c_str()); value_float[j] = (float)atof(tokens[i * VECTOR_SIZE + j].c_str());
} }
} }
/* Populate `node` from the attributes of an XML element.
 *
 * The optional "name" attribute sets node->name. For every input socket of
 * the node's type (except CLOSURE/UNDEFINED sockets and sockets flagged
 * INTERNAL), the attribute with the socket's name is looked up; when present,
 * its text is parsed according to the socket type and stored with node->set().
 * Sockets without a matching attribute are left unchanged.
 *
 * NODE / NODE_ARRAY sockets are resolved by name through reader.node_map and
 * are only accepted when the referenced node's type matches socket.node_type.
 * After reading, a node with a non-empty name is registered in
 * reader.node_map so that later nodes can reference it. */
void xml_read_node(XMLReader &reader, Node *node, xml_node xml_node)
{
  xml_attribute name_attr = xml_node.attribute("name");
  if (name_attr) {
    node->name = ustring(name_attr.value());
  }

  foreach (const SocketType &socket, node->type->inputs) {
    if (socket.type == SocketType::CLOSURE || socket.type == SocketType::UNDEFINED) {
      continue;
    }
    if (socket.flags & SocketType::INTERNAL) {
      continue;
    }

    xml_attribute attr = xml_node.attribute(socket.name.c_str());

    if (!attr) {
      continue;
    }

    switch (socket.type) {
      case SocketType::BOOLEAN: {
        node->set(socket, xml_read_boolean(attr.value()));
        break;
      }
      case SocketType::BOOLEAN_ARRAY: {
        vector<string> tokens;
        string_split(tokens, attr.value());

        array<bool> value;
        value.resize(tokens.size());
        for (size_t i = 0; i < value.size(); i++)
          value[i] = xml_read_boolean(tokens[i].c_str());
        node->set(socket, value);
        break;
      }
      case SocketType::FLOAT: {
        node->set(socket, (float)atof(attr.value()));
        break;
      }
      case SocketType::FLOAT_ARRAY: {
        array<float> value;
        xml_read_float_array<1>(value, attr);
        node->set(socket, value);
        break;
      }
      case SocketType::INT: {
        node->set(socket, (int)atoi(attr.value()));
        break;
      }
      case SocketType::UINT: {
        node->set(socket, (uint)atoi(attr.value()));
        break;
      }
      case SocketType::INT_ARRAY: {
        vector<string> tokens;
        string_split(tokens, attr.value());

        array<int> value;
        value.resize(tokens.size());
        for (size_t i = 0; i < value.size(); i++) {
          /* Parse the i-th token. Parsing attr.value() here would give every
           * element the first integer of the attribute. */
          value[i] = (int)atoi(tokens[i].c_str());
        }
        node->set(socket, value);
        break;
      }
      case SocketType::COLOR:
      case SocketType::VECTOR:
      case SocketType::POINT:
      case SocketType::NORMAL: {
        /* Single float3: only accepted when exactly three values are given. */
        array<float3> value;
        xml_read_float_array<3>(value, attr);
        if (value.size() == 1) {
          node->set(socket, value[0]);
        }
        break;
      }
      case SocketType::COLOR_ARRAY:
      case SocketType::VECTOR_ARRAY:
      case SocketType::POINT_ARRAY:
      case SocketType::NORMAL_ARRAY: {
        array<float3> value;
        xml_read_float_array<3>(value, attr);
        node->set(socket, value);
        break;
      }
      case SocketType::POINT2: {
        /* Single float2: only accepted when exactly two values are given. */
        array<float2> value;
        xml_read_float_array<2>(value, attr);
        if (value.size() == 1) {
          node->set(socket, value[0]);
        }
        break;
      }
      case SocketType::POINT2_ARRAY: {
        array<float2> value;
        xml_read_float_array<2>(value, attr);
        node->set(socket, value);
        break;
      }
      case SocketType::STRING: {
        node->set(socket, attr.value());
        break;
      }
      case SocketType::ENUM: {
        /* Unknown enum names are reported and the socket keeps its value. */
        ustring value(attr.value());
        if (socket.enum_values->exists(value)) {
          node->set(socket, value);
        }
        else {
          fprintf(stderr,
                  "Unknown value \"%s\" for attribute \"%s\".\n",
                  value.c_str(),
                  socket.name.c_str());
        }
        break;
      }
      case SocketType::STRING_ARRAY: {
        vector<string> tokens;
        string_split(tokens, attr.value());

        array<ustring> value;
        value.resize(tokens.size());
        for (size_t i = 0; i < value.size(); i++) {
          value[i] = ustring(tokens[i]);
        }
        node->set(socket, value);
        break;
      }
      case SocketType::TRANSFORM: {
        /* Single transform: only accepted when exactly twelve values are given. */
        array<Transform> value;
        xml_read_float_array<12>(value, attr);
        if (value.size() == 1) {
          node->set(socket, value[0]);
        }
        break;
      }
      case SocketType::TRANSFORM_ARRAY: {
        array<Transform> value;
        xml_read_float_array<12>(value, attr);
        node->set(socket, value);
        break;
      }
      case SocketType::NODE: {
        /* Unknown names and type mismatches are silently ignored. */
        ustring value(attr.value());
        map<ustring, Node *>::iterator it = reader.node_map.find(value);
        if (it != reader.node_map.end()) {
          Node *value_node = it->second;
          if (value_node->type == *(socket.node_type))
            node->set(socket, it->second);
        }
        break;
      }
      case SocketType::NODE_ARRAY: {
        vector<string> tokens;
        string_split(tokens, attr.value());

        /* Unknown names and type mismatches become NULL entries so that the
         * array keeps one slot per token. */
        array<Node *> value;
        value.resize(tokens.size());
        for (size_t i = 0; i < value.size(); i++) {
          map<ustring, Node *>::iterator it = reader.node_map.find(ustring(tokens[i]));
          if (it != reader.node_map.end()) {
            Node *value_node = it->second;
            value[i] = (value_node->type == *(socket.node_type)) ? value_node : NULL;
          }
          else {
            value[i] = NULL;
          }
        }
        node->set(socket, value);
        break;
      }
      case SocketType::CLOSURE:
      case SocketType::UNDEFINED:
        break;
    }
  }

  if (!node->name.empty())
    reader.node_map[node->name] = node;
}
xml_node xml_write_node(Node *node, xml_node xml_root) xml_node xml_write_node(Node *node, xml_node xml_root)
{ {
xml_node xml_node = xml_root.append_child(node->type->name.c_str()); xml_node xml_node = xml_root.append_child(node->type->name.c_str());
xml_node.append_attribute("name") = node->name.c_str(); xml_node.append_attribute("name") = node->name.c_str();
foreach(const SocketType& socket, node->type->inputs) { foreach (const SocketType &socket, node->type->inputs) {
if(socket.type == SocketType::CLOSURE || socket.type == SocketType::UNDEFINED) { if (socket.type == SocketType::CLOSURE || socket.type == SocketType::UNDEFINED) {
continue; continue;
} }
if(socket.flags & SocketType::INTERNAL) { if (socket.flags & SocketType::INTERNAL) {
continue; continue;
} }
if(node->has_default_value(socket)) { if (node->has_default_value(socket)) {
continue; continue;
} }
xml_attribute attr = xml_node.append_attribute(socket.name.c_str()); xml_attribute attr = xml_node.append_attribute(socket.name.c_str());
switch(socket.type) switch (socket.type) {
{ case SocketType::BOOLEAN: {
case SocketType::BOOLEAN: attr = xml_write_boolean(node->get_bool(socket));
{ break;
attr = xml_write_boolean(node->get_bool(socket)); }
break; case SocketType::BOOLEAN_ARRAY: {
} std::stringstream ss;
case SocketType::BOOLEAN_ARRAY: const array<bool> &value = node->get_bool_array(socket);
{ for (size_t i = 0; i < value.size(); i++) {
std::stringstream ss; ss << xml_write_boolean(value[i]);
const array<bool>& value = node->get_bool_array(socket); if (i != value.size() - 1)
for(size_t i = 0; i < value.size(); i++) { ss << " ";
ss << xml_write_boolean(value[i]); }
if(i != value.size() - 1) attr = ss.str().c_str();
ss << " "; break;
} }
attr = ss.str().c_str(); case SocketType::FLOAT: {
break; attr = (double)node->get_float(socket);
} break;
case SocketType::FLOAT: }
{ case SocketType::FLOAT_ARRAY: {
attr = (double)node->get_float(socket); std::stringstream ss;
break; const array<float> &value = node->get_float_array(socket);
} for (size_t i = 0; i < value.size(); i++) {
case SocketType::FLOAT_ARRAY: ss << value[i];
{ if (i != value.size() - 1) {
std::stringstream ss; ss << " ";
const array<float>& value = node->get_float_array(socket); }
for(size_t i = 0; i < value.size(); i++) { }
ss << value[i]; attr = ss.str().c_str();
if(i != value.size() - 1) { break;
ss << " "; }
} case SocketType::INT: {
} attr = node->get_int(socket);
attr = ss.str().c_str(); break;
break; }
} case SocketType::UINT: {
case SocketType::INT: attr = node->get_uint(socket);
{ break;
attr = node->get_int(socket); }
break; case SocketType::INT_ARRAY: {
} std::stringstream ss;
case SocketType::UINT: const array<int> &value = node->get_int_array(socket);
{ for (size_t i = 0; i < value.size(); i++) {
attr = node->get_uint(socket); ss << value[i];
break; if (i != value.size() - 1) {
} ss << " ";
case SocketType::INT_ARRAY: }
{ }
std::stringstream ss; attr = ss.str().c_str();
const array<int>& value = node->get_int_array(socket); break;
for(size_t i = 0; i < value.size(); i++) { }
ss << value[i]; case SocketType::COLOR:
if(i != value.size() - 1) { case SocketType::VECTOR:
ss << " "; case SocketType::POINT:
} case SocketType::NORMAL: {
} float3 value = node->get_float3(socket);
attr = ss.str().c_str(); attr =
break; string_printf("%g %g %g", (double)value.x, (double)value.y, (double)value.z).c_str();
} break;
case SocketType::COLOR: }
case SocketType::VECTOR: case SocketType::COLOR_ARRAY:
case SocketType::POINT: case SocketType::VECTOR_ARRAY:
case SocketType::NORMAL: case SocketType::POINT_ARRAY:
{ case SocketType::NORMAL_ARRAY: {
float3 value = node->get_float3(socket); std::stringstream ss;
attr = string_printf("%g %g %g", (double)value.x, (double)value.y, (double)value.z).c_str(); const array<float3> &value = node->get_float3_array(socket);
break; for (size_t i = 0; i < value.size(); i++) {
} ss << string_printf(
case SocketType::COLOR_ARRAY: "%g %g %g", (double)value[i].x, (double)value[i].y, (double)value[i].z);
case SocketType::VECTOR_ARRAY: if (i != value.size() - 1) {
case SocketType::POINT_ARRAY: ss << " ";
case SocketType::NORMAL_ARRAY: }
{ }
std::stringstream ss; attr = ss.str().c_str();
const array<float3>& value = node->get_float3_array(socket); break;
for(size_t i = 0; i < value.size(); i++) { }
ss << string_printf("%g %g %g", (double)value[i].x, (double)value[i].y, (double)value[i].z); case SocketType::POINT2: {
if(i != value.size() - 1) { float2 value = node->get_float2(socket);
ss << " "; attr = string_printf("%g %g", (double)value.x, (double)value.y).c_str();
} break;
} }
attr = ss.str().c_str(); case SocketType::POINT2_ARRAY: {
break; std::stringstream ss;
} const array<float2> &value = node->get_float2_array(socket);
case SocketType::POINT2: for (size_t i = 0; i < value.size(); i++) {
{ ss << string_printf("%g %g", (double)value[i].x, (double)value[i].y);
float2 value = node->get_float2(socket); if (i != value.size() - 1) {
attr = string_printf("%g %g", (double)value.x, (double)value.y).c_str(); ss << " ";
break; }
} }
case SocketType::POINT2_ARRAY: attr = ss.str().c_str();
{ break;
std::stringstream ss; }
const array<float2>& value = node->get_float2_array(socket); case SocketType::STRING:
for(size_t i = 0; i < value.size(); i++) { case SocketType::ENUM: {
ss << string_printf("%g %g", (double)value[i].x, (double)value[i].y); attr = node->get_string(socket).c_str();
if(i != value.size() - 1) { break;
ss << " "; }
} case SocketType::STRING_ARRAY: {
} std::stringstream ss;
attr = ss.str().c_str(); const array<ustring> &value = node->get_string_array(socket);
break; for (size_t i = 0; i < value.size(); i++) {
} ss << value[i];
case SocketType::STRING: if (i != value.size() - 1) {
case SocketType::ENUM: ss << " ";
{ }
attr = node->get_string(socket).c_str(); }
break; attr = ss.str().c_str();
} break;
case SocketType::STRING_ARRAY: }
{ case SocketType::TRANSFORM: {
std::stringstream ss; Transform tfm = node->get_transform(socket);
const array<ustring>& value = node->get_string_array(socket); std::stringstream ss;
for(size_t i = 0; i < value.size(); i++) { for (int i = 0; i < 3; i++) {
ss << value[i]; ss << string_printf("%g %g %g %g ",
if(i != value.size() - 1) { (double)tfm[i][0],
ss << " "; (double)tfm[i][1],
} (double)tfm[i][2],
} (double)tfm[i][3]);
attr = ss.str().c_str(); }
break; ss << string_printf("%g %g %g %g", 0.0, 0.0, 0.0, 1.0);
} attr = ss.str().c_str();
case SocketType::TRANSFORM: break;
{ }
Transform tfm = node->get_transform(socket); case SocketType::TRANSFORM_ARRAY: {
std::stringstream ss; std::stringstream ss;
for(int i = 0; i < 3; i++) { const array<Transform> &value = node->get_transform_array(socket);
ss << string_printf("%g %g %g %g ", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]); for (size_t j = 0; j < value.size(); j++) {
} const Transform &tfm = value[j];
ss << string_printf("%g %g %g %g", 0.0, 0.0, 0.0, 1.0);
attr = ss.str().c_str();
break;
}
case SocketType::TRANSFORM_ARRAY:
{
std::stringstream ss;
const array<Transform>& value = node->get_transform_array(socket);
for(size_t j = 0; j < value.size(); j++) {
const Transform& tfm = value[j];
for(int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
ss << string_printf("%g %g %g %g ", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]); ss << string_printf("%g %g %g %g ",
} (double)tfm[i][0],
ss << string_printf("%g %g %g %g", 0.0, 0.0, 0.0, 1.0); (double)tfm[i][1],
if(j != value.size() - 1) { (double)tfm[i][2],
ss << " "; (double)tfm[i][3]);
} }
} ss << string_printf("%g %g %g %g", 0.0, 0.0, 0.0, 1.0);
attr = ss.str().c_str(); if (j != value.size() - 1) {
break; ss << " ";
} }
case SocketType::NODE: }
{ attr = ss.str().c_str();
Node *value = node->get_node(socket); break;
if(value) { }
attr = value->name.c_str(); case SocketType::NODE: {
} Node *value = node->get_node(socket);
break; if (value) {
} attr = value->name.c_str();
case SocketType::NODE_ARRAY: }
{ break;
std::stringstream ss; }
const array<Node*>& value = node->get_node_array(socket); case SocketType::NODE_ARRAY: {
for(size_t i = 0; i < value.size(); i++) { std::stringstream ss;
if(value[i]) { const array<Node *> &value = node->get_node_array(socket);
ss << value[i]->name.c_str(); for (size_t i = 0; i < value.size(); i++) {
} if (value[i]) {
if(i != value.size() - 1) { ss << value[i]->name.c_str();
ss << " "; }
} if (i != value.size() - 1) {
} ss << " ";
attr = ss.str().c_str(); }
break; }
} attr = ss.str().c_str();
case SocketType::CLOSURE: break;
case SocketType::UNDEFINED: }
break; case SocketType::CLOSURE:
} case SocketType::UNDEFINED:
} break;
}
}
return xml_node; return xml_node;
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -25,10 +25,10 @@
CCL_NAMESPACE_BEGIN

/* State carried across xml_read_node() calls. */
struct XMLReader {
  /* Nodes read so far, keyed by node name. xml_read_node() adds every node
   * with a non-empty name and consults this map to resolve NODE and
   * NODE_ARRAY socket references. */
  map<ustring, Node *> node_map;
};

/* Fill `node` from the attributes of `xml_node` and register it in `reader`. */
void xml_read_node(XMLReader &reader, Node *node, xml_node xml_node);

/* Append `node` as a child element of `xml_root` and return the new element. */
xml_node xml_write_node(Node *node, xml_node xml_root);

CCL_NAMESPACE_END

View File

@@ -1,7 +1,7 @@
# remove_extra_strict_flags() is defined elsewhere in the build system;
# presumably it relaxes the stricter warning flags for this directory --
# confirm against its definition.
remove_extra_strict_flags()

# Include directories for this directory: just the parent directory.
set(INC
  ..
)
set(INC_SYS set(INC_SYS
@@ -9,328 +9,328 @@ set(INC_SYS
) )
set(SRC_CPU_KERNELS set(SRC_CPU_KERNELS
kernels/cpu/kernel.cpp kernels/cpu/kernel.cpp
kernels/cpu/kernel_sse2.cpp kernels/cpu/kernel_sse2.cpp
kernels/cpu/kernel_sse3.cpp kernels/cpu/kernel_sse3.cpp
kernels/cpu/kernel_sse41.cpp kernels/cpu/kernel_sse41.cpp
kernels/cpu/kernel_avx.cpp kernels/cpu/kernel_avx.cpp
kernels/cpu/kernel_avx2.cpp kernels/cpu/kernel_avx2.cpp
kernels/cpu/kernel_split.cpp kernels/cpu/kernel_split.cpp
kernels/cpu/kernel_split_sse2.cpp kernels/cpu/kernel_split_sse2.cpp
kernels/cpu/kernel_split_sse3.cpp kernels/cpu/kernel_split_sse3.cpp
kernels/cpu/kernel_split_sse41.cpp kernels/cpu/kernel_split_sse41.cpp
kernels/cpu/kernel_split_avx.cpp kernels/cpu/kernel_split_avx.cpp
kernels/cpu/kernel_split_avx2.cpp kernels/cpu/kernel_split_avx2.cpp
kernels/cpu/filter.cpp kernels/cpu/filter.cpp
kernels/cpu/filter_sse2.cpp kernels/cpu/filter_sse2.cpp
kernels/cpu/filter_sse3.cpp kernels/cpu/filter_sse3.cpp
kernels/cpu/filter_sse41.cpp kernels/cpu/filter_sse41.cpp
kernels/cpu/filter_avx.cpp kernels/cpu/filter_avx.cpp
kernels/cpu/filter_avx2.cpp kernels/cpu/filter_avx2.cpp
) )
set(SRC_CUDA_KERNELS set(SRC_CUDA_KERNELS
kernels/cuda/kernel.cu kernels/cuda/kernel.cu
kernels/cuda/kernel_split.cu kernels/cuda/kernel_split.cu
kernels/cuda/filter.cu kernels/cuda/filter.cu
) )
set(SRC_OPENCL_KERNELS set(SRC_OPENCL_KERNELS
kernels/opencl/kernel_bake.cl kernels/opencl/kernel_bake.cl
kernels/opencl/kernel_base.cl kernels/opencl/kernel_base.cl
kernels/opencl/kernel_displace.cl kernels/opencl/kernel_displace.cl
kernels/opencl/kernel_background.cl kernels/opencl/kernel_background.cl
kernels/opencl/kernel_state_buffer_size.cl kernels/opencl/kernel_state_buffer_size.cl
kernels/opencl/kernel_split_bundle.cl kernels/opencl/kernel_split_bundle.cl
kernels/opencl/kernel_data_init.cl kernels/opencl/kernel_data_init.cl
kernels/opencl/kernel_path_init.cl kernels/opencl/kernel_path_init.cl
kernels/opencl/kernel_queue_enqueue.cl kernels/opencl/kernel_queue_enqueue.cl
kernels/opencl/kernel_scene_intersect.cl kernels/opencl/kernel_scene_intersect.cl
kernels/opencl/kernel_lamp_emission.cl kernels/opencl/kernel_lamp_emission.cl
kernels/opencl/kernel_do_volume.cl kernels/opencl/kernel_do_volume.cl
kernels/opencl/kernel_indirect_background.cl kernels/opencl/kernel_indirect_background.cl
kernels/opencl/kernel_shader_setup.cl kernels/opencl/kernel_shader_setup.cl
kernels/opencl/kernel_shader_sort.cl kernels/opencl/kernel_shader_sort.cl
kernels/opencl/kernel_shader_eval.cl kernels/opencl/kernel_shader_eval.cl
kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
kernels/opencl/kernel_subsurface_scatter.cl kernels/opencl/kernel_subsurface_scatter.cl
kernels/opencl/kernel_direct_lighting.cl kernels/opencl/kernel_direct_lighting.cl
kernels/opencl/kernel_shadow_blocked_ao.cl kernels/opencl/kernel_shadow_blocked_ao.cl
kernels/opencl/kernel_shadow_blocked_dl.cl kernels/opencl/kernel_shadow_blocked_dl.cl
kernels/opencl/kernel_enqueue_inactive.cl kernels/opencl/kernel_enqueue_inactive.cl
kernels/opencl/kernel_next_iteration_setup.cl kernels/opencl/kernel_next_iteration_setup.cl
kernels/opencl/kernel_indirect_subsurface.cl kernels/opencl/kernel_indirect_subsurface.cl
kernels/opencl/kernel_buffer_update.cl kernels/opencl/kernel_buffer_update.cl
kernels/opencl/filter.cl kernels/opencl/filter.cl
) )
set(SRC_BVH_HEADERS set(SRC_BVH_HEADERS
bvh/bvh.h bvh/bvh.h
bvh/bvh_nodes.h bvh/bvh_nodes.h
bvh/bvh_shadow_all.h bvh/bvh_shadow_all.h
bvh/bvh_local.h bvh/bvh_local.h
bvh/bvh_traversal.h bvh/bvh_traversal.h
bvh/bvh_types.h bvh/bvh_types.h
bvh/bvh_volume.h bvh/bvh_volume.h
bvh/bvh_volume_all.h bvh/bvh_volume_all.h
bvh/qbvh_nodes.h bvh/qbvh_nodes.h
bvh/qbvh_shadow_all.h bvh/qbvh_shadow_all.h
bvh/qbvh_local.h bvh/qbvh_local.h
bvh/qbvh_traversal.h bvh/qbvh_traversal.h
bvh/qbvh_volume.h bvh/qbvh_volume.h
bvh/qbvh_volume_all.h bvh/qbvh_volume_all.h
bvh/obvh_nodes.h bvh/obvh_nodes.h
bvh/obvh_shadow_all.h bvh/obvh_shadow_all.h
bvh/obvh_local.h bvh/obvh_local.h
bvh/obvh_traversal.h bvh/obvh_traversal.h
bvh/obvh_volume.h bvh/obvh_volume.h
bvh/obvh_volume_all.h bvh/obvh_volume_all.h
bvh/bvh_embree.h bvh/bvh_embree.h
) )
set(SRC_HEADERS set(SRC_HEADERS
kernel_accumulate.h kernel_accumulate.h
kernel_bake.h kernel_bake.h
kernel_camera.h kernel_camera.h
kernel_color.h kernel_color.h
kernel_compat_cpu.h kernel_compat_cpu.h
kernel_compat_cuda.h kernel_compat_cuda.h
kernel_compat_opencl.h kernel_compat_opencl.h
kernel_differential.h kernel_differential.h
kernel_emission.h kernel_emission.h
kernel_film.h kernel_film.h
kernel_globals.h kernel_globals.h
kernel_id_passes.h kernel_id_passes.h
kernel_jitter.h kernel_jitter.h
kernel_light.h kernel_light.h
kernel_math.h kernel_math.h
kernel_montecarlo.h kernel_montecarlo.h
kernel_passes.h kernel_passes.h
kernel_path.h kernel_path.h
kernel_path_branched.h kernel_path_branched.h
kernel_path_common.h kernel_path_common.h
kernel_path_state.h kernel_path_state.h
kernel_path_surface.h kernel_path_surface.h
kernel_path_subsurface.h kernel_path_subsurface.h
kernel_path_volume.h kernel_path_volume.h
kernel_profiling.h kernel_profiling.h
kernel_projection.h kernel_projection.h
kernel_queues.h kernel_queues.h
kernel_random.h kernel_random.h
kernel_shader.h kernel_shader.h
kernel_shadow.h kernel_shadow.h
kernel_subsurface.h kernel_subsurface.h
kernel_textures.h kernel_textures.h
kernel_types.h kernel_types.h
kernel_volume.h kernel_volume.h
kernel_work_stealing.h kernel_work_stealing.h
) )
set(SRC_KERNELS_CPU_HEADERS set(SRC_KERNELS_CPU_HEADERS
kernel.h kernel.h
kernels/cpu/kernel_cpu.h kernels/cpu/kernel_cpu.h
kernels/cpu/kernel_cpu_impl.h kernels/cpu/kernel_cpu_impl.h
kernels/cpu/kernel_cpu_image.h kernels/cpu/kernel_cpu_image.h
kernels/cpu/filter_cpu.h kernels/cpu/filter_cpu.h
kernels/cpu/filter_cpu_impl.h kernels/cpu/filter_cpu_impl.h
) )
set(SRC_KERNELS_CUDA_HEADERS set(SRC_KERNELS_CUDA_HEADERS
kernels/cuda/kernel_config.h kernels/cuda/kernel_config.h
kernels/cuda/kernel_cuda_image.h kernels/cuda/kernel_cuda_image.h
) )
set(SRC_KERNELS_OPENCL_HEADERS set(SRC_KERNELS_OPENCL_HEADERS
kernels/opencl/kernel_split_function.h kernels/opencl/kernel_split_function.h
kernels/opencl/kernel_opencl_image.h kernels/opencl/kernel_opencl_image.h
) )
set(SRC_CLOSURE_HEADERS set(SRC_CLOSURE_HEADERS
closure/alloc.h closure/alloc.h
closure/bsdf.h closure/bsdf.h
closure/bsdf_ashikhmin_velvet.h closure/bsdf_ashikhmin_velvet.h
closure/bsdf_diffuse.h closure/bsdf_diffuse.h
closure/bsdf_diffuse_ramp.h closure/bsdf_diffuse_ramp.h
closure/bsdf_microfacet.h closure/bsdf_microfacet.h
closure/bsdf_microfacet_multi.h closure/bsdf_microfacet_multi.h
closure/bsdf_microfacet_multi_impl.h closure/bsdf_microfacet_multi_impl.h
closure/bsdf_oren_nayar.h closure/bsdf_oren_nayar.h
closure/bsdf_phong_ramp.h closure/bsdf_phong_ramp.h
closure/bsdf_reflection.h closure/bsdf_reflection.h
closure/bsdf_refraction.h closure/bsdf_refraction.h
closure/bsdf_toon.h closure/bsdf_toon.h
closure/bsdf_transparent.h closure/bsdf_transparent.h
closure/bsdf_util.h closure/bsdf_util.h
closure/bsdf_ashikhmin_shirley.h closure/bsdf_ashikhmin_shirley.h
closure/bsdf_hair.h closure/bsdf_hair.h
closure/bssrdf.h closure/bssrdf.h
closure/emissive.h closure/emissive.h
closure/volume.h closure/volume.h
closure/bsdf_principled_diffuse.h closure/bsdf_principled_diffuse.h
closure/bsdf_principled_sheen.h closure/bsdf_principled_sheen.h
closure/bsdf_hair_principled.h closure/bsdf_hair_principled.h
) )
set(SRC_SVM_HEADERS set(SRC_SVM_HEADERS
svm/svm.h svm/svm.h
svm/svm_ao.h svm/svm_ao.h
svm/svm_attribute.h svm/svm_attribute.h
svm/svm_bevel.h svm/svm_bevel.h
svm/svm_blackbody.h svm/svm_blackbody.h
svm/svm_bump.h svm/svm_bump.h
svm/svm_camera.h svm/svm_camera.h
svm/svm_closure.h svm/svm_closure.h
svm/svm_convert.h svm/svm_convert.h
svm/svm_checker.h svm/svm_checker.h
svm/svm_color_util.h svm/svm_color_util.h
svm/svm_brick.h svm/svm_brick.h
svm/svm_displace.h svm/svm_displace.h
svm/svm_fresnel.h svm/svm_fresnel.h
svm/svm_wireframe.h svm/svm_wireframe.h
svm/svm_wavelength.h svm/svm_wavelength.h
svm/svm_gamma.h svm/svm_gamma.h
svm/svm_brightness.h svm/svm_brightness.h
svm/svm_geometry.h svm/svm_geometry.h
svm/svm_gradient.h svm/svm_gradient.h
svm/svm_hsv.h svm/svm_hsv.h
svm/svm_ies.h svm/svm_ies.h
svm/svm_image.h svm/svm_image.h
svm/svm_invert.h svm/svm_invert.h
svm/svm_light_path.h svm/svm_light_path.h
svm/svm_magic.h svm/svm_magic.h
svm/svm_mapping.h svm/svm_mapping.h
svm/svm_math.h svm/svm_math.h
svm/svm_math_util.h svm/svm_math_util.h
svm/svm_mix.h svm/svm_mix.h
svm/svm_musgrave.h svm/svm_musgrave.h
svm/svm_noise.h svm/svm_noise.h
svm/svm_noisetex.h svm/svm_noisetex.h
svm/svm_normal.h svm/svm_normal.h
svm/svm_ramp.h svm/svm_ramp.h
svm/svm_ramp_util.h svm/svm_ramp_util.h
svm/svm_sepcomb_hsv.h svm/svm_sepcomb_hsv.h
svm/svm_sepcomb_vector.h svm/svm_sepcomb_vector.h
svm/svm_sky.h svm/svm_sky.h
svm/svm_tex_coord.h svm/svm_tex_coord.h
svm/svm_texture.h svm/svm_texture.h
svm/svm_types.h svm/svm_types.h
svm/svm_value.h svm/svm_value.h
svm/svm_vector_transform.h svm/svm_vector_transform.h
svm/svm_voronoi.h svm/svm_voronoi.h
svm/svm_voxel.h svm/svm_voxel.h
svm/svm_wave.h svm/svm_wave.h
) )
set(SRC_GEOM_HEADERS set(SRC_GEOM_HEADERS
geom/geom.h geom/geom.h
geom/geom_attribute.h geom/geom_attribute.h
geom/geom_curve.h geom/geom_curve.h
geom/geom_curve_intersect.h geom/geom_curve_intersect.h
geom/geom_motion_curve.h geom/geom_motion_curve.h
geom/geom_motion_triangle.h geom/geom_motion_triangle.h
geom/geom_motion_triangle_intersect.h geom/geom_motion_triangle_intersect.h
geom/geom_motion_triangle_shader.h geom/geom_motion_triangle_shader.h
geom/geom_object.h geom/geom_object.h
geom/geom_patch.h geom/geom_patch.h
geom/geom_primitive.h geom/geom_primitive.h
geom/geom_subd_triangle.h geom/geom_subd_triangle.h
geom/geom_triangle.h geom/geom_triangle.h
geom/geom_triangle_intersect.h geom/geom_triangle_intersect.h
geom/geom_volume.h geom/geom_volume.h
) )
set(SRC_FILTER_HEADERS set(SRC_FILTER_HEADERS
filter/filter.h filter/filter.h
filter/filter_defines.h filter/filter_defines.h
filter/filter_features.h filter/filter_features.h
filter/filter_features_sse.h filter/filter_features_sse.h
filter/filter_kernel.h filter/filter_kernel.h
filter/filter_nlm_cpu.h filter/filter_nlm_cpu.h
filter/filter_nlm_gpu.h filter/filter_nlm_gpu.h
filter/filter_prefilter.h filter/filter_prefilter.h
filter/filter_reconstruction.h filter/filter_reconstruction.h
filter/filter_transform.h filter/filter_transform.h
filter/filter_transform_gpu.h filter/filter_transform_gpu.h
filter/filter_transform_sse.h filter/filter_transform_sse.h
) )
set(SRC_UTIL_HEADERS set(SRC_UTIL_HEADERS
../util/util_atomic.h ../util/util_atomic.h
../util/util_color.h ../util/util_color.h
../util/util_defines.h ../util/util_defines.h
../util/util_half.h ../util/util_half.h
../util/util_hash.h ../util/util_hash.h
../util/util_math.h ../util/util_math.h
../util/util_math_fast.h ../util/util_math_fast.h
../util/util_math_intersect.h ../util/util_math_intersect.h
../util/util_math_float2.h ../util/util_math_float2.h
../util/util_math_float3.h ../util/util_math_float3.h
../util/util_math_float4.h ../util/util_math_float4.h
../util/util_math_int2.h ../util/util_math_int2.h
../util/util_math_int3.h ../util/util_math_int3.h
../util/util_math_int4.h ../util/util_math_int4.h
../util/util_math_matrix.h ../util/util_math_matrix.h
../util/util_projection.h ../util/util_projection.h
../util/util_rect.h ../util/util_rect.h
../util/util_static_assert.h ../util/util_static_assert.h
../util/util_transform.h ../util/util_transform.h
../util/util_texture.h ../util/util_texture.h
../util/util_types.h ../util/util_types.h
../util/util_types_float2.h ../util/util_types_float2.h
../util/util_types_float2_impl.h ../util/util_types_float2_impl.h
../util/util_types_float3.h ../util/util_types_float3.h
../util/util_types_float3_impl.h ../util/util_types_float3_impl.h
../util/util_types_float4.h ../util/util_types_float4.h
../util/util_types_float4_impl.h ../util/util_types_float4_impl.h
../util/util_types_float8.h ../util/util_types_float8.h
../util/util_types_float8_impl.h ../util/util_types_float8_impl.h
../util/util_types_int2.h ../util/util_types_int2.h
../util/util_types_int2_impl.h ../util/util_types_int2_impl.h
../util/util_types_int3.h ../util/util_types_int3.h
../util/util_types_int3_impl.h ../util/util_types_int3_impl.h
../util/util_types_int4.h ../util/util_types_int4.h
../util/util_types_int4_impl.h ../util/util_types_int4_impl.h
../util/util_types_uchar2.h ../util/util_types_uchar2.h
../util/util_types_uchar2_impl.h ../util/util_types_uchar2_impl.h
../util/util_types_uchar3.h ../util/util_types_uchar3.h
../util/util_types_uchar3_impl.h ../util/util_types_uchar3_impl.h
../util/util_types_uchar4.h ../util/util_types_uchar4.h
../util/util_types_uchar4_impl.h ../util/util_types_uchar4_impl.h
../util/util_types_uint2.h ../util/util_types_uint2.h
../util/util_types_uint2_impl.h ../util/util_types_uint2_impl.h
../util/util_types_uint3.h ../util/util_types_uint3.h
../util/util_types_uint3_impl.h ../util/util_types_uint3_impl.h
../util/util_types_uint4.h ../util/util_types_uint4.h
../util/util_types_uint4_impl.h ../util/util_types_uint4_impl.h
../util/util_types_ushort4.h ../util/util_types_ushort4.h
../util/util_types_vector3.h ../util/util_types_vector3.h
../util/util_types_vector3_impl.h ../util/util_types_vector3_impl.h
) )
set(SRC_SPLIT_HEADERS set(SRC_SPLIT_HEADERS
split/kernel_branched.h split/kernel_branched.h
split/kernel_buffer_update.h split/kernel_buffer_update.h
split/kernel_data_init.h split/kernel_data_init.h
split/kernel_direct_lighting.h split/kernel_direct_lighting.h
split/kernel_do_volume.h split/kernel_do_volume.h
split/kernel_enqueue_inactive.h split/kernel_enqueue_inactive.h
split/kernel_holdout_emission_blurring_pathtermination_ao.h split/kernel_holdout_emission_blurring_pathtermination_ao.h
split/kernel_indirect_background.h split/kernel_indirect_background.h
split/kernel_indirect_subsurface.h split/kernel_indirect_subsurface.h
split/kernel_lamp_emission.h split/kernel_lamp_emission.h
split/kernel_next_iteration_setup.h split/kernel_next_iteration_setup.h
split/kernel_path_init.h split/kernel_path_init.h
split/kernel_queue_enqueue.h split/kernel_queue_enqueue.h
split/kernel_scene_intersect.h split/kernel_scene_intersect.h
split/kernel_shader_setup.h split/kernel_shader_setup.h
split/kernel_shader_sort.h split/kernel_shader_sort.h
split/kernel_shader_eval.h split/kernel_shader_eval.h
split/kernel_shadow_blocked_ao.h split/kernel_shadow_blocked_ao.h
split/kernel_shadow_blocked_dl.h split/kernel_shadow_blocked_dl.h
split/kernel_split_common.h split/kernel_split_common.h
split/kernel_split_data.h split/kernel_split_data.h
split/kernel_split_data_types.h split/kernel_split_data_types.h
split/kernel_subsurface_scatter.h split/kernel_subsurface_scatter.h
) )
set(LIB set(LIB
@@ -340,145 +340,145 @@ set(LIB
# CUDA module # CUDA module
if(WITH_CYCLES_CUDA_BINARIES) if(WITH_CYCLES_CUDA_BINARIES)
# 64 bit only # 64 bit only
set(CUDA_BITS 64) set(CUDA_BITS 64)
# CUDA version # CUDA version
execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT) execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${NVCC_OUT}") string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${NVCC_OUT}")
string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${NVCC_OUT}") string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${NVCC_OUT}")
set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}") set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
# warn for other versions # warn for other versions
if(CUDA_VERSION MATCHES "101") if(CUDA_VERSION MATCHES "101")
else() else()
message(WARNING message(WARNING
"CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, " "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
"build may succeed but only CUDA 10.1 is officially supported") "build may succeed but only CUDA 10.1 is officially supported")
endif() endif()
# build for each arch # build for each arch
set(cuda_sources kernels/cuda/kernel.cu kernels/cuda/kernel_split.cu set(cuda_sources kernels/cuda/kernel.cu kernels/cuda/kernel_split.cu
${SRC_HEADERS} ${SRC_HEADERS}
${SRC_KERNELS_CUDA_HEADERS} ${SRC_KERNELS_CUDA_HEADERS}
${SRC_BVH_HEADERS} ${SRC_BVH_HEADERS}
${SRC_SVM_HEADERS} ${SRC_SVM_HEADERS}
${SRC_GEOM_HEADERS} ${SRC_GEOM_HEADERS}
${SRC_CLOSURE_HEADERS} ${SRC_CLOSURE_HEADERS}
${SRC_UTIL_HEADERS} ${SRC_UTIL_HEADERS}
) )
set(cuda_filter_sources kernels/cuda/filter.cu set(cuda_filter_sources kernels/cuda/filter.cu
${SRC_HEADERS} ${SRC_HEADERS}
${SRC_KERNELS_CUDA_HEADERS} ${SRC_KERNELS_CUDA_HEADERS}
${SRC_FILTER_HEADERS} ${SRC_FILTER_HEADERS}
${SRC_UTIL_HEADERS} ${SRC_UTIL_HEADERS}
) )
set(cuda_cubins) set(cuda_cubins)
macro(CYCLES_CUDA_KERNEL_ADD arch prev_arch name flags sources experimental) macro(CYCLES_CUDA_KERNEL_ADD arch prev_arch name flags sources experimental)
set(cuda_cubin ${name}_${arch}.cubin) set(cuda_cubin ${name}_${arch}.cubin)
set(kernel_sources ${sources}) set(kernel_sources ${sources})
if(NOT ${prev_arch} STREQUAL "none") if(NOT ${prev_arch} STREQUAL "none")
set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin) set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin)
endif() endif()
set(cuda_kernel_src "/kernels/cuda/${name}.cu") set(cuda_kernel_src "/kernels/cuda/${name}.cu")
set(cuda_flags set(cuda_flags
-D CCL_NAMESPACE_BEGIN= -D CCL_NAMESPACE_BEGIN=
-D CCL_NAMESPACE_END= -D CCL_NAMESPACE_END=
-D NVCC -D NVCC
-m ${CUDA_BITS} -m ${CUDA_BITS}
-I ${CMAKE_CURRENT_SOURCE_DIR}/.. -I ${CMAKE_CURRENT_SOURCE_DIR}/..
-I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda -I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda
--use_fast_math --use_fast_math
-o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}) -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin})
if(${experimental}) if(${experimental})
set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__) set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__)
set(name ${name}_experimental) set(name ${name}_experimental)
endif() endif()
if(WITH_CYCLES_DEBUG) if(WITH_CYCLES_DEBUG)
set(cuda_flags ${cuda_flags} -D __KERNEL_DEBUG__) set(cuda_flags ${cuda_flags} -D __KERNEL_DEBUG__)
endif() endif()
if(WITH_CYCLES_CUBIN_COMPILER) if(WITH_CYCLES_CUBIN_COMPILER)
string(SUBSTRING ${arch} 3 -1 CUDA_ARCH) string(SUBSTRING ${arch} 3 -1 CUDA_ARCH)
# Needed to find libnvrtc-builtins.so. Can't do it from inside # Needed to find libnvrtc-builtins.so. Can't do it from inside
# cycles_cubin_cc since the env variable is read before main() # cycles_cubin_cc since the env variable is read before main()
if(APPLE) if(APPLE)
set(CUBIN_CC_ENV ${CMAKE_COMMAND} set(CUBIN_CC_ENV ${CMAKE_COMMAND}
-E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib") -E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib")
elseif(UNIX) elseif(UNIX)
set(CUBIN_CC_ENV ${CMAKE_COMMAND} set(CUBIN_CC_ENV ${CMAKE_COMMAND}
-E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64") -E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64")
endif() endif()
add_custom_command( add_custom_command(
OUTPUT ${cuda_cubin} OUTPUT ${cuda_cubin}
COMMAND ${CUBIN_CC_ENV} COMMAND ${CUBIN_CC_ENV}
"$<TARGET_FILE:cycles_cubin_cc>" "$<TARGET_FILE:cycles_cubin_cc>"
-target ${CUDA_ARCH} -target ${CUDA_ARCH}
-i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} -i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
${cuda_flags} ${cuda_flags}
-v -v
-cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}" -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
DEPENDS ${kernel_sources} cycles_cubin_cc) DEPENDS ${kernel_sources} cycles_cubin_cc)
else() else()
add_custom_command( add_custom_command(
OUTPUT ${cuda_cubin} OUTPUT ${cuda_cubin}
COMMAND ${CUDA_NVCC_EXECUTABLE} COMMAND ${CUDA_NVCC_EXECUTABLE}
-arch=${arch} -arch=${arch}
${CUDA_NVCC_FLAGS} ${CUDA_NVCC_FLAGS}
--cubin --cubin
${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
--ptxas-options="-v" --ptxas-options="-v"
${cuda_flags} ${cuda_flags}
DEPENDS ${kernel_sources}) DEPENDS ${kernel_sources})
endif() endif()
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
list(APPEND cuda_cubins ${cuda_cubin}) list(APPEND cuda_cubins ${cuda_cubin})
unset(cuda_debug_flags) unset(cuda_debug_flags)
endmacro() endmacro()
set(prev_arch "none") set(prev_arch "none")
foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
if(${arch} MATCHES "sm_2.") if(${arch} MATCHES "sm_2.")
message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.") message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.")
elseif(${arch} MATCHES "sm_7." AND ${CUDA_VERSION} LESS 100) elseif(${arch} MATCHES "sm_7." AND ${CUDA_VERSION} LESS 100)
message(STATUS "CUDA binaries for ${arch} require CUDA 10.0+, skipped.") message(STATUS "CUDA binaries for ${arch} require CUDA 10.0+, skipped.")
else() else()
# Compile regular kernel # Compile regular kernel
CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} filter "" "${cuda_filter_sources}" FALSE) CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} filter "" "${cuda_filter_sources}" FALSE)
CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel "" "${cuda_sources}" FALSE) CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel "" "${cuda_sources}" FALSE)
if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES) if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES)
# Compile split kernel # Compile split kernel
CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel_split "-D __SPLIT__" "${cuda_sources}" FALSE) CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel_split "-D __SPLIT__" "${cuda_sources}" FALSE)
endif() endif()
if(WITH_CYCLES_CUDA_BUILD_SERIAL) if(WITH_CYCLES_CUDA_BUILD_SERIAL)
set(prev_arch ${arch}) set(prev_arch ${arch})
endif() endif()
endif() endif()
endforeach() endforeach()
add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins}) add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
cycles_set_solution_folder(cycles_kernel_cuda) cycles_set_solution_folder(cycles_kernel_cuda)
endif() endif()
# OSL module # OSL module
if(WITH_CYCLES_OSL) if(WITH_CYCLES_OSL)
list(APPEND LIB list(APPEND LIB
cycles_kernel_osl cycles_kernel_osl
) )
add_subdirectory(osl) add_subdirectory(osl)
add_subdirectory(shaders) add_subdirectory(shaders)
endif() endif()
# CPU module # CPU module
@@ -491,56 +491,56 @@ set_source_files_properties(kernels/cpu/kernel_split.cpp PROPERTIES COMPILE_FLAG
set_source_files_properties(kernels/cpu/filter.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/filter.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}")
if(CXX_HAS_SSE) if(CXX_HAS_SSE)
set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/filter_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/filter_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/filter_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/filter_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/filter_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/filter_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
endif() endif()
if(CXX_HAS_AVX) if(CXX_HAS_AVX)
set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/filter_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/filter_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
endif() endif()
if(CXX_HAS_AVX2) if(CXX_HAS_AVX2)
set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/filter_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/filter_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
endif() endif()
cycles_add_library(cycles_kernel "${LIB}" cycles_add_library(cycles_kernel "${LIB}"
${SRC_CPU_KERNELS} ${SRC_CPU_KERNELS}
${SRC_CUDA_KERNELS} ${SRC_CUDA_KERNELS}
${SRC_OPENCL_KERNELS} ${SRC_OPENCL_KERNELS}
${SRC_HEADERS} ${SRC_HEADERS}
${SRC_KERNELS_CPU_HEADERS} ${SRC_KERNELS_CPU_HEADERS}
${SRC_KERNELS_CUDA_HEADERS} ${SRC_KERNELS_CUDA_HEADERS}
${SRC_KERNELS_OPENCL_HEADERS} ${SRC_KERNELS_OPENCL_HEADERS}
${SRC_BVH_HEADERS} ${SRC_BVH_HEADERS}
${SRC_CLOSURE_HEADERS} ${SRC_CLOSURE_HEADERS}
${SRC_FILTER_HEADERS} ${SRC_FILTER_HEADERS}
${SRC_SVM_HEADERS} ${SRC_SVM_HEADERS}
${SRC_GEOM_HEADERS} ${SRC_GEOM_HEADERS}
${SRC_SPLIT_HEADERS} ${SRC_SPLIT_HEADERS}
) )
if(WITH_CYCLES_CUDA) if(WITH_CYCLES_CUDA)
add_dependencies(cycles_kernel cycles_kernel_cuda) add_dependencies(cycles_kernel cycles_kernel_cuda)
endif() endif()
# OpenCL kernel # OpenCL kernel
#set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl) #set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl)
#add_custom_command( #add_custom_command(
# OUTPUT ${KERNEL_PREPROCESSED} # OUTPUT ${KERNEL_PREPROCESSED}
# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED} # COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED}
# DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS}) # DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS})
#add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED}) #add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED})
#delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel) #delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)

Some files were not shown because too many files have changed in this diff Show More